Coverage for /builds/ase/ase/ase/ga/startgenerator.py: 95.17%

207 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-08-02 00:12 +0000

1# fmt: off 

2 

3"""Tools for generating new random starting candidates.""" 

4import numpy as np 

5 

6from ase import Atoms 

7from ase.build import molecule 

8from ase.data import atomic_numbers 

9from ase.ga.utilities import ( 

10 atoms_too_close, 

11 atoms_too_close_two_sets, 

12 closest_distances_generator, 

13) 

14 

15 

16class StartGenerator: 

17 """Class for generating random starting candidates. 

18 

19 Its basic task consists of randomly placing atoms or 

20 molecules within a predescribed box, while respecting 

21 certain minimal interatomic distances. 

22 

23 Depending on the problem at hand, certain box vectors 

24 may not be known or chosen beforehand, and hence also 

25 need to be generated at random. Common cases include 

26 bulk crystals, films and chains, with respectively 

27 3, 2 and 1 unknown cell vectors. 

28 

29 Parameters: 

30 

31 slab: Atoms object 

32 Specifies the cell vectors and periodic boundary conditions 

33 to be applied to the randomly generated structures. 

34 Any included atoms (e.g. representing an underlying slab) 

35 are copied to these new structures. 

36 Variable cell vectors (see number_of_variable_cell_vectors) 

37 will be ignored because these will be generated at random. 

38 

39 blocks: list 

40 List of building units for the structure. Each item can be: 

41 

42 * an integer: representing a single atom by its atomic number, 

43 * a string: for a single atom (a chemical symbol) or a 

44 molecule (name recognized by ase.build.molecule), 

45 * an Atoms object, 

46 * an (A, B) tuple or list where A is any of the above 

47 and B is the number of A units to include. 

48 

49 A few examples: 

50 

51 >>> blocks = ['Ti'] * 4 + ['O'] * 8 

52 >>> blocks = [('Ti', 4), ('O', 8)] 

53 >>> blocks = [('CO2', 3)] # 3 CO2 molecules 

54 >>> co = Atoms('CO', positions=[[0, 0, 0], [1.4, 0, 0]]) 

55 >>> blocks = [(co, 3)] 

56 

57 Each individual block (single atom or molecule) in the 

58 randomly generated candidates is given a unique integer 

59 tag. These can be used to preserve the molecular identity 

60 of these subunits. 

61 

62 blmin: dict or float 

63 Dictionary with minimal interatomic distances. 

64 If a number is provided instead, the dictionary will 

65 be generated with this ratio of covalent bond radii. 

66 Note: when preserving molecular identity (see use_tags), 

67 the blmin dict will (naturally) only be applied 

68 to intermolecular distances (not the intramolecular 

69 ones). 

70 

71 number_of_variable_cell_vectors: int (default 0) 

72 The number of variable cell vectors (0, 1, 2 or 3). 

73 To keep things simple, it is the 'first' vectors which 

74 will be treated as variable, i.e. the 'a' vector in the 

75 univariate case, the 'a' and 'b' vectors in the bivariate 

76 case, etc. 

77 

78 box_to_place_in: [list, list of lists] (default None) 

79 The box in which the atoms can be placed. 

80 The default (None) means the box is equal to the 

81 entire unit cell of the 'slab' object. 

82 In many cases, however, smaller boxes are desired 

83 (e.g. for adsorbates on a slab surface or for isolated 

84 clusters). Then, box_to_place_in can be set as 

85 [p0, [v1, v2, v3]] with positions being generated as 

86 p0 + r1 * v1 + r2 * v2 + r3 + v3. 

87 In case of one or more variable cell vectors, 

88 the corresponding items in p0/v1/v2/v3 will be ignored. 

89 

90 box_volume: int or float or None (default) 

91 Initial guess for the box volume in cubic Angstrom 

92 (used in generating the variable cell vectors). 

93 Typical values in the solid state are 8-12 A^3 per atom. 

94 If there are no variable cell vectors, the default None 

95 is required (box volume equal to the box_to_place_in 

96 volume). 

97 

98 splits: dict or None 

99 Splitting scheme for increasing the translational symmetry 

100 in the random candidates, based on: 

101 

102 * `Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-32`__ 

103 

104 __ http://dx.doi.org/10.1016/j.cpc.2010.06.007 

105 

106 This should be a dict specifying the relative probabilities 

107 for each split, written as tuples. For example, 

108 

109 >>> splits = {(2,): 3, (1,): 1} 

110 

111 This means that, for each structure, either a splitting 

112 factor of 2 is applied to one randomly chosen axis, 

113 or a splitting factor of 1 is applied (i.e., no splitting). 

114 The probability ratio of the two scenararios will be 3:1, 

115 i.e. 75% chance for the former and 25% chance for the latter 

116 splitting scheme. Only the directions in which the 'slab' 

117 object is periodic are eligible for splitting. 

118 

119 To e.g. always apply splitting factors of 2 and 3 along two 

120 randomly chosen axes: 

121 

122 >>> splits = {(2, 3): 1} 

123 

124 By default, no splitting is applied (splits = None = {(1,): 1}). 

125 

126 cellbounds: ase.ga.utilities.CellBounds instance 

127 Describing limits on the cell shape, see 

128 :class:`~ase.ga.utilities.CellBounds`. 

129 Note that it only make sense to impose conditions 

130 regarding cell vectors which have been marked as 

131 variable (see number_of_variable_cell_vectors). 

132 

133 test_dist_to_slab: bool (default True) 

134 Whether to make sure that the distances between 

135 the atoms and the slab satisfy the blmin. 

136 

137 test_too_far: bool (default True) 

138 Whether to also make sure that there are no isolated 

139 atoms or molecules with nearest-neighbour bond lengths 

140 larger than 2x the value in the blmin dict. 

141 

142 rng: Random number generator 

143 By default numpy.random. 

144 """ 

145 

146 def __init__(self, slab, blocks, blmin, number_of_variable_cell_vectors=0, 

147 box_to_place_in=None, box_volume=None, splits=None, 

148 cellbounds=None, test_dist_to_slab=True, test_too_far=True, 

149 rng=np.random): 

150 

151 self.slab = slab 

152 

153 self.blocks = [] 

154 for item in blocks: 

155 if isinstance(item, (tuple, list)): 

156 assert len(item) == 2, 'Item length %d != 2' % len(item) 

157 block, count = item 

158 else: 

159 block, count = item, 1 

160 

161 # Convert block into Atoms object 

162 if isinstance(block, Atoms): 

163 pass 

164 elif block in atomic_numbers: 

165 block = Atoms(block) 

166 elif isinstance(block, str): 

167 block = molecule(block) 

168 elif block in atomic_numbers.values(): 

169 block = Atoms(numbers=[block]) 

170 else: 

171 raise ValueError('Cannot parse this block:', block) 

172 

173 # Add to self.blocks, taking into account that 

174 # we want to group the same blocks together. 

175 # This is important for the cell splitting. 

176 for i, (b, c) in enumerate(self.blocks): 

177 if block == b: 

178 self.blocks[i][1] += count 

179 break 

180 else: 

181 self.blocks.append([block, count]) 

182 

183 if isinstance(blmin, dict): 

184 self.blmin = blmin 

185 else: 

186 numbers = np.unique([b.get_atomic_numbers() for b in self.blocks]) 

187 self.blmin = closest_distances_generator( 

188 numbers, 

189 ratio_of_covalent_radii=blmin) 

190 

191 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors 

192 assert self.number_of_variable_cell_vectors in range(4) 

193 if len(self.slab) > 0: 

194 msg = 'Including atoms in the slab only makes sense' 

195 msg += ' if there are no variable unit cell vectors' 

196 assert self.number_of_variable_cell_vectors == 0, msg 

197 for i in range(self.number_of_variable_cell_vectors): 

198 msg = f'Unit cell {("abc"[i])}-vector is marked as variable ' 

199 msg += 'and slab must then also be periodic in this direction' 

200 assert self.slab.pbc[i], msg 

201 

202 if box_to_place_in is None: 

203 p0 = np.array([0., 0., 0.]) 

204 cell = self.slab.get_cell() 

205 self.box_to_place_in = [p0, [cell[0, :], cell[1, :], cell[2, :]]] 

206 else: 

207 self.box_to_place_in = box_to_place_in 

208 

209 if box_volume is None: 

210 assert self.number_of_variable_cell_vectors == 0 

211 box_volume = abs(np.linalg.det(self.box_to_place_in[1])) 

212 else: 

213 assert self.number_of_variable_cell_vectors > 0 

214 self.box_volume = box_volume 

215 assert self.box_volume > 0 

216 

217 if splits is None: 

218 splits = {(1,): 1} 

219 tot = sum(v for v in splits.values()) 

220 self.splits = {k: v * 1. / tot for k, v in splits.items()} 

221 

222 self.cellbounds = cellbounds 

223 self.test_too_far = test_too_far 

224 self.test_dist_to_slab = test_dist_to_slab 

225 self.rng = rng 

226 

227 def get_new_candidate(self, maxiter=None): 

228 """Returns a new candidate. 

229 

230 maxiter: upper bound on the total number of times 

231 the random position generator is called 

232 when generating the new candidate. 

233 

234 By default (maxiter=None) no such bound 

235 is imposed. If the generator takes too 

236 long time to create a new candidate, it 

237 may be suitable to specify a finite value. 

238 When the bound is exceeded, None is returned. 

239 """ 

240 pbc = self.slab.get_pbc() 

241 

242 # Choose cell splitting 

243 r = self.rng.random() 

244 cumprob = 0 

245 for split, prob in self.splits.items(): 

246 cumprob += prob 

247 if cumprob > r: 

248 break 

249 

250 # Choose direction(s) along which to split 

251 # and by how much 

252 directions = [i for i in range(3) if pbc[i]] 

253 repeat = [1, 1, 1] 

254 if len(directions) > 0: 

255 for number in split: 

256 d = self.rng.choice(directions) 

257 repeat[d] = number 

258 repeat = tuple(repeat) 

259 

260 # Generate the 'full' unit cell 

261 # for the eventual candidates 

262 cell = self.generate_unit_cell(repeat) 

263 if self.number_of_variable_cell_vectors == 0: 

264 assert np.allclose(cell, self.slab.get_cell()) 

265 

266 # Make the smaller 'box' in which we are 

267 # allowed to place the atoms and which will 

268 # then be repeated to fill the 'full' unit cell 

269 box = np.copy(cell) 

270 for i in range(self.number_of_variable_cell_vectors, 3): 

271 box[i] = np.array(self.box_to_place_in[1][i]) 

272 box /= np.array([repeat]).T 

273 

274 # Here we gather the (reduced) number of blocks 

275 # to put in the smaller box, and the 'surplus' 

276 # occurring when the block count is not divisible 

277 # by the number of repetitions. 

278 # E.g. if we have a ('Ti', 4) block and do a 

279 # [2, 3, 1] repetition, we employ a ('Ti', 1) 

280 # block in the smaller box and delete 2 out 6 

281 # Ti atoms afterwards 

282 nrep = int(np.prod(repeat)) 

283 blocks, ids, surplus = [], [], [] 

284 for i, (block, count) in enumerate(self.blocks): 

285 count_part = int(np.ceil(count * 1. / nrep)) 

286 blocks.extend([block] * count_part) 

287 surplus.append(nrep * count_part - count) 

288 ids.extend([i] * count_part) 

289 

290 N_blocks = len(blocks) 

291 

292 # Shuffle the ordering so different blocks 

293 # are added in random order 

294 order = np.arange(N_blocks) 

295 self.rng.shuffle(order) 

296 blocks = [blocks[i] for i in order] 

297 ids = np.array(ids)[order] 

298 

299 # Add blocks one by one until we have found 

300 # a valid candidate 

301 blmin = self.blmin 

302 blmin_too_far = {key: 2 * val for key, val in blmin.items()} 

303 

304 niter = 0 

305 while maxiter is None or niter < maxiter: 

306 cand = Atoms('', cell=box, pbc=pbc) 

307 

308 for i in range(N_blocks): 

309 atoms = blocks[i].copy() 

310 atoms.set_tags(i) 

311 atoms.set_pbc(pbc) 

312 atoms.set_cell(box, scale_atoms=False) 

313 

314 while maxiter is None or niter < maxiter: 

315 niter += 1 

316 cop = atoms.get_positions().mean(axis=0) 

317 pos = np.dot(self.rng.random((1, 3)), box) 

318 atoms.translate(pos - cop) 

319 

320 if len(atoms) > 1: 

321 # Apply a random rotation to multi-atom blocks 

322 phi, theta, psi = 360 * self.rng.random(3) 

323 atoms.euler_rotate(phi=phi, theta=0.5 * theta, psi=psi, 

324 center=pos) 

325 

326 if not atoms_too_close_two_sets(cand, atoms, blmin): 

327 cand += atoms 

328 break 

329 else: 

330 # Reached maximum iteration number 

331 # Break out of the for loop above 

332 cand = None 

333 break 

334 

335 if cand is None: 

336 # Exit the main while loop 

337 break 

338 

339 # Rebuild the candidate after repeating, 

340 # randomly deleting surplus blocks and 

341 # sorting back to the original order 

342 cand_full = cand.repeat(repeat) 

343 

344 tags_full = cand_full.get_tags() 

345 for i in range(nrep): 

346 tags_full[len(cand) * i:len(cand) * (i + 1)] += i * N_blocks 

347 cand_full.set_tags(tags_full) 

348 

349 cand = Atoms('', cell=cell, pbc=pbc) 

350 ids_full = np.tile(ids, nrep) 

351 

352 tag_counter = 0 

353 if len(self.slab) > 0: 

354 tag_counter = int(max(self.slab.get_tags())) + 1 

355 

356 for i, (block, count) in enumerate(self.blocks): 

357 tags = np.where(ids_full == i)[0] 

358 bad = self.rng.choice(tags, size=surplus[i], replace=False) 

359 for tag in tags: 

360 if tag not in bad: 

361 select = [a.index for a in cand_full if a.tag == tag] 

362 atoms = cand_full[select] # is indeed a copy! 

363 atoms.set_tags(tag_counter) 

364 assert len(atoms) == len(block) 

365 cand += atoms 

366 tag_counter += 1 

367 

368 for i in range(self.number_of_variable_cell_vectors, 3): 

369 cand.positions[:, i] += self.box_to_place_in[0][i] 

370 

371 # By construction, the minimal interatomic distances 

372 # within the structure should already be respected 

373 assert not atoms_too_close(cand, blmin, use_tags=True), \ 

374 'This is not supposed to happen; please report this bug' 

375 

376 if self.test_dist_to_slab and len(self.slab) > 0: 

377 if atoms_too_close_two_sets(self.slab, cand, blmin): 

378 continue 

379 

380 if self.test_too_far: 

381 tags = cand.get_tags() 

382 

383 for tag in np.unique(tags): 

384 too_far = True 

385 indices_i = np.where(tags == tag)[0] 

386 indices_j = np.where(tags != tag)[0] 

387 too_far = not atoms_too_close_two_sets(cand[indices_i], 

388 cand[indices_j], 

389 blmin_too_far) 

390 

391 if too_far and len(self.slab) > 0: 

392 # the block is too far from the rest 

393 # but might still be sufficiently 

394 # close to the slab 

395 too_far = not atoms_too_close_two_sets(cand[indices_i], 

396 self.slab, 

397 blmin_too_far) 

398 if too_far: 

399 break 

400 else: 

401 too_far = False 

402 

403 if too_far: 

404 continue 

405 

406 # Passed all the tests 

407 cand = self.slab + cand 

408 cand.set_cell(cell, scale_atoms=False) 

409 break 

410 else: 

411 # Reached max iteration count in the while loop 

412 return None 

413 

414 return cand 

415 

416 def generate_unit_cell(self, repeat): 

417 """Generates a random unit cell. 

418 

419 For this, we use the vectors in self.slab.cell 

420 in the fixed directions and randomly generate 

421 the variable ones. For such a cell to be valid, 

422 it has to satisfy the self.cellbounds constraints. 

423 

424 The cell will also be such that the volume of the 

425 box in which the atoms can be placed (box limits 

426 described by self.box_to_place_in) is equal to 

427 self.box_volume. 

428 

429 Parameters: 

430 

431 repeat: tuple of 3 integers 

432 Indicates by how much each cell vector 

433 will later be reduced by cell splitting. 

434 

435 This is used to ensure that the original 

436 cell is large enough so that the cell lengths 

437 of the smaller cell exceed the largest 

438 (X,X)-minimal-interatomic-distance in self.blmin. 

439 """ 

440 # Find the minimal cell length 'Lmin' 

441 # that we need in order to ensure that 

442 # an added atom or molecule will never 

443 # be 'too close to itself' 

444 Lmin = 0. 

445 for atoms, count in self.blocks: 

446 dist = atoms.get_all_distances(mic=False, vector=False) 

447 num = atoms.get_atomic_numbers() 

448 

449 for i in range(len(atoms)): 

450 dist[i, i] += self.blmin[(num[i], num[i])] 

451 for j in range(i): 

452 bl = self.blmin[(num[i], num[j])] 

453 dist[i, j] += bl 

454 dist[j, i] += bl 

455 

456 L = np.max(dist) 

457 if L > Lmin: 

458 Lmin = L 

459 

460 # Generate a suitable unit cell 

461 valid = False 

462 while not valid: 

463 cell = np.zeros((3, 3)) 

464 

465 for i in range(self.number_of_variable_cell_vectors): 

466 # on-diagonal values 

467 cell[i, i] = self.rng.random() * np.cbrt(self.box_volume) 

468 cell[i, i] *= repeat[i] 

469 for j in range(i): 

470 # off-diagonal values 

471 cell[i, j] = (self.rng.random() - 0.5) * cell[i - 1, i - 1] 

472 

473 # volume scaling 

474 for i in range(self.number_of_variable_cell_vectors, 3): 

475 cell[i] = self.box_to_place_in[1][i] 

476 

477 if self.number_of_variable_cell_vectors > 0: 

478 volume = abs(np.linalg.det(cell)) 

479 scaling = self.box_volume / volume 

480 scaling **= 1. / self.number_of_variable_cell_vectors 

481 cell[:self.number_of_variable_cell_vectors] *= scaling 

482 

483 for i in range(self.number_of_variable_cell_vectors, 3): 

484 cell[i] = self.slab.get_cell()[i] 

485 

486 # bounds checking 

487 valid = True 

488 

489 if self.cellbounds is not None: 

490 if not self.cellbounds.is_within_bounds(cell): 

491 valid = False 

492 

493 if valid: 

494 for i in range(3): 

495 if np.linalg.norm(cell[i]) < repeat[i] * Lmin: 

496 assert self.number_of_variable_cell_vectors > 0 

497 valid = False 

498 

499 return cell