Coverage for /builds/ase/ase/ase/ga/startgenerator.py: 95.17%
207 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-08-02 00:12 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2025-08-02 00:12 +0000
1# fmt: off
3"""Tools for generating new random starting candidates."""
4import numpy as np
6from ase import Atoms
7from ase.build import molecule
8from ase.data import atomic_numbers
9from ase.ga.utilities import (
10 atoms_too_close,
11 atoms_too_close_two_sets,
12 closest_distances_generator,
13)
16class StartGenerator:
17 """Class for generating random starting candidates.
19 Its basic task consists of randomly placing atoms or
20 molecules within a predescribed box, while respecting
21 certain minimal interatomic distances.
23 Depending on the problem at hand, certain box vectors
24 may not be known or chosen beforehand, and hence also
25 need to be generated at random. Common cases include
26 bulk crystals, films and chains, with respectively
27 3, 2 and 1 unknown cell vectors.
29 Parameters:
31 slab: Atoms object
32 Specifies the cell vectors and periodic boundary conditions
33 to be applied to the randomly generated structures.
34 Any included atoms (e.g. representing an underlying slab)
35 are copied to these new structures.
36 Variable cell vectors (see number_of_variable_cell_vectors)
37 will be ignored because these will be generated at random.
39 blocks: list
40 List of building units for the structure. Each item can be:
42 * an integer: representing a single atom by its atomic number,
43 * a string: for a single atom (a chemical symbol) or a
44 molecule (name recognized by ase.build.molecule),
45 * an Atoms object,
46 * an (A, B) tuple or list where A is any of the above
47 and B is the number of A units to include.
49 A few examples:
51 >>> blocks = ['Ti'] * 4 + ['O'] * 8
52 >>> blocks = [('Ti', 4), ('O', 8)]
53 >>> blocks = [('CO2', 3)] # 3 CO2 molecules
54 >>> co = Atoms('CO', positions=[[0, 0, 0], [1.4, 0, 0]])
55 >>> blocks = [(co, 3)]
57 Each individual block (single atom or molecule) in the
58 randomly generated candidates is given a unique integer
59 tag. These can be used to preserve the molecular identity
60 of these subunits.
62 blmin: dict or float
63 Dictionary with minimal interatomic distances.
64 If a number is provided instead, the dictionary will
65 be generated with this ratio of covalent bond radii.
66 Note: when preserving molecular identity (see use_tags),
67 the blmin dict will (naturally) only be applied
68 to intermolecular distances (not the intramolecular
69 ones).
71 number_of_variable_cell_vectors: int (default 0)
72 The number of variable cell vectors (0, 1, 2 or 3).
73 To keep things simple, it is the 'first' vectors which
74 will be treated as variable, i.e. the 'a' vector in the
75 univariate case, the 'a' and 'b' vectors in the bivariate
76 case, etc.
78 box_to_place_in: [list, list of lists] (default None)
79 The box in which the atoms can be placed.
80 The default (None) means the box is equal to the
81 entire unit cell of the 'slab' object.
82 In many cases, however, smaller boxes are desired
83 (e.g. for adsorbates on a slab surface or for isolated
84 clusters). Then, box_to_place_in can be set as
85 [p0, [v1, v2, v3]] with positions being generated as
86 p0 + r1 * v1 + r2 * v2 + r3 + v3.
87 In case of one or more variable cell vectors,
88 the corresponding items in p0/v1/v2/v3 will be ignored.
90 box_volume: int or float or None (default)
91 Initial guess for the box volume in cubic Angstrom
92 (used in generating the variable cell vectors).
93 Typical values in the solid state are 8-12 A^3 per atom.
94 If there are no variable cell vectors, the default None
95 is required (box volume equal to the box_to_place_in
96 volume).
98 splits: dict or None
99 Splitting scheme for increasing the translational symmetry
100 in the random candidates, based on:
102 * `Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-32`__
104 __ http://dx.doi.org/10.1016/j.cpc.2010.06.007
106 This should be a dict specifying the relative probabilities
107 for each split, written as tuples. For example,
109 >>> splits = {(2,): 3, (1,): 1}
111 This means that, for each structure, either a splitting
112 factor of 2 is applied to one randomly chosen axis,
113 or a splitting factor of 1 is applied (i.e., no splitting).
114 The probability ratio of the two scenararios will be 3:1,
115 i.e. 75% chance for the former and 25% chance for the latter
116 splitting scheme. Only the directions in which the 'slab'
117 object is periodic are eligible for splitting.
119 To e.g. always apply splitting factors of 2 and 3 along two
120 randomly chosen axes:
122 >>> splits = {(2, 3): 1}
124 By default, no splitting is applied (splits = None = {(1,): 1}).
126 cellbounds: ase.ga.utilities.CellBounds instance
127 Describing limits on the cell shape, see
128 :class:`~ase.ga.utilities.CellBounds`.
129 Note that it only make sense to impose conditions
130 regarding cell vectors which have been marked as
131 variable (see number_of_variable_cell_vectors).
133 test_dist_to_slab: bool (default True)
134 Whether to make sure that the distances between
135 the atoms and the slab satisfy the blmin.
137 test_too_far: bool (default True)
138 Whether to also make sure that there are no isolated
139 atoms or molecules with nearest-neighbour bond lengths
140 larger than 2x the value in the blmin dict.
142 rng: Random number generator
143 By default numpy.random.
144 """
146 def __init__(self, slab, blocks, blmin, number_of_variable_cell_vectors=0,
147 box_to_place_in=None, box_volume=None, splits=None,
148 cellbounds=None, test_dist_to_slab=True, test_too_far=True,
149 rng=np.random):
151 self.slab = slab
153 self.blocks = []
154 for item in blocks:
155 if isinstance(item, (tuple, list)):
156 assert len(item) == 2, 'Item length %d != 2' % len(item)
157 block, count = item
158 else:
159 block, count = item, 1
161 # Convert block into Atoms object
162 if isinstance(block, Atoms):
163 pass
164 elif block in atomic_numbers:
165 block = Atoms(block)
166 elif isinstance(block, str):
167 block = molecule(block)
168 elif block in atomic_numbers.values():
169 block = Atoms(numbers=[block])
170 else:
171 raise ValueError('Cannot parse this block:', block)
173 # Add to self.blocks, taking into account that
174 # we want to group the same blocks together.
175 # This is important for the cell splitting.
176 for i, (b, c) in enumerate(self.blocks):
177 if block == b:
178 self.blocks[i][1] += count
179 break
180 else:
181 self.blocks.append([block, count])
183 if isinstance(blmin, dict):
184 self.blmin = blmin
185 else:
186 numbers = np.unique([b.get_atomic_numbers() for b in self.blocks])
187 self.blmin = closest_distances_generator(
188 numbers,
189 ratio_of_covalent_radii=blmin)
191 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors
192 assert self.number_of_variable_cell_vectors in range(4)
193 if len(self.slab) > 0:
194 msg = 'Including atoms in the slab only makes sense'
195 msg += ' if there are no variable unit cell vectors'
196 assert self.number_of_variable_cell_vectors == 0, msg
197 for i in range(self.number_of_variable_cell_vectors):
198 msg = f'Unit cell {("abc"[i])}-vector is marked as variable '
199 msg += 'and slab must then also be periodic in this direction'
200 assert self.slab.pbc[i], msg
202 if box_to_place_in is None:
203 p0 = np.array([0., 0., 0.])
204 cell = self.slab.get_cell()
205 self.box_to_place_in = [p0, [cell[0, :], cell[1, :], cell[2, :]]]
206 else:
207 self.box_to_place_in = box_to_place_in
209 if box_volume is None:
210 assert self.number_of_variable_cell_vectors == 0
211 box_volume = abs(np.linalg.det(self.box_to_place_in[1]))
212 else:
213 assert self.number_of_variable_cell_vectors > 0
214 self.box_volume = box_volume
215 assert self.box_volume > 0
217 if splits is None:
218 splits = {(1,): 1}
219 tot = sum(v for v in splits.values())
220 self.splits = {k: v * 1. / tot for k, v in splits.items()}
222 self.cellbounds = cellbounds
223 self.test_too_far = test_too_far
224 self.test_dist_to_slab = test_dist_to_slab
225 self.rng = rng
227 def get_new_candidate(self, maxiter=None):
228 """Returns a new candidate.
230 maxiter: upper bound on the total number of times
231 the random position generator is called
232 when generating the new candidate.
234 By default (maxiter=None) no such bound
235 is imposed. If the generator takes too
236 long time to create a new candidate, it
237 may be suitable to specify a finite value.
238 When the bound is exceeded, None is returned.
239 """
240 pbc = self.slab.get_pbc()
242 # Choose cell splitting
243 r = self.rng.random()
244 cumprob = 0
245 for split, prob in self.splits.items():
246 cumprob += prob
247 if cumprob > r:
248 break
250 # Choose direction(s) along which to split
251 # and by how much
252 directions = [i for i in range(3) if pbc[i]]
253 repeat = [1, 1, 1]
254 if len(directions) > 0:
255 for number in split:
256 d = self.rng.choice(directions)
257 repeat[d] = number
258 repeat = tuple(repeat)
260 # Generate the 'full' unit cell
261 # for the eventual candidates
262 cell = self.generate_unit_cell(repeat)
263 if self.number_of_variable_cell_vectors == 0:
264 assert np.allclose(cell, self.slab.get_cell())
266 # Make the smaller 'box' in which we are
267 # allowed to place the atoms and which will
268 # then be repeated to fill the 'full' unit cell
269 box = np.copy(cell)
270 for i in range(self.number_of_variable_cell_vectors, 3):
271 box[i] = np.array(self.box_to_place_in[1][i])
272 box /= np.array([repeat]).T
274 # Here we gather the (reduced) number of blocks
275 # to put in the smaller box, and the 'surplus'
276 # occurring when the block count is not divisible
277 # by the number of repetitions.
278 # E.g. if we have a ('Ti', 4) block and do a
279 # [2, 3, 1] repetition, we employ a ('Ti', 1)
280 # block in the smaller box and delete 2 out 6
281 # Ti atoms afterwards
282 nrep = int(np.prod(repeat))
283 blocks, ids, surplus = [], [], []
284 for i, (block, count) in enumerate(self.blocks):
285 count_part = int(np.ceil(count * 1. / nrep))
286 blocks.extend([block] * count_part)
287 surplus.append(nrep * count_part - count)
288 ids.extend([i] * count_part)
290 N_blocks = len(blocks)
292 # Shuffle the ordering so different blocks
293 # are added in random order
294 order = np.arange(N_blocks)
295 self.rng.shuffle(order)
296 blocks = [blocks[i] for i in order]
297 ids = np.array(ids)[order]
299 # Add blocks one by one until we have found
300 # a valid candidate
301 blmin = self.blmin
302 blmin_too_far = {key: 2 * val for key, val in blmin.items()}
304 niter = 0
305 while maxiter is None or niter < maxiter:
306 cand = Atoms('', cell=box, pbc=pbc)
308 for i in range(N_blocks):
309 atoms = blocks[i].copy()
310 atoms.set_tags(i)
311 atoms.set_pbc(pbc)
312 atoms.set_cell(box, scale_atoms=False)
314 while maxiter is None or niter < maxiter:
315 niter += 1
316 cop = atoms.get_positions().mean(axis=0)
317 pos = np.dot(self.rng.random((1, 3)), box)
318 atoms.translate(pos - cop)
320 if len(atoms) > 1:
321 # Apply a random rotation to multi-atom blocks
322 phi, theta, psi = 360 * self.rng.random(3)
323 atoms.euler_rotate(phi=phi, theta=0.5 * theta, psi=psi,
324 center=pos)
326 if not atoms_too_close_two_sets(cand, atoms, blmin):
327 cand += atoms
328 break
329 else:
330 # Reached maximum iteration number
331 # Break out of the for loop above
332 cand = None
333 break
335 if cand is None:
336 # Exit the main while loop
337 break
339 # Rebuild the candidate after repeating,
340 # randomly deleting surplus blocks and
341 # sorting back to the original order
342 cand_full = cand.repeat(repeat)
344 tags_full = cand_full.get_tags()
345 for i in range(nrep):
346 tags_full[len(cand) * i:len(cand) * (i + 1)] += i * N_blocks
347 cand_full.set_tags(tags_full)
349 cand = Atoms('', cell=cell, pbc=pbc)
350 ids_full = np.tile(ids, nrep)
352 tag_counter = 0
353 if len(self.slab) > 0:
354 tag_counter = int(max(self.slab.get_tags())) + 1
356 for i, (block, count) in enumerate(self.blocks):
357 tags = np.where(ids_full == i)[0]
358 bad = self.rng.choice(tags, size=surplus[i], replace=False)
359 for tag in tags:
360 if tag not in bad:
361 select = [a.index for a in cand_full if a.tag == tag]
362 atoms = cand_full[select] # is indeed a copy!
363 atoms.set_tags(tag_counter)
364 assert len(atoms) == len(block)
365 cand += atoms
366 tag_counter += 1
368 for i in range(self.number_of_variable_cell_vectors, 3):
369 cand.positions[:, i] += self.box_to_place_in[0][i]
371 # By construction, the minimal interatomic distances
372 # within the structure should already be respected
373 assert not atoms_too_close(cand, blmin, use_tags=True), \
374 'This is not supposed to happen; please report this bug'
376 if self.test_dist_to_slab and len(self.slab) > 0:
377 if atoms_too_close_two_sets(self.slab, cand, blmin):
378 continue
380 if self.test_too_far:
381 tags = cand.get_tags()
383 for tag in np.unique(tags):
384 too_far = True
385 indices_i = np.where(tags == tag)[0]
386 indices_j = np.where(tags != tag)[0]
387 too_far = not atoms_too_close_two_sets(cand[indices_i],
388 cand[indices_j],
389 blmin_too_far)
391 if too_far and len(self.slab) > 0:
392 # the block is too far from the rest
393 # but might still be sufficiently
394 # close to the slab
395 too_far = not atoms_too_close_two_sets(cand[indices_i],
396 self.slab,
397 blmin_too_far)
398 if too_far:
399 break
400 else:
401 too_far = False
403 if too_far:
404 continue
406 # Passed all the tests
407 cand = self.slab + cand
408 cand.set_cell(cell, scale_atoms=False)
409 break
410 else:
411 # Reached max iteration count in the while loop
412 return None
414 return cand
416 def generate_unit_cell(self, repeat):
417 """Generates a random unit cell.
419 For this, we use the vectors in self.slab.cell
420 in the fixed directions and randomly generate
421 the variable ones. For such a cell to be valid,
422 it has to satisfy the self.cellbounds constraints.
424 The cell will also be such that the volume of the
425 box in which the atoms can be placed (box limits
426 described by self.box_to_place_in) is equal to
427 self.box_volume.
429 Parameters:
431 repeat: tuple of 3 integers
432 Indicates by how much each cell vector
433 will later be reduced by cell splitting.
435 This is used to ensure that the original
436 cell is large enough so that the cell lengths
437 of the smaller cell exceed the largest
438 (X,X)-minimal-interatomic-distance in self.blmin.
439 """
440 # Find the minimal cell length 'Lmin'
441 # that we need in order to ensure that
442 # an added atom or molecule will never
443 # be 'too close to itself'
444 Lmin = 0.
445 for atoms, count in self.blocks:
446 dist = atoms.get_all_distances(mic=False, vector=False)
447 num = atoms.get_atomic_numbers()
449 for i in range(len(atoms)):
450 dist[i, i] += self.blmin[(num[i], num[i])]
451 for j in range(i):
452 bl = self.blmin[(num[i], num[j])]
453 dist[i, j] += bl
454 dist[j, i] += bl
456 L = np.max(dist)
457 if L > Lmin:
458 Lmin = L
460 # Generate a suitable unit cell
461 valid = False
462 while not valid:
463 cell = np.zeros((3, 3))
465 for i in range(self.number_of_variable_cell_vectors):
466 # on-diagonal values
467 cell[i, i] = self.rng.random() * np.cbrt(self.box_volume)
468 cell[i, i] *= repeat[i]
469 for j in range(i):
470 # off-diagonal values
471 cell[i, j] = (self.rng.random() - 0.5) * cell[i - 1, i - 1]
473 # volume scaling
474 for i in range(self.number_of_variable_cell_vectors, 3):
475 cell[i] = self.box_to_place_in[1][i]
477 if self.number_of_variable_cell_vectors > 0:
478 volume = abs(np.linalg.det(cell))
479 scaling = self.box_volume / volume
480 scaling **= 1. / self.number_of_variable_cell_vectors
481 cell[:self.number_of_variable_cell_vectors] *= scaling
483 for i in range(self.number_of_variable_cell_vectors, 3):
484 cell[i] = self.slab.get_cell()[i]
486 # bounds checking
487 valid = True
489 if self.cellbounds is not None:
490 if not self.cellbounds.is_within_bounds(cell):
491 valid = False
493 if valid:
494 for i in range(3):
495 if np.linalg.norm(cell[i]) < repeat[i] * Lmin:
496 assert self.number_of_variable_cell_vectors > 0
497 valid = False
499 return cell