Coverage for /builds/ase/ase/ase/ga/cutandsplicepairing.py: 90.19%
214 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-08-02 00:12 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2025-08-02 00:12 +0000
1# fmt: off
3"""Implementation of the cut-and-splice paring operator."""
4import numpy as np
6from ase import Atoms
7from ase.ga.offspring_creator import OffspringCreator
8from ase.ga.utilities import (
9 atoms_too_close,
10 atoms_too_close_two_sets,
11 gather_atoms_by_tag,
12)
13from ase.geometry import find_mic
16class Positions:
17 """Helper object to simplify the pairing process.
19 Parameters:
21 scaled_positions: (Nx3) array
22 Positions in scaled coordinates
23 cop: (1x3) array
24 Center-of-positions (also in scaled coordinates)
25 symbols: str
26 String with the atomic symbols
27 distance: float
28 Signed distance to the cutting plane
29 origin: int (0 or 1)
30 Determines at which side of the plane the position should be.
31 """
33 def __init__(self, scaled_positions, cop, symbols, distance, origin):
34 self.scaled_positions = scaled_positions
35 self.cop = cop
36 self.symbols = symbols
37 self.distance = distance
38 self.origin = origin
40 def to_use(self):
41 """Tells whether this position is at the right side."""
42 if self.distance > 0. and self.origin == 0:
43 return True
44 elif self.distance < 0. and self.origin == 1:
45 return True
46 else:
47 return False
50class CutAndSplicePairing(OffspringCreator):
51 """The Cut and Splice operator by Deaven and Ho.
53 Creates offspring from two parent structures using
54 a randomly generated cutting plane.
56 The parents may have different unit cells, in which
57 case the offspring unit cell will be a random combination
58 of the parent cells.
60 The basic implementation (for fixed unit cells) is
61 described in:
63 :doi:`L.B. Vilhelmsen and B. Hammer, PRL, 108, 126101 (2012)
64 <10.1103/PhysRevLett.108.126101`>
66 The extension to variable unit cells is similar to:
68 * :doi:`Glass, Oganov, Hansen, Comp. Phys. Comm. 175 (2006) 713-720
69 <10.1016/j.cpc.2006.07.020>`
71 * :doi:`Lonie, Zurek, Comp. Phys. Comm. 182 (2011) 372-387
72 <10.1016/j.cpc.2010.07.048>`
74 The operator can furthermore preserve molecular identity
75 if desired (see the *use_tags* kwarg). Atoms with the same
76 tag will then be considered as belonging to the same molecule,
77 and their internal geometry will not be changed by the operator.
79 If use_tags is enabled, the operator will also conserve the
80 number of molecules of each kind (in addition to conserving
81 the overall stoichiometry). Currently, molecules are considered
82 to be of the same kind if their chemical symbol strings are
83 identical. In rare cases where this may not be sufficient
84 (i.e. when desiring to keep the same ratio of isomers), the
85 different isomers can be differentiated by giving them different
86 elemental orderings (e.g. 'XY2' and 'Y2X').
88 Parameters:
90 slab: Atoms object
91 Specifies the cell vectors and periodic boundary conditions
92 to be applied to the randomly generated structures.
93 Any included atoms (e.g. representing an underlying slab)
94 are copied to these new structures.
96 n_top: int
97 The number of atoms to optimize
99 blmin: dict
100 Dictionary with minimal interatomic distances.
101 Note: when preserving molecular identity (see use_tags),
102 the blmin dict will (naturally) only be applied
103 to intermolecular distances (not the intramolecular
104 ones).
106 number_of_variable_cell_vectors: int (default 0)
107 The number of variable cell vectors (0, 1, 2 or 3).
108 To keep things simple, it is the 'first' vectors which
109 will be treated as variable, i.e. the 'a' vector in the
110 univariate case, the 'a' and 'b' vectors in the bivariate
111 case, etc.
113 p1: float or int between 0 and 1
114 Probability that a parent is shifted over a random
115 distance along the normal of the cutting plane
116 (only operative if number_of_variable_cell_vectors > 0).
118 p2: float or int between 0 and 1
119 Same as p1, but for shifting along the directions
120 in the cutting plane (only operative if
121 number_of_variable_cell_vectors > 0).
123 minfrac: float between 0 and 1, or None (default)
124 Minimal fraction of atoms a parent must contribute
125 to the child. If None, each parent must contribute
126 at least one atom.
128 cellbounds: ase.ga.utilities.CellBounds instance
129 Describing limits on the cell shape, see
130 :class:`~ase.ga.utilities.CellBounds`.
131 Note that it only make sense to impose conditions
132 regarding cell vectors which have been marked as
133 variable (see number_of_variable_cell_vectors).
135 use_tags: bool
136 Whether to use the atomic tags to preserve
137 molecular identity.
139 test_dist_to_slab: bool (default True)
140 Whether to make sure that the distances between
141 the atoms and the slab satisfy the blmin.
143 rng: Random number generator
144 By default numpy.random.
145 """
147 def __init__(self, slab, n_top, blmin, number_of_variable_cell_vectors=0,
148 p1=1, p2=0.05, minfrac=None, cellbounds=None,
149 test_dist_to_slab=True, use_tags=False, rng=np.random,
150 verbose=False):
152 OffspringCreator.__init__(self, verbose, rng=rng)
153 self.slab = slab
154 self.n_top = n_top
155 self.blmin = blmin
156 assert number_of_variable_cell_vectors in range(4)
157 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors
158 self.p1 = p1
159 self.p2 = p2
160 self.minfrac = minfrac
161 self.cellbounds = cellbounds
162 self.test_dist_to_slab = test_dist_to_slab
163 self.use_tags = use_tags
165 self.scaling_volume = None
166 self.descriptor = 'CutAndSplicePairing'
167 self.min_inputs = 2
169 def update_scaling_volume(self, population, w_adapt=0.5, n_adapt=0):
170 """Updates the scaling volume that is used in the pairing
172 w_adapt: weight of the new vs the old scaling volume
173 n_adapt: number of best candidates in the population that
174 are used to calculate the new scaling volume
175 """
176 if not n_adapt:
177 # take best 20% of the population
178 n_adapt = int(np.ceil(0.2 * len(population)))
179 v_new = np.mean([a.get_volume() for a in population[:n_adapt]])
181 if not self.scaling_volume:
182 self.scaling_volume = v_new
183 else:
184 volumes = [self.scaling_volume, v_new]
185 weights = [1 - w_adapt, w_adapt]
186 self.scaling_volume = np.average(volumes, weights=weights)
188 def get_new_individual(self, parents):
189 """The method called by the user that
190 returns the paired structure."""
191 f, m = parents
193 indi = self.cross(f, m)
194 desc = 'pairing: {} {}'.format(f.info['confid'],
195 m.info['confid'])
196 # It is ok for an operator to return None
197 # It means that it could not make a legal offspring
198 # within a reasonable amount of time
199 if indi is None:
200 return indi, desc
201 indi = self.initialize_individual(f, indi)
202 indi.info['data']['parents'] = [f.info['confid'],
203 m.info['confid']]
205 return self.finalize_individual(indi), desc
207 def cross(self, a1, a2):
208 """Crosses the two atoms objects and returns one"""
210 if len(a1) != len(self.slab) + self.n_top:
211 raise ValueError('Wrong size of structure to optimize')
212 if len(a1) != len(a2):
213 raise ValueError('The two structures do not have the same length')
215 N = self.n_top
217 # Only consider the atoms to optimize
218 a1 = a1[len(a1) - N: len(a1)]
219 a2 = a2[len(a2) - N: len(a2)]
221 if not np.array_equal(a1.numbers, a2.numbers):
222 err = 'Trying to pair two structures with different stoichiometry'
223 raise ValueError(err)
225 if self.use_tags and not np.array_equal(a1.get_tags(), a2.get_tags()):
226 err = 'Trying to pair two structures with different tags'
227 raise ValueError(err)
229 cell1 = a1.get_cell()
230 cell2 = a2.get_cell()
231 for i in range(self.number_of_variable_cell_vectors, 3):
232 err = 'Unit cells are supposed to be identical in direction %d'
233 assert np.allclose(cell1[i], cell2[i]), (err % i, cell1, cell2)
235 invalid = True
236 counter = 0
237 maxcount = 1000
238 a1_copy = a1.copy()
239 a2_copy = a2.copy()
241 # Run until a valid pairing is made or maxcount pairings are tested.
242 while invalid and counter < maxcount:
243 counter += 1
245 newcell = self.generate_unit_cell(cell1, cell2)
246 if newcell is None:
247 # No valid unit cell could be generated.
248 # This strongly suggests that it is near-impossible
249 # to generate one from these parent cells and it is
250 # better to abort now.
251 break
253 # Choose direction of cutting plane normal
254 if self.number_of_variable_cell_vectors == 0:
255 # Will be generated entirely at random
256 theta = np.pi * self.rng.random()
257 phi = 2. * np.pi * self.rng.random()
258 cut_n = np.array([np.cos(phi) * np.sin(theta),
259 np.sin(phi) * np.sin(theta), np.cos(theta)])
260 else:
261 # Pick one of the 'variable' cell vectors
262 cut_n = self.rng.choice(self.number_of_variable_cell_vectors)
264 # Randomly translate parent structures
265 for a_copy, a in zip([a1_copy, a2_copy], [a1, a2]):
266 a_copy.set_positions(a.get_positions())
268 cell = a_copy.get_cell()
269 for i in range(self.number_of_variable_cell_vectors):
270 r = self.rng.random()
271 cond1 = i == cut_n and r < self.p1
272 cond2 = i != cut_n and r < self.p2
273 if cond1 or cond2:
274 a_copy.positions += self.rng.random() * cell[i]
276 if self.use_tags:
277 # For correct determination of the center-
278 # of-position of the multi-atom blocks,
279 # we need to group their constituent atoms
280 # together
281 gather_atoms_by_tag(a_copy)
282 else:
283 a_copy.wrap()
285 # Generate the cutting point in scaled coordinates
286 cosp1 = np.average(a1_copy.get_scaled_positions(), axis=0)
287 cosp2 = np.average(a2_copy.get_scaled_positions(), axis=0)
288 cut_p = np.zeros((1, 3))
289 for i in range(3):
290 if i < self.number_of_variable_cell_vectors:
291 cut_p[0, i] = self.rng.random()
292 else:
293 cut_p[0, i] = 0.5 * (cosp1[i] + cosp2[i])
295 # Perform the pairing:
296 child = self._get_pairing(a1_copy, a2_copy, cut_p, cut_n, newcell)
297 if child is None:
298 continue
300 # Verify whether the atoms are too close or not:
301 if atoms_too_close(child, self.blmin, use_tags=self.use_tags):
302 continue
304 if self.test_dist_to_slab and len(self.slab) > 0:
305 if atoms_too_close_two_sets(self.slab, child, self.blmin):
306 continue
308 # Passed all the tests
309 child = self.slab + child
310 child.set_cell(newcell, scale_atoms=False)
311 child.wrap()
312 return child
314 return None
316 def generate_unit_cell(self, cell1, cell2, maxcount=10000):
317 """Generates a new unit cell by a random linear combination
318 of the parent cells. The new cell must satisfy the
319 self.cellbounds constraints. Returns None if no such cell
320 was generated within a given number of attempts.
322 Parameters:
324 maxcount: int
325 The maximal number of attempts.
326 """
327 # First calculate the scaling volume
328 if not self.scaling_volume:
329 v1 = np.abs(np.linalg.det(cell1))
330 v2 = np.abs(np.linalg.det(cell2))
331 r = self.rng.random()
332 v_ref = r * v1 + (1 - r) * v2
333 else:
334 v_ref = self.scaling_volume
336 # Now the cell vectors
337 if self.number_of_variable_cell_vectors == 0:
338 assert np.allclose(cell1, cell2), 'Parent cells are not the same'
339 newcell = np.copy(cell1)
340 else:
341 count = 0
342 while count < maxcount:
343 r = self.rng.random()
344 newcell = r * cell1 + (1 - r) * cell2
346 vol = abs(np.linalg.det(newcell))
347 scaling = v_ref / vol
348 scaling **= 1. / self.number_of_variable_cell_vectors
349 newcell[:self.number_of_variable_cell_vectors] *= scaling
351 found = True
352 if self.cellbounds is not None:
353 found = self.cellbounds.is_within_bounds(newcell)
354 if found:
355 break
357 count += 1
358 else:
359 # Did not find acceptable cell
360 newcell = None
362 return newcell
364 def _get_pairing(self, a1, a2, cutting_point, cutting_normal, cell):
365 """Creates a child from two parents using the given cut.
367 Returns None if the generated structure does not contain
368 a large enough fraction of each parent (see self.minfrac).
370 Does not check whether atoms are too close.
372 Assumes the 'slab' parts have been removed from the parent
373 structures and that these have been checked for equal
374 lengths, stoichiometries, and tags (if self.use_tags).
376 Parameters:
378 cutting_normal: int or (1x3) array
380 cutting_point: (1x3) array
381 In fractional coordinates
383 cell: (3x3) array
384 The unit cell for the child structure
385 """
386 symbols = a1.get_chemical_symbols()
387 tags = a1.get_tags() if self.use_tags else np.arange(len(a1))
389 # Generate list of all atoms / atom groups:
390 p1, p2, sym = [], [], []
391 for i in np.unique(tags):
392 indices = np.where(tags == i)[0]
393 s = ''.join([symbols[j] for j in indices])
394 sym.append(s)
396 for i, (a, p) in enumerate(zip([a1, a2], [p1, p2])):
397 c = a.get_cell()
398 cop = np.mean(a.positions[indices], axis=0)
399 cut_p = np.dot(cutting_point, c)
400 if isinstance(cutting_normal, int):
401 vecs = [c[j] for j in range(3) if j != cutting_normal]
402 cut_n = np.cross(vecs[0], vecs[1])
403 else:
404 cut_n = np.dot(cutting_normal, c)
405 d = np.dot(cop - cut_p, cut_n)
406 spos = a.get_scaled_positions()[indices]
407 scop = np.mean(spos, axis=0)
408 p.append(Positions(spos, scop, s, d, i))
410 all_points = p1 + p2
411 unique_sym = np.unique(sym)
412 types = {s: sym.count(s) for s in unique_sym}
414 # Sort these by chemical symbols:
415 all_points.sort(key=lambda x: x.symbols, reverse=True)
417 # For each atom type make the pairing
418 unique_sym.sort()
419 use_total = {}
420 for s in unique_sym:
421 used = []
422 not_used = []
423 # The list is looked trough in
424 # reverse order so atoms can be removed
425 # from the list along the way.
426 for i in reversed(range(len(all_points))):
427 # If there are no more atoms of this type
428 if all_points[i].symbols != s:
429 break
430 # Check if the atom should be included
431 if all_points[i].to_use():
432 used.append(all_points.pop(i))
433 else:
434 not_used.append(all_points.pop(i))
436 assert len(used) + len(not_used) == types[s] * 2
438 # While we have too few of the given atom type
439 while len(used) < types[s]:
440 index = self.rng.randint(len(not_used))
441 used.append(not_used.pop(index))
443 # While we have too many of the given atom type
444 while len(used) > types[s]:
445 # remove randomly:
446 index = self.rng.randint(len(used))
447 not_used.append(used.pop(index))
449 use_total[s] = used
451 n_tot = sum(len(ll) for ll in use_total.values())
452 assert n_tot == len(sym)
454 # check if the generated structure contains
455 # atoms from both parents:
456 count1, count2, N = 0, 0, len(a1)
457 for x in use_total.values():
458 count1 += sum(y.origin == 0 for y in x)
459 count2 += sum(y.origin == 1 for y in x)
461 nmin = 1 if self.minfrac is None else int(round(self.minfrac * N))
462 if count1 < nmin or count2 < nmin:
463 return None
465 # Construct the cartesian positions and reorder the atoms
466 # to follow the original order
467 newpos = []
468 pbc = a1.get_pbc()
469 for s in sym:
470 p = use_total[s].pop()
471 c = a1.get_cell() if p.origin == 0 else a2.get_cell()
472 pos = np.dot(p.scaled_positions, c)
473 cop = np.dot(p.cop, c)
474 vectors, _lengths = find_mic(pos - cop, c, pbc)
475 newcop = np.dot(p.cop, cell)
476 pos = newcop + vectors
477 for row in pos:
478 newpos.append(row)
480 newpos = np.reshape(newpos, (N, 3))
481 num = a1.get_atomic_numbers()
482 child = Atoms(numbers=num, positions=newpos, pbc=pbc, cell=cell,
483 tags=tags)
484 child.wrap()
485 return child