Coverage for /builds/ase/ase/ase/ga/cutandsplicepairing.py: 90.19%

214 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-08-02 00:12 +0000

1# fmt: off 

2 

3"""Implementation of the cut-and-splice paring operator.""" 

4import numpy as np 

5 

6from ase import Atoms 

7from ase.ga.offspring_creator import OffspringCreator 

8from ase.ga.utilities import ( 

9 atoms_too_close, 

10 atoms_too_close_two_sets, 

11 gather_atoms_by_tag, 

12) 

13from ase.geometry import find_mic 

14 

15 

16class Positions: 

17 """Helper object to simplify the pairing process. 

18 

19 Parameters: 

20 

21 scaled_positions: (Nx3) array 

22 Positions in scaled coordinates 

23 cop: (1x3) array 

24 Center-of-positions (also in scaled coordinates) 

25 symbols: str 

26 String with the atomic symbols 

27 distance: float 

28 Signed distance to the cutting plane 

29 origin: int (0 or 1) 

30 Determines at which side of the plane the position should be. 

31 """ 

32 

33 def __init__(self, scaled_positions, cop, symbols, distance, origin): 

34 self.scaled_positions = scaled_positions 

35 self.cop = cop 

36 self.symbols = symbols 

37 self.distance = distance 

38 self.origin = origin 

39 

40 def to_use(self): 

41 """Tells whether this position is at the right side.""" 

42 if self.distance > 0. and self.origin == 0: 

43 return True 

44 elif self.distance < 0. and self.origin == 1: 

45 return True 

46 else: 

47 return False 

48 

49 

50class CutAndSplicePairing(OffspringCreator): 

51 """The Cut and Splice operator by Deaven and Ho. 

52 

53 Creates offspring from two parent structures using 

54 a randomly generated cutting plane. 

55 

56 The parents may have different unit cells, in which 

57 case the offspring unit cell will be a random combination 

58 of the parent cells. 

59 

60 The basic implementation (for fixed unit cells) is 

61 described in: 

62 

63 :doi:`L.B. Vilhelmsen and B. Hammer, PRL, 108, 126101 (2012) 

64 <10.1103/PhysRevLett.108.126101`> 

65 

66 The extension to variable unit cells is similar to: 

67 

68 * :doi:`Glass, Oganov, Hansen, Comp. Phys. Comm. 175 (2006) 713-720 

69 <10.1016/j.cpc.2006.07.020>` 

70 

71 * :doi:`Lonie, Zurek, Comp. Phys. Comm. 182 (2011) 372-387 

72 <10.1016/j.cpc.2010.07.048>` 

73 

74 The operator can furthermore preserve molecular identity 

75 if desired (see the *use_tags* kwarg). Atoms with the same 

76 tag will then be considered as belonging to the same molecule, 

77 and their internal geometry will not be changed by the operator. 

78 

79 If use_tags is enabled, the operator will also conserve the 

80 number of molecules of each kind (in addition to conserving 

81 the overall stoichiometry). Currently, molecules are considered 

82 to be of the same kind if their chemical symbol strings are 

83 identical. In rare cases where this may not be sufficient 

84 (i.e. when desiring to keep the same ratio of isomers), the 

85 different isomers can be differentiated by giving them different 

86 elemental orderings (e.g. 'XY2' and 'Y2X'). 

87 

88 Parameters: 

89 

90 slab: Atoms object 

91 Specifies the cell vectors and periodic boundary conditions 

92 to be applied to the randomly generated structures. 

93 Any included atoms (e.g. representing an underlying slab) 

94 are copied to these new structures. 

95 

96 n_top: int 

97 The number of atoms to optimize 

98 

99 blmin: dict 

100 Dictionary with minimal interatomic distances. 

101 Note: when preserving molecular identity (see use_tags), 

102 the blmin dict will (naturally) only be applied 

103 to intermolecular distances (not the intramolecular 

104 ones). 

105 

106 number_of_variable_cell_vectors: int (default 0) 

107 The number of variable cell vectors (0, 1, 2 or 3). 

108 To keep things simple, it is the 'first' vectors which 

109 will be treated as variable, i.e. the 'a' vector in the 

110 univariate case, the 'a' and 'b' vectors in the bivariate 

111 case, etc. 

112 

113 p1: float or int between 0 and 1 

114 Probability that a parent is shifted over a random 

115 distance along the normal of the cutting plane 

116 (only operative if number_of_variable_cell_vectors > 0). 

117 

118 p2: float or int between 0 and 1 

119 Same as p1, but for shifting along the directions 

120 in the cutting plane (only operative if 

121 number_of_variable_cell_vectors > 0). 

122 

123 minfrac: float between 0 and 1, or None (default) 

124 Minimal fraction of atoms a parent must contribute 

125 to the child. If None, each parent must contribute 

126 at least one atom. 

127 

128 cellbounds: ase.ga.utilities.CellBounds instance 

129 Describing limits on the cell shape, see 

130 :class:`~ase.ga.utilities.CellBounds`. 

131 Note that it only make sense to impose conditions 

132 regarding cell vectors which have been marked as 

133 variable (see number_of_variable_cell_vectors). 

134 

135 use_tags: bool 

136 Whether to use the atomic tags to preserve 

137 molecular identity. 

138 

139 test_dist_to_slab: bool (default True) 

140 Whether to make sure that the distances between 

141 the atoms and the slab satisfy the blmin. 

142 

143 rng: Random number generator 

144 By default numpy.random. 

145 """ 

146 

147 def __init__(self, slab, n_top, blmin, number_of_variable_cell_vectors=0, 

148 p1=1, p2=0.05, minfrac=None, cellbounds=None, 

149 test_dist_to_slab=True, use_tags=False, rng=np.random, 

150 verbose=False): 

151 

152 OffspringCreator.__init__(self, verbose, rng=rng) 

153 self.slab = slab 

154 self.n_top = n_top 

155 self.blmin = blmin 

156 assert number_of_variable_cell_vectors in range(4) 

157 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors 

158 self.p1 = p1 

159 self.p2 = p2 

160 self.minfrac = minfrac 

161 self.cellbounds = cellbounds 

162 self.test_dist_to_slab = test_dist_to_slab 

163 self.use_tags = use_tags 

164 

165 self.scaling_volume = None 

166 self.descriptor = 'CutAndSplicePairing' 

167 self.min_inputs = 2 

168 

169 def update_scaling_volume(self, population, w_adapt=0.5, n_adapt=0): 

170 """Updates the scaling volume that is used in the pairing 

171 

172 w_adapt: weight of the new vs the old scaling volume 

173 n_adapt: number of best candidates in the population that 

174 are used to calculate the new scaling volume 

175 """ 

176 if not n_adapt: 

177 # take best 20% of the population 

178 n_adapt = int(np.ceil(0.2 * len(population))) 

179 v_new = np.mean([a.get_volume() for a in population[:n_adapt]]) 

180 

181 if not self.scaling_volume: 

182 self.scaling_volume = v_new 

183 else: 

184 volumes = [self.scaling_volume, v_new] 

185 weights = [1 - w_adapt, w_adapt] 

186 self.scaling_volume = np.average(volumes, weights=weights) 

187 

188 def get_new_individual(self, parents): 

189 """The method called by the user that 

190 returns the paired structure.""" 

191 f, m = parents 

192 

193 indi = self.cross(f, m) 

194 desc = 'pairing: {} {}'.format(f.info['confid'], 

195 m.info['confid']) 

196 # It is ok for an operator to return None 

197 # It means that it could not make a legal offspring 

198 # within a reasonable amount of time 

199 if indi is None: 

200 return indi, desc 

201 indi = self.initialize_individual(f, indi) 

202 indi.info['data']['parents'] = [f.info['confid'], 

203 m.info['confid']] 

204 

205 return self.finalize_individual(indi), desc 

206 

207 def cross(self, a1, a2): 

208 """Crosses the two atoms objects and returns one""" 

209 

210 if len(a1) != len(self.slab) + self.n_top: 

211 raise ValueError('Wrong size of structure to optimize') 

212 if len(a1) != len(a2): 

213 raise ValueError('The two structures do not have the same length') 

214 

215 N = self.n_top 

216 

217 # Only consider the atoms to optimize 

218 a1 = a1[len(a1) - N: len(a1)] 

219 a2 = a2[len(a2) - N: len(a2)] 

220 

221 if not np.array_equal(a1.numbers, a2.numbers): 

222 err = 'Trying to pair two structures with different stoichiometry' 

223 raise ValueError(err) 

224 

225 if self.use_tags and not np.array_equal(a1.get_tags(), a2.get_tags()): 

226 err = 'Trying to pair two structures with different tags' 

227 raise ValueError(err) 

228 

229 cell1 = a1.get_cell() 

230 cell2 = a2.get_cell() 

231 for i in range(self.number_of_variable_cell_vectors, 3): 

232 err = 'Unit cells are supposed to be identical in direction %d' 

233 assert np.allclose(cell1[i], cell2[i]), (err % i, cell1, cell2) 

234 

235 invalid = True 

236 counter = 0 

237 maxcount = 1000 

238 a1_copy = a1.copy() 

239 a2_copy = a2.copy() 

240 

241 # Run until a valid pairing is made or maxcount pairings are tested. 

242 while invalid and counter < maxcount: 

243 counter += 1 

244 

245 newcell = self.generate_unit_cell(cell1, cell2) 

246 if newcell is None: 

247 # No valid unit cell could be generated. 

248 # This strongly suggests that it is near-impossible 

249 # to generate one from these parent cells and it is 

250 # better to abort now. 

251 break 

252 

253 # Choose direction of cutting plane normal 

254 if self.number_of_variable_cell_vectors == 0: 

255 # Will be generated entirely at random 

256 theta = np.pi * self.rng.random() 

257 phi = 2. * np.pi * self.rng.random() 

258 cut_n = np.array([np.cos(phi) * np.sin(theta), 

259 np.sin(phi) * np.sin(theta), np.cos(theta)]) 

260 else: 

261 # Pick one of the 'variable' cell vectors 

262 cut_n = self.rng.choice(self.number_of_variable_cell_vectors) 

263 

264 # Randomly translate parent structures 

265 for a_copy, a in zip([a1_copy, a2_copy], [a1, a2]): 

266 a_copy.set_positions(a.get_positions()) 

267 

268 cell = a_copy.get_cell() 

269 for i in range(self.number_of_variable_cell_vectors): 

270 r = self.rng.random() 

271 cond1 = i == cut_n and r < self.p1 

272 cond2 = i != cut_n and r < self.p2 

273 if cond1 or cond2: 

274 a_copy.positions += self.rng.random() * cell[i] 

275 

276 if self.use_tags: 

277 # For correct determination of the center- 

278 # of-position of the multi-atom blocks, 

279 # we need to group their constituent atoms 

280 # together 

281 gather_atoms_by_tag(a_copy) 

282 else: 

283 a_copy.wrap() 

284 

285 # Generate the cutting point in scaled coordinates 

286 cosp1 = np.average(a1_copy.get_scaled_positions(), axis=0) 

287 cosp2 = np.average(a2_copy.get_scaled_positions(), axis=0) 

288 cut_p = np.zeros((1, 3)) 

289 for i in range(3): 

290 if i < self.number_of_variable_cell_vectors: 

291 cut_p[0, i] = self.rng.random() 

292 else: 

293 cut_p[0, i] = 0.5 * (cosp1[i] + cosp2[i]) 

294 

295 # Perform the pairing: 

296 child = self._get_pairing(a1_copy, a2_copy, cut_p, cut_n, newcell) 

297 if child is None: 

298 continue 

299 

300 # Verify whether the atoms are too close or not: 

301 if atoms_too_close(child, self.blmin, use_tags=self.use_tags): 

302 continue 

303 

304 if self.test_dist_to_slab and len(self.slab) > 0: 

305 if atoms_too_close_two_sets(self.slab, child, self.blmin): 

306 continue 

307 

308 # Passed all the tests 

309 child = self.slab + child 

310 child.set_cell(newcell, scale_atoms=False) 

311 child.wrap() 

312 return child 

313 

314 return None 

315 

316 def generate_unit_cell(self, cell1, cell2, maxcount=10000): 

317 """Generates a new unit cell by a random linear combination 

318 of the parent cells. The new cell must satisfy the 

319 self.cellbounds constraints. Returns None if no such cell 

320 was generated within a given number of attempts. 

321 

322 Parameters: 

323 

324 maxcount: int 

325 The maximal number of attempts. 

326 """ 

327 # First calculate the scaling volume 

328 if not self.scaling_volume: 

329 v1 = np.abs(np.linalg.det(cell1)) 

330 v2 = np.abs(np.linalg.det(cell2)) 

331 r = self.rng.random() 

332 v_ref = r * v1 + (1 - r) * v2 

333 else: 

334 v_ref = self.scaling_volume 

335 

336 # Now the cell vectors 

337 if self.number_of_variable_cell_vectors == 0: 

338 assert np.allclose(cell1, cell2), 'Parent cells are not the same' 

339 newcell = np.copy(cell1) 

340 else: 

341 count = 0 

342 while count < maxcount: 

343 r = self.rng.random() 

344 newcell = r * cell1 + (1 - r) * cell2 

345 

346 vol = abs(np.linalg.det(newcell)) 

347 scaling = v_ref / vol 

348 scaling **= 1. / self.number_of_variable_cell_vectors 

349 newcell[:self.number_of_variable_cell_vectors] *= scaling 

350 

351 found = True 

352 if self.cellbounds is not None: 

353 found = self.cellbounds.is_within_bounds(newcell) 

354 if found: 

355 break 

356 

357 count += 1 

358 else: 

359 # Did not find acceptable cell 

360 newcell = None 

361 

362 return newcell 

363 

364 def _get_pairing(self, a1, a2, cutting_point, cutting_normal, cell): 

365 """Creates a child from two parents using the given cut. 

366 

367 Returns None if the generated structure does not contain 

368 a large enough fraction of each parent (see self.minfrac). 

369 

370 Does not check whether atoms are too close. 

371 

372 Assumes the 'slab' parts have been removed from the parent 

373 structures and that these have been checked for equal 

374 lengths, stoichiometries, and tags (if self.use_tags). 

375 

376 Parameters: 

377 

378 cutting_normal: int or (1x3) array 

379 

380 cutting_point: (1x3) array 

381 In fractional coordinates 

382 

383 cell: (3x3) array 

384 The unit cell for the child structure 

385 """ 

386 symbols = a1.get_chemical_symbols() 

387 tags = a1.get_tags() if self.use_tags else np.arange(len(a1)) 

388 

389 # Generate list of all atoms / atom groups: 

390 p1, p2, sym = [], [], [] 

391 for i in np.unique(tags): 

392 indices = np.where(tags == i)[0] 

393 s = ''.join([symbols[j] for j in indices]) 

394 sym.append(s) 

395 

396 for i, (a, p) in enumerate(zip([a1, a2], [p1, p2])): 

397 c = a.get_cell() 

398 cop = np.mean(a.positions[indices], axis=0) 

399 cut_p = np.dot(cutting_point, c) 

400 if isinstance(cutting_normal, int): 

401 vecs = [c[j] for j in range(3) if j != cutting_normal] 

402 cut_n = np.cross(vecs[0], vecs[1]) 

403 else: 

404 cut_n = np.dot(cutting_normal, c) 

405 d = np.dot(cop - cut_p, cut_n) 

406 spos = a.get_scaled_positions()[indices] 

407 scop = np.mean(spos, axis=0) 

408 p.append(Positions(spos, scop, s, d, i)) 

409 

410 all_points = p1 + p2 

411 unique_sym = np.unique(sym) 

412 types = {s: sym.count(s) for s in unique_sym} 

413 

414 # Sort these by chemical symbols: 

415 all_points.sort(key=lambda x: x.symbols, reverse=True) 

416 

417 # For each atom type make the pairing 

418 unique_sym.sort() 

419 use_total = {} 

420 for s in unique_sym: 

421 used = [] 

422 not_used = [] 

423 # The list is looked trough in 

424 # reverse order so atoms can be removed 

425 # from the list along the way. 

426 for i in reversed(range(len(all_points))): 

427 # If there are no more atoms of this type 

428 if all_points[i].symbols != s: 

429 break 

430 # Check if the atom should be included 

431 if all_points[i].to_use(): 

432 used.append(all_points.pop(i)) 

433 else: 

434 not_used.append(all_points.pop(i)) 

435 

436 assert len(used) + len(not_used) == types[s] * 2 

437 

438 # While we have too few of the given atom type 

439 while len(used) < types[s]: 

440 index = self.rng.randint(len(not_used)) 

441 used.append(not_used.pop(index)) 

442 

443 # While we have too many of the given atom type 

444 while len(used) > types[s]: 

445 # remove randomly: 

446 index = self.rng.randint(len(used)) 

447 not_used.append(used.pop(index)) 

448 

449 use_total[s] = used 

450 

451 n_tot = sum(len(ll) for ll in use_total.values()) 

452 assert n_tot == len(sym) 

453 

454 # check if the generated structure contains 

455 # atoms from both parents: 

456 count1, count2, N = 0, 0, len(a1) 

457 for x in use_total.values(): 

458 count1 += sum(y.origin == 0 for y in x) 

459 count2 += sum(y.origin == 1 for y in x) 

460 

461 nmin = 1 if self.minfrac is None else int(round(self.minfrac * N)) 

462 if count1 < nmin or count2 < nmin: 

463 return None 

464 

465 # Construct the cartesian positions and reorder the atoms 

466 # to follow the original order 

467 newpos = [] 

468 pbc = a1.get_pbc() 

469 for s in sym: 

470 p = use_total[s].pop() 

471 c = a1.get_cell() if p.origin == 0 else a2.get_cell() 

472 pos = np.dot(p.scaled_positions, c) 

473 cop = np.dot(p.cop, c) 

474 vectors, _lengths = find_mic(pos - cop, c, pbc) 

475 newcop = np.dot(p.cop, cell) 

476 pos = newcop + vectors 

477 for row in pos: 

478 newpos.append(row) 

479 

480 newpos = np.reshape(newpos, (N, 3)) 

481 num = a1.get_atomic_numbers() 

482 child = Atoms(numbers=num, positions=newpos, pbc=pbc, cell=cell, 

483 tags=tags) 

484 child.wrap() 

485 return child