Coverage for /builds/ase/ase/ase/ga/cutandsplicepairing.py: 90.19%

1# fmt: off

3"""Implementation of the cut-and-splice paring operator."""

4import numpy as np

6from ase import Atoms

7from ase.ga.offspring_creator import OffspringCreator

8from ase.ga.utilities import (

9 atoms_too_close,

10 atoms_too_close_two_sets,

11 gather_atoms_by_tag,

12)

13from ase.geometry import find_mic

16class Positions:

17 """Helper object to simplify the pairing process.

19 Parameters:

21 scaled_positions: (Nx3) array

22 Positions in scaled coordinates

23 cop: (1x3) array

24 Center-of-positions (also in scaled coordinates)

25 symbols: str

26 String with the atomic symbols

27 distance: float

28 Signed distance to the cutting plane

29 origin: int (0 or 1)

30 Determines at which side of the plane the position should be.

31 """

33 def __init__(self, scaled_positions, cop, symbols, distance, origin):

34 self.scaled_positions = scaled_positions

35 self.cop = cop

36 self.symbols = symbols

37 self.distance = distance

38 self.origin = origin

40 def to_use(self):

41 """Tells whether this position is at the right side."""

42 if self.distance > 0. and self.origin == 0:

43 return True

44 elif self.distance < 0. and self.origin == 1:

45 return True

46 else:

47 return False

50class CutAndSplicePairing(OffspringCreator):

51 """The Cut and Splice operator by Deaven and Ho.

53 Creates offspring from two parent structures using

54 a randomly generated cutting plane.

56 The parents may have different unit cells, in which

57 case the offspring unit cell will be a random combination

58 of the parent cells.

60 The basic implementation (for fixed unit cells) is

61 described in:

63 :doi:`L.B. Vilhelmsen and B. Hammer, PRL, 108, 126101 (2012)

64 <10.1103/PhysRevLett.108.126101`>

66 The extension to variable unit cells is similar to:

68 * :doi:`Glass, Oganov, Hansen, Comp. Phys. Comm. 175 (2006) 713-720

69 <10.1016/j.cpc.2006.07.020>`

71 * :doi:`Lonie, Zurek, Comp. Phys. Comm. 182 (2011) 372-387

72 <10.1016/j.cpc.2010.07.048>`

74 The operator can furthermore preserve molecular identity

75 if desired (see the *use_tags* kwarg). Atoms with the same

76 tag will then be considered as belonging to the same molecule,

77 and their internal geometry will not be changed by the operator.

79 If use_tags is enabled, the operator will also conserve the

80 number of molecules of each kind (in addition to conserving

81 the overall stoichiometry). Currently, molecules are considered

82 to be of the same kind if their chemical symbol strings are

83 identical. In rare cases where this may not be sufficient

84 (i.e. when desiring to keep the same ratio of isomers), the

85 different isomers can be differentiated by giving them different

86 elemental orderings (e.g. 'XY2' and 'Y2X').

88 Parameters:

90 slab: Atoms object

91 Specifies the cell vectors and periodic boundary conditions

92 to be applied to the randomly generated structures.

93 Any included atoms (e.g. representing an underlying slab)

94 are copied to these new structures.

96 n_top: int

97 The number of atoms to optimize

99 blmin: dict

100 Dictionary with minimal interatomic distances.

101 Note: when preserving molecular identity (see use_tags),

102 the blmin dict will (naturally) only be applied

103 to intermolecular distances (not the intramolecular

104 ones).

105

106 number_of_variable_cell_vectors: int (default 0)

107 The number of variable cell vectors (0, 1, 2 or 3).

108 To keep things simple, it is the 'first' vectors which

109 will be treated as variable, i.e. the 'a' vector in the

110 univariate case, the 'a' and 'b' vectors in the bivariate

111 case, etc.

112

113 p1: float or int between 0 and 1

114 Probability that a parent is shifted over a random

115 distance along the normal of the cutting plane

116 (only operative if number_of_variable_cell_vectors > 0).

117

118 p2: float or int between 0 and 1

119 Same as p1, but for shifting along the directions

120 in the cutting plane (only operative if

121 number_of_variable_cell_vectors > 0).

122

123 minfrac: float between 0 and 1, or None (default)

124 Minimal fraction of atoms a parent must contribute

125 to the child. If None, each parent must contribute

126 at least one atom.

127

128 cellbounds: ase.ga.utilities.CellBounds instance

129 Describing limits on the cell shape, see

130 :class:`~ase.ga.utilities.CellBounds`.

131 Note that it only make sense to impose conditions

132 regarding cell vectors which have been marked as

133 variable (see number_of_variable_cell_vectors).

134

135 use_tags: bool

136 Whether to use the atomic tags to preserve

137 molecular identity.

138

139 test_dist_to_slab: bool (default True)

140 Whether to make sure that the distances between

141 the atoms and the slab satisfy the blmin.

142

143 rng: Random number generator

144 By default numpy.random.

145 """

146

147 def __init__(self, slab, n_top, blmin, number_of_variable_cell_vectors=0,

148 p1=1, p2=0.05, minfrac=None, cellbounds=None,

149 test_dist_to_slab=True, use_tags=False, rng=np.random,

150 verbose=False):

151

152 OffspringCreator.__init__(self, verbose, rng=rng)

153 self.slab = slab

154 self.n_top = n_top

155 self.blmin = blmin

156 assert number_of_variable_cell_vectors in range(4)

157 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors

158 self.p1 = p1

159 self.p2 = p2

160 self.minfrac = minfrac

161 self.cellbounds = cellbounds

162 self.test_dist_to_slab = test_dist_to_slab

163 self.use_tags = use_tags

164

165 self.scaling_volume = None

166 self.descriptor = 'CutAndSplicePairing'

167 self.min_inputs = 2

168

169 def update_scaling_volume(self, population, w_adapt=0.5, n_adapt=0):

170 """Updates the scaling volume that is used in the pairing

171

172 w_adapt: weight of the new vs the old scaling volume

173 n_adapt: number of best candidates in the population that

174 are used to calculate the new scaling volume

175 """

176 if not n_adapt:

177 # take best 20% of the population

178 n_adapt = int(np.ceil(0.2 * len(population)))

179 v_new = np.mean([a.get_volume() for a in population[:n_adapt]])

180

181 if not self.scaling_volume:

182 self.scaling_volume = v_new

183 else:

184 volumes = [self.scaling_volume, v_new]

185 weights = [1 - w_adapt, w_adapt]

186 self.scaling_volume = np.average(volumes, weights=weights)

187

188 def get_new_individual(self, parents):

189 """The method called by the user that

190 returns the paired structure."""

191 f, m = parents

192

193 indi = self.cross(f, m)

194 desc = 'pairing: {} {}'.format(f.info['confid'],

195 m.info['confid'])

196 # It is ok for an operator to return None

197 # It means that it could not make a legal offspring

198 # within a reasonable amount of time

199 if indi is None:

200 return indi, desc

201 indi = self.initialize_individual(f, indi)

202 indi.info['data']['parents'] = [f.info['confid'],

203 m.info['confid']]

204

205 return self.finalize_individual(indi), desc

206

207 def cross(self, a1, a2):

208 """Crosses the two atoms objects and returns one"""

209

210 if len(a1) != len(self.slab) + self.n_top:

211 raise ValueError('Wrong size of structure to optimize')

212 if len(a1) != len(a2):

213 raise ValueError('The two structures do not have the same length')

214

215 N = self.n_top

216

217 # Only consider the atoms to optimize

218 a1 = a1[len(a1) - N: len(a1)]

219 a2 = a2[len(a2) - N: len(a2)]

220

221 if not np.array_equal(a1.numbers, a2.numbers):

222 err = 'Trying to pair two structures with different stoichiometry'

223 raise ValueError(err)

224

225 if self.use_tags and not np.array_equal(a1.get_tags(), a2.get_tags()):

226 err = 'Trying to pair two structures with different tags'

227 raise ValueError(err)

228

229 cell1 = a1.get_cell()

230 cell2 = a2.get_cell()

231 for i in range(self.number_of_variable_cell_vectors, 3):

232 err = 'Unit cells are supposed to be identical in direction %d'

233 assert np.allclose(cell1[i], cell2[i]), (err % i, cell1, cell2)

234

235 invalid = True

236 counter = 0

237 maxcount = 1000

238 a1_copy = a1.copy()

239 a2_copy = a2.copy()

240

241 # Run until a valid pairing is made or maxcount pairings are tested.

242 while invalid and counter < maxcount:

243 counter += 1

244

245 newcell = self.generate_unit_cell(cell1, cell2)

246 if newcell is None:

247 # No valid unit cell could be generated.

248 # This strongly suggests that it is near-impossible

249 # to generate one from these parent cells and it is

250 # better to abort now.

251 break

252

253 # Choose direction of cutting plane normal

254 if self.number_of_variable_cell_vectors == 0:

255 # Will be generated entirely at random

256 theta = np.pi * self.rng.random()

257 phi = 2. * np.pi * self.rng.random()

258 cut_n = np.array([np.cos(phi) * np.sin(theta),

259 np.sin(phi) * np.sin(theta), np.cos(theta)])

260 else:

261 # Pick one of the 'variable' cell vectors

262 cut_n = self.rng.choice(self.number_of_variable_cell_vectors)

263

264 # Randomly translate parent structures

265 for a_copy, a in zip([a1_copy, a2_copy], [a1, a2]):

266 a_copy.set_positions(a.get_positions())

267

268 cell = a_copy.get_cell()

269 for i in range(self.number_of_variable_cell_vectors):

270 r = self.rng.random()

271 cond1 = i == cut_n and r < self.p1

272 cond2 = i != cut_n and r < self.p2

273 if cond1 or cond2:

274 a_copy.positions += self.rng.random() * cell[i]

275

276 if self.use_tags:

277 # For correct determination of the center-

278 # of-position of the multi-atom blocks,

279 # we need to group their constituent atoms

280 # together

281 gather_atoms_by_tag(a_copy)

282 else:

283 a_copy.wrap()

284

285 # Generate the cutting point in scaled coordinates

286 cosp1 = np.average(a1_copy.get_scaled_positions(), axis=0)

287 cosp2 = np.average(a2_copy.get_scaled_positions(), axis=0)

288 cut_p = np.zeros((1, 3))

289 for i in range(3):

290 if i < self.number_of_variable_cell_vectors:

291 cut_p[0, i] = self.rng.random()

292 else:

293 cut_p[0, i] = 0.5 * (cosp1[i] + cosp2[i])

294

295 # Perform the pairing:

296 child = self._get_pairing(a1_copy, a2_copy, cut_p, cut_n, newcell)

297 if child is None:

298 continue

299

300 # Verify whether the atoms are too close or not:

301 if atoms_too_close(child, self.blmin, use_tags=self.use_tags):

302 continue

303

304 if self.test_dist_to_slab and len(self.slab) > 0:

305 if atoms_too_close_two_sets(self.slab, child, self.blmin):

306 continue

307

308 # Passed all the tests

309 child = self.slab + child

310 child.set_cell(newcell, scale_atoms=False)

311 child.wrap()

312 return child

313

314 return None

315

316 def generate_unit_cell(self, cell1, cell2, maxcount=10000):

317 """Generates a new unit cell by a random linear combination

318 of the parent cells. The new cell must satisfy the

319 self.cellbounds constraints. Returns None if no such cell

320 was generated within a given number of attempts.

321

322 Parameters:

323

324 maxcount: int

325 The maximal number of attempts.

326 """

327 # First calculate the scaling volume

328 if not self.scaling_volume:

329 v1 = np.abs(np.linalg.det(cell1))

330 v2 = np.abs(np.linalg.det(cell2))

331 r = self.rng.random()

332 v_ref = r * v1 + (1 - r) * v2

333 else:

334 v_ref = self.scaling_volume

335

336 # Now the cell vectors

337 if self.number_of_variable_cell_vectors == 0:

338 assert np.allclose(cell1, cell2), 'Parent cells are not the same'

339 newcell = np.copy(cell1)

340 else:

341 count = 0

342 while count < maxcount:

343 r = self.rng.random()

344 newcell = r * cell1 + (1 - r) * cell2

345

346 vol = abs(np.linalg.det(newcell))

347 scaling = v_ref / vol

348 scaling **= 1. / self.number_of_variable_cell_vectors

349 newcell[:self.number_of_variable_cell_vectors] *= scaling

350

351 found = True

352 if self.cellbounds is not None:

353 found = self.cellbounds.is_within_bounds(newcell)

354 if found:

355 break

356

357 count += 1

358 else:

359 # Did not find acceptable cell

360 newcell = None

361

362 return newcell

363

364 def _get_pairing(self, a1, a2, cutting_point, cutting_normal, cell):

365 """Creates a child from two parents using the given cut.

366

367 Returns None if the generated structure does not contain

368 a large enough fraction of each parent (see self.minfrac).

369

370 Does not check whether atoms are too close.

371

372 Assumes the 'slab' parts have been removed from the parent

373 structures and that these have been checked for equal

374 lengths, stoichiometries, and tags (if self.use_tags).

375

376 Parameters:

377

378 cutting_normal: int or (1x3) array

379

380 cutting_point: (1x3) array

381 In fractional coordinates

382

383 cell: (3x3) array

384 The unit cell for the child structure

385 """

386 symbols = a1.get_chemical_symbols()

387 tags = a1.get_tags() if self.use_tags else np.arange(len(a1))

388

389 # Generate list of all atoms / atom groups:

390 p1, p2, sym = [], [], []

391 for i in np.unique(tags):

392 indices = np.where(tags == i)[0]

393 s = ''.join([symbols[j] for j in indices])

394 sym.append(s)

395

396 for i, (a, p) in enumerate(zip([a1, a2], [p1, p2])):

397 c = a.get_cell()

398 cop = np.mean(a.positions[indices], axis=0)

399 cut_p = np.dot(cutting_point, c)

400 if isinstance(cutting_normal, int):

401 vecs = [c[j] for j in range(3) if j != cutting_normal]

402 cut_n = np.cross(vecs[0], vecs[1])

403 else:

404 cut_n = np.dot(cutting_normal, c)

405 d = np.dot(cop - cut_p, cut_n)

406 spos = a.get_scaled_positions()[indices]

407 scop = np.mean(spos, axis=0)

408 p.append(Positions(spos, scop, s, d, i))

409

410 all_points = p1 + p2

411 unique_sym = np.unique(sym)

412 types = {s: sym.count(s) for s in unique_sym}

413

414 # Sort these by chemical symbols:

415 all_points.sort(key=lambda x: x.symbols, reverse=True)

416

417 # For each atom type make the pairing

418 unique_sym.sort()

419 use_total = {}

420 for s in unique_sym:

421 used = []

422 not_used = []

423 # The list is looked trough in

424 # reverse order so atoms can be removed

425 # from the list along the way.

426 for i in reversed(range(len(all_points))):

427 # If there are no more atoms of this type

428 if all_points[i].symbols != s:

429 break

430 # Check if the atom should be included

431 if all_points[i].to_use():

432 used.append(all_points.pop(i))

433 else:

434 not_used.append(all_points.pop(i))

435

436 assert len(used) + len(not_used) == types[s] * 2

437

438 # While we have too few of the given atom type

439 while len(used) < types[s]:

440 index = self.rng.randint(len(not_used))

441 used.append(not_used.pop(index))

442

443 # While we have too many of the given atom type

444 while len(used) > types[s]:

445 # remove randomly:

446 index = self.rng.randint(len(used))

447 not_used.append(used.pop(index))

448

449 use_total[s] = used

450

451 n_tot = sum(len(ll) for ll in use_total.values())

452 assert n_tot == len(sym)

453

454 # check if the generated structure contains

455 # atoms from both parents:

456 count1, count2, N = 0, 0, len(a1)

457 for x in use_total.values():

458 count1 += sum(y.origin == 0 for y in x)

459 count2 += sum(y.origin == 1 for y in x)

460

461 nmin = 1 if self.minfrac is None else int(round(self.minfrac * N))

462 if count1 < nmin or count2 < nmin:

463 return None

464

465 # Construct the cartesian positions and reorder the atoms

466 # to follow the original order

467 newpos = []

468 pbc = a1.get_pbc()

469 for s in sym:

470 p = use_total[s].pop()

471 c = a1.get_cell() if p.origin == 0 else a2.get_cell()

472 pos = np.dot(p.scaled_positions, c)

473 cop = np.dot(p.cop, c)

474 vectors, _lengths = find_mic(pos - cop, c, pbc)

475 newcop = np.dot(p.cop, cell)

476 pos = newcop + vectors

477 for row in pos:

478 newpos.append(row)

479

480 newpos = np.reshape(newpos, (N, 3))

481 num = a1.get_atomic_numbers()

482 child = Atoms(numbers=num, positions=newpos, pbc=pbc, cell=cell,

483 tags=tags)

484 child.wrap()

485 return child