Coverage for /builds/ase/ase/ase/ga/startgenerator.py: 95.17%

1# fmt: off

3"""Tools for generating new random starting candidates."""

4import numpy as np

6from ase import Atoms

7from ase.build import molecule

8from ase.data import atomic_numbers

9from ase.ga.utilities import (

10 atoms_too_close,

11 atoms_too_close_two_sets,

12 closest_distances_generator,

13)

16class StartGenerator:

17 """Class for generating random starting candidates.

19 Its basic task consists of randomly placing atoms or

20 molecules within a predescribed box, while respecting

21 certain minimal interatomic distances.

23 Depending on the problem at hand, certain box vectors

24 may not be known or chosen beforehand, and hence also

25 need to be generated at random. Common cases include

26 bulk crystals, films and chains, with respectively

27 3, 2 and 1 unknown cell vectors.

29 Parameters:

31 slab: Atoms object

32 Specifies the cell vectors and periodic boundary conditions

33 to be applied to the randomly generated structures.

34 Any included atoms (e.g. representing an underlying slab)

35 are copied to these new structures.

36 Variable cell vectors (see number_of_variable_cell_vectors)

37 will be ignored because these will be generated at random.

39 blocks: list

40 List of building units for the structure. Each item can be:

42 * an integer: representing a single atom by its atomic number,

43 * a string: for a single atom (a chemical symbol) or a

44 molecule (name recognized by ase.build.molecule),

45 * an Atoms object,

46 * an (A, B) tuple or list where A is any of the above

47 and B is the number of A units to include.

49 A few examples:

51 >>> blocks = ['Ti'] * 4 + ['O'] * 8

52 >>> blocks = [('Ti', 4), ('O', 8)]

53 >>> blocks = [('CO2', 3)] # 3 CO2 molecules

54 >>> co = Atoms('CO', positions=[[0, 0, 0], [1.4, 0, 0]])

55 >>> blocks = [(co, 3)]

57 Each individual block (single atom or molecule) in the

58 randomly generated candidates is given a unique integer

59 tag. These can be used to preserve the molecular identity

60 of these subunits.

62 blmin: dict or float

63 Dictionary with minimal interatomic distances.

64 If a number is provided instead, the dictionary will

65 be generated with this ratio of covalent bond radii.

66 Note: when preserving molecular identity (see use_tags),

67 the blmin dict will (naturally) only be applied

68 to intermolecular distances (not the intramolecular

69 ones).

71 number_of_variable_cell_vectors: int (default 0)

72 The number of variable cell vectors (0, 1, 2 or 3).

73 To keep things simple, it is the 'first' vectors which

74 will be treated as variable, i.e. the 'a' vector in the

75 univariate case, the 'a' and 'b' vectors in the bivariate

76 case, etc.

78 box_to_place_in: [list, list of lists] (default None)

79 The box in which the atoms can be placed.

80 The default (None) means the box is equal to the

81 entire unit cell of the 'slab' object.

82 In many cases, however, smaller boxes are desired

83 (e.g. for adsorbates on a slab surface or for isolated

84 clusters). Then, box_to_place_in can be set as

85 [p0, [v1, v2, v3]] with positions being generated as

86 p0 + r1 * v1 + r2 * v2 + r3 + v3.

87 In case of one or more variable cell vectors,

88 the corresponding items in p0/v1/v2/v3 will be ignored.

90 box_volume: int or float or None (default)

91 Initial guess for the box volume in cubic Angstrom

92 (used in generating the variable cell vectors).

93 Typical values in the solid state are 8-12 A^3 per atom.

94 If there are no variable cell vectors, the default None

95 is required (box volume equal to the box_to_place_in

96 volume).

98 splits: dict or None

99 Splitting scheme for increasing the translational symmetry

100 in the random candidates, based on:

101

102 * `Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-32`__

103

104 __ http://dx.doi.org/10.1016/j.cpc.2010.06.007

105

106 This should be a dict specifying the relative probabilities

107 for each split, written as tuples. For example,

108

109 >>> splits = {(2,): 3, (1,): 1}

110

111 This means that, for each structure, either a splitting

112 factor of 2 is applied to one randomly chosen axis,

113 or a splitting factor of 1 is applied (i.e., no splitting).

114 The probability ratio of the two scenararios will be 3:1,

115 i.e. 75% chance for the former and 25% chance for the latter

116 splitting scheme. Only the directions in which the 'slab'

117 object is periodic are eligible for splitting.

118

119 To e.g. always apply splitting factors of 2 and 3 along two

120 randomly chosen axes:

121

122 >>> splits = {(2, 3): 1}

123

124 By default, no splitting is applied (splits = None = {(1,): 1}).

125

126 cellbounds: ase.ga.utilities.CellBounds instance

127 Describing limits on the cell shape, see

128 :class:`~ase.ga.utilities.CellBounds`.

129 Note that it only make sense to impose conditions

130 regarding cell vectors which have been marked as

131 variable (see number_of_variable_cell_vectors).

132

133 test_dist_to_slab: bool (default True)

134 Whether to make sure that the distances between

135 the atoms and the slab satisfy the blmin.

136

137 test_too_far: bool (default True)

138 Whether to also make sure that there are no isolated

139 atoms or molecules with nearest-neighbour bond lengths

140 larger than 2x the value in the blmin dict.

141

142 rng: Random number generator

143 By default numpy.random.

144 """

145

146 def __init__(self, slab, blocks, blmin, number_of_variable_cell_vectors=0,

147 box_to_place_in=None, box_volume=None, splits=None,

148 cellbounds=None, test_dist_to_slab=True, test_too_far=True,

149 rng=np.random):

150

151 self.slab = slab

152

153 self.blocks = []

154 for item in blocks:

155 if isinstance(item, (tuple, list)):

156 assert len(item) == 2, 'Item length %d != 2' % len(item)

157 block, count = item

158 else:

159 block, count = item, 1

160

161 # Convert block into Atoms object

162 if isinstance(block, Atoms):

163 pass

164 elif block in atomic_numbers:

165 block = Atoms(block)

166 elif isinstance(block, str):

167 block = molecule(block)

168 elif block in atomic_numbers.values():

169 block = Atoms(numbers=[block])

170 else:

171 raise ValueError('Cannot parse this block:', block)

172

173 # Add to self.blocks, taking into account that

174 # we want to group the same blocks together.

175 # This is important for the cell splitting.

176 for i, (b, c) in enumerate(self.blocks):

177 if block == b:

178 self.blocks[i][1] += count

179 break

180 else:

181 self.blocks.append([block, count])

182

183 if isinstance(blmin, dict):

184 self.blmin = blmin

185 else:

186 numbers = np.unique([b.get_atomic_numbers() for b in self.blocks])

187 self.blmin = closest_distances_generator(

188 numbers,

189 ratio_of_covalent_radii=blmin)

190

191 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors

192 assert self.number_of_variable_cell_vectors in range(4)

193 if len(self.slab) > 0:

194 msg = 'Including atoms in the slab only makes sense'

195 msg += ' if there are no variable unit cell vectors'

196 assert self.number_of_variable_cell_vectors == 0, msg

197 for i in range(self.number_of_variable_cell_vectors):

198 msg = f'Unit cell {("abc"[i])}-vector is marked as variable '

199 msg += 'and slab must then also be periodic in this direction'

200 assert self.slab.pbc[i], msg

201

202 if box_to_place_in is None:

203 p0 = np.array([0., 0., 0.])

204 cell = self.slab.get_cell()

205 self.box_to_place_in = [p0, [cell[0, :], cell[1, :], cell[2, :]]]

206 else:

207 self.box_to_place_in = box_to_place_in

208

209 if box_volume is None:

210 assert self.number_of_variable_cell_vectors == 0

211 box_volume = abs(np.linalg.det(self.box_to_place_in[1]))

212 else:

213 assert self.number_of_variable_cell_vectors > 0

214 self.box_volume = box_volume

215 assert self.box_volume > 0

216

217 if splits is None:

218 splits = {(1,): 1}

219 tot = sum(v for v in splits.values())

220 self.splits = {k: v * 1. / tot for k, v in splits.items()}

221

222 self.cellbounds = cellbounds

223 self.test_too_far = test_too_far

224 self.test_dist_to_slab = test_dist_to_slab

225 self.rng = rng

226

227 def get_new_candidate(self, maxiter=None):

228 """Returns a new candidate.

229

230 maxiter: upper bound on the total number of times

231 the random position generator is called

232 when generating the new candidate.

233

234 By default (maxiter=None) no such bound

235 is imposed. If the generator takes too

236 long time to create a new candidate, it

237 may be suitable to specify a finite value.

238 When the bound is exceeded, None is returned.

239 """

240 pbc = self.slab.get_pbc()

241

242 # Choose cell splitting

243 r = self.rng.random()

244 cumprob = 0

245 for split, prob in self.splits.items():

246 cumprob += prob

247 if cumprob > r:

248 break

249

250 # Choose direction(s) along which to split

251 # and by how much

252 directions = [i for i in range(3) if pbc[i]]

253 repeat = [1, 1, 1]

254 if len(directions) > 0:

255 for number in split:

256 d = self.rng.choice(directions)

257 repeat[d] = number

258 repeat = tuple(repeat)

259

260 # Generate the 'full' unit cell

261 # for the eventual candidates

262 cell = self.generate_unit_cell(repeat)

263 if self.number_of_variable_cell_vectors == 0:

264 assert np.allclose(cell, self.slab.get_cell())

265

266 # Make the smaller 'box' in which we are

267 # allowed to place the atoms and which will

268 # then be repeated to fill the 'full' unit cell

269 box = np.copy(cell)

270 for i in range(self.number_of_variable_cell_vectors, 3):

271 box[i] = np.array(self.box_to_place_in[1][i])

272 box /= np.array([repeat]).T

273

274 # Here we gather the (reduced) number of blocks

275 # to put in the smaller box, and the 'surplus'

276 # occurring when the block count is not divisible

277 # by the number of repetitions.

278 # E.g. if we have a ('Ti', 4) block and do a

279 # [2, 3, 1] repetition, we employ a ('Ti', 1)

280 # block in the smaller box and delete 2 out 6

281 # Ti atoms afterwards

282 nrep = int(np.prod(repeat))

283 blocks, ids, surplus = [], [], []

284 for i, (block, count) in enumerate(self.blocks):

285 count_part = int(np.ceil(count * 1. / nrep))

286 blocks.extend([block] * count_part)

287 surplus.append(nrep * count_part - count)

288 ids.extend([i] * count_part)

289

290 N_blocks = len(blocks)

291

292 # Shuffle the ordering so different blocks

293 # are added in random order

294 order = np.arange(N_blocks)

295 self.rng.shuffle(order)

296 blocks = [blocks[i] for i in order]

297 ids = np.array(ids)[order]

298

299 # Add blocks one by one until we have found

300 # a valid candidate

301 blmin = self.blmin

302 blmin_too_far = {key: 2 * val for key, val in blmin.items()}

303

304 niter = 0

305 while maxiter is None or niter < maxiter:

306 cand = Atoms('', cell=box, pbc=pbc)

307

308 for i in range(N_blocks):

309 atoms = blocks[i].copy()

310 atoms.set_tags(i)

311 atoms.set_pbc(pbc)

312 atoms.set_cell(box, scale_atoms=False)

313

314 while maxiter is None or niter < maxiter:

315 niter += 1

316 cop = atoms.get_positions().mean(axis=0)

317 pos = np.dot(self.rng.random((1, 3)), box)

318 atoms.translate(pos - cop)

319

320 if len(atoms) > 1:

321 # Apply a random rotation to multi-atom blocks

322 phi, theta, psi = 360 * self.rng.random(3)

323 atoms.euler_rotate(phi=phi, theta=0.5 * theta, psi=psi,

324 center=pos)

325

326 if not atoms_too_close_two_sets(cand, atoms, blmin):

327 cand += atoms

328 break

329 else:

330 # Reached maximum iteration number

331 # Break out of the for loop above

332 cand = None

333 break

334

335 if cand is None:

336 # Exit the main while loop

337 break

338

339 # Rebuild the candidate after repeating,

340 # randomly deleting surplus blocks and

341 # sorting back to the original order

342 cand_full = cand.repeat(repeat)

343

344 tags_full = cand_full.get_tags()

345 for i in range(nrep):

346 tags_full[len(cand) * i:len(cand) * (i + 1)] += i * N_blocks

347 cand_full.set_tags(tags_full)

348

349 cand = Atoms('', cell=cell, pbc=pbc)

350 ids_full = np.tile(ids, nrep)

351

352 tag_counter = 0

353 if len(self.slab) > 0:

354 tag_counter = int(max(self.slab.get_tags())) + 1

355

356 for i, (block, count) in enumerate(self.blocks):

357 tags = np.where(ids_full == i)[0]

358 bad = self.rng.choice(tags, size=surplus[i], replace=False)

359 for tag in tags:

360 if tag not in bad:

361 select = [a.index for a in cand_full if a.tag == tag]

362 atoms = cand_full[select] # is indeed a copy!

363 atoms.set_tags(tag_counter)

364 assert len(atoms) == len(block)

365 cand += atoms

366 tag_counter += 1

367

368 for i in range(self.number_of_variable_cell_vectors, 3):

369 cand.positions[:, i] += self.box_to_place_in[0][i]

370

371 # By construction, the minimal interatomic distances

372 # within the structure should already be respected

373 assert not atoms_too_close(cand, blmin, use_tags=True), \

374 'This is not supposed to happen; please report this bug'

375

376 if self.test_dist_to_slab and len(self.slab) > 0:

377 if atoms_too_close_two_sets(self.slab, cand, blmin):

378 continue

379

380 if self.test_too_far:

381 tags = cand.get_tags()

382

383 for tag in np.unique(tags):

384 too_far = True

385 indices_i = np.where(tags == tag)[0]

386 indices_j = np.where(tags != tag)[0]

387 too_far = not atoms_too_close_two_sets(cand[indices_i],

388 cand[indices_j],

389 blmin_too_far)

390

391 if too_far and len(self.slab) > 0:

392 # the block is too far from the rest

393 # but might still be sufficiently

394 # close to the slab

395 too_far = not atoms_too_close_two_sets(cand[indices_i],

396 self.slab,

397 blmin_too_far)

398 if too_far:

399 break

400 else:

401 too_far = False

402

403 if too_far:

404 continue

405

406 # Passed all the tests

407 cand = self.slab + cand

408 cand.set_cell(cell, scale_atoms=False)

409 break

410 else:

411 # Reached max iteration count in the while loop

412 return None

413

414 return cand

415

416 def generate_unit_cell(self, repeat):

417 """Generates a random unit cell.

418

419 For this, we use the vectors in self.slab.cell

420 in the fixed directions and randomly generate

421 the variable ones. For such a cell to be valid,

422 it has to satisfy the self.cellbounds constraints.

423

424 The cell will also be such that the volume of the

425 box in which the atoms can be placed (box limits

426 described by self.box_to_place_in) is equal to

427 self.box_volume.

428

429 Parameters:

430

431 repeat: tuple of 3 integers

432 Indicates by how much each cell vector

433 will later be reduced by cell splitting.

434

435 This is used to ensure that the original

436 cell is large enough so that the cell lengths

437 of the smaller cell exceed the largest

438 (X,X)-minimal-interatomic-distance in self.blmin.

439 """

440 # Find the minimal cell length 'Lmin'

441 # that we need in order to ensure that

442 # an added atom or molecule will never

443 # be 'too close to itself'

444 Lmin = 0.

445 for atoms, count in self.blocks:

446 dist = atoms.get_all_distances(mic=False, vector=False)

447 num = atoms.get_atomic_numbers()

448

449 for i in range(len(atoms)):

450 dist[i, i] += self.blmin[(num[i], num[i])]

451 for j in range(i):

452 bl = self.blmin[(num[i], num[j])]

453 dist[i, j] += bl

454 dist[j, i] += bl

455

456 L = np.max(dist)

457 if L > Lmin:

458 Lmin = L

459

460 # Generate a suitable unit cell

461 valid = False

462 while not valid:

463 cell = np.zeros((3, 3))

464

465 for i in range(self.number_of_variable_cell_vectors):

466 # on-diagonal values

467 cell[i, i] = self.rng.random() * np.cbrt(self.box_volume)

468 cell[i, i] *= repeat[i]

469 for j in range(i):

470 # off-diagonal values

471 cell[i, j] = (self.rng.random() - 0.5) * cell[i - 1, i - 1]

472

473 # volume scaling

474 for i in range(self.number_of_variable_cell_vectors, 3):

475 cell[i] = self.box_to_place_in[1][i]

476

477 if self.number_of_variable_cell_vectors > 0:

478 volume = abs(np.linalg.det(cell))

479 scaling = self.box_volume / volume

480 scaling **= 1. / self.number_of_variable_cell_vectors

481 cell[:self.number_of_variable_cell_vectors] *= scaling

482

483 for i in range(self.number_of_variable_cell_vectors, 3):

484 cell[i] = self.slab.get_cell()[i]

485

486 # bounds checking

487 valid = True

488

489 if self.cellbounds is not None:

490 if not self.cellbounds.is_within_bounds(cell):

491 valid = False

492

493 if valid:

494 for i in range(3):

495 if np.linalg.norm(cell[i]) < repeat[i] * Lmin:

496 assert self.number_of_variable_cell_vectors > 0

497 valid = False

498

499 return cell