Coverage for /builds/ase/ase/ase/ga/ofp_comparator.py: 50.33%

306 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-08-02 00:12 +0000

1# fmt: off 

2 

3from itertools import combinations_with_replacement 

4from math import erf 

5 

6import matplotlib.pyplot as plt 

7import numpy as np 

8from scipy.spatial.distance import cdist 

9 

10from ase.neighborlist import NeighborList 

11from ase.utils import pbc2pbc 

12 

13 

14class OFPComparator: 

15 """Implementation of comparison using Oganov's fingerprint (OFP) 

16 functions, based on: 

17 

18 * :doi:`Oganov, Valle, J. Chem. Phys. 130, 104504 (2009) 

19 <10.1063/1.3079326>` 

20 

21 * :doi:`Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-1632 

22 <10.1016/j.cpc.2010.06.007>` 

23 

24 Parameters: 

25 

26 n_top: int or None 

27 The number of atoms to optimize (None = include all). 

28 

29 dE: float 

30 Energy difference above which two structures are 

31 automatically considered to be different. (Default 1 eV) 

32 

33 cos_dist_max: float 

34 Maximal cosine distance between two structures in 

35 order to be still considered the same structure. Default 5e-3 

36 

37 rcut: float 

38 Cutoff radius in Angstrom for the fingerprints. 

39 (Default 20 Angstrom) 

40 

41 binwidth: float 

42 Width in Angstrom of the bins over which the fingerprints 

43 are discretized. (Default 0.05 Angstrom) 

44 

45 pbc: list of three booleans or None 

46 Specifies whether to apply periodic boundary conditions 

47 along each of the three unit cell vectors when calculating 

48 the fingerprint. The default (None) is to apply PBCs in all 

49 3 directions. 

50 

51 Note: for isolated systems (pbc = [False, False, False]), 

52 the pair correlation function itself is always short-ranged 

53 (decays to zero beyond a certain radius), so unity is not 

54 subtracted for calculating the fingerprint. Also the 

55 volume normalization disappears. 

56 

57 maxdims: list of three floats or None 

58 If PBCs in only 1 or 2 dimensions are specified, the 

59 maximal thicknesses along the non-periodic directions can 

60 be specified here (the values given for the periodic 

61 directions will not be used). If set to None (the 

62 default), the length of the cell vector along the 

63 non-periodic direction is used. 

64 

65 Note: in this implementation, the cell vectors are 

66 assumed to be orthogonal. 

67 

68 sigma: float 

69 Standard deviation of the gaussian smearing to be applied 

70 in the calculation of the fingerprints (in 

71 Angstrom). Default 0.02 Angstrom. 

72 

73 nsigma: int 

74 Distance (as the number of standard deviations sigma) at 

75 which the gaussian smearing is cut off (i.e. no smearing 

76 beyond that distance). (Default 4) 

77 

78 recalculate: boolean 

79 If True, ignores the fingerprints stored in 

80 atoms.info and recalculates them. (Default False) 

81 

82 """ 

83 

84 def __init__(self, n_top=None, dE=1.0, cos_dist_max=5e-3, rcut=20., 

85 binwidth=0.05, sigma=0.02, nsigma=4, pbc=True, 

86 maxdims=None, recalculate=False): 

87 self.n_top = n_top or 0 

88 self.dE = dE 

89 self.cos_dist_max = cos_dist_max 

90 self.rcut = rcut 

91 self.binwidth = binwidth 

92 self.pbc = pbc2pbc(pbc) 

93 

94 if maxdims is None: 

95 self.maxdims = [None] * 3 

96 else: 

97 self.maxdims = maxdims 

98 

99 self.sigma = sigma 

100 self.nsigma = nsigma 

101 self.recalculate = recalculate 

102 self.dimensions = self.pbc.sum() 

103 

104 if self.dimensions == 1 or self.dimensions == 2: 

105 for direction in range(3): 

106 if not self.pbc[direction]: 

107 if self.maxdims[direction] is not None: 

108 if self.maxdims[direction] <= 0: 

109 e = '''If a max thickness is specificed in maxdims 

110 for a non-periodic direction, it has to be 

111 strictly positive.''' 

112 raise ValueError(e) 

113 

114 def looks_like(self, a1, a2): 

115 """ Return if structure a1 or a2 are similar or not. """ 

116 if len(a1) != len(a2): 

117 raise Exception('The two configurations are not the same size.') 

118 

119 # first we check the energy criteria 

120 if a1.calc is not None and a2.calc is not None: 

121 dE = abs(a1.get_potential_energy() - a2.get_potential_energy()) 

122 if dE >= self.dE: 

123 return False 

124 

125 # then we check the structure 

126 cos_dist = self._compare_structure(a1, a2) 

127 verdict = cos_dist < self.cos_dist_max 

128 return verdict 

129 

130 def _json_encode(self, fingerprints, typedic): 

131 """ json does not accept tuples nor integers as dict keys, 

132 so in order to write the fingerprints to atoms.info, we need 

133 to convert them to strings """ 

134 fingerprints_encoded = {} 

135 for key, val in fingerprints.items(): 

136 try: 

137 newkey = "_".join(map(str, list(key))) 

138 except TypeError: 

139 newkey = str(key) 

140 if isinstance(val, dict): 

141 fingerprints_encoded[newkey] = { 

142 str(key2): val2 for key2, val2 in val.items()} 

143 else: 

144 fingerprints_encoded[newkey] = val 

145 typedic_encoded = {} 

146 for key, val in typedic.items(): 

147 newkey = str(key) 

148 typedic_encoded[newkey] = val 

149 return [fingerprints_encoded, typedic_encoded] 

150 

151 def _json_decode(self, fingerprints, typedic): 

152 """ This is the reverse operation of _json_encode """ 

153 fingerprints_decoded = {} 

154 for key, val in fingerprints.items(): 

155 newkey = list(map(int, key.split("_"))) 

156 if len(newkey) > 1: 

157 newkey = tuple(newkey) 

158 else: 

159 newkey = newkey[0] 

160 

161 if isinstance(val, dict): 

162 fingerprints_decoded[newkey] = { 

163 int(key2): np.array(val2) for key2, val2 in val.items() 

164 } 

165 else: 

166 fingerprints_decoded[newkey] = np.array(val) 

167 typedic_decoded = {} 

168 for key, val in typedic.items(): 

169 newkey = int(key) 

170 typedic_decoded[newkey] = val 

171 return [fingerprints_decoded, typedic_decoded] 

172 

173 def _compare_structure(self, a1, a2): 

174 """ Returns the cosine distance between the two structures, 

175 using their fingerprints. """ 

176 

177 if len(a1) != len(a2): 

178 raise Exception('The two configurations are not the same size.') 

179 

180 a1top = a1[-self.n_top:] 

181 a2top = a2[-self.n_top:] 

182 

183 if 'fingerprints' in a1.info and not self.recalculate: 

184 fp1, typedic1 = a1.info['fingerprints'] 

185 fp1, typedic1 = self._json_decode(fp1, typedic1) 

186 else: 

187 fp1, typedic1 = self._take_fingerprints(a1top) 

188 a1.info['fingerprints'] = self._json_encode(fp1, typedic1) 

189 

190 if 'fingerprints' in a2.info and not self.recalculate: 

191 fp2, typedic2 = a2.info['fingerprints'] 

192 fp2, typedic2 = self._json_decode(fp2, typedic2) 

193 else: 

194 fp2, typedic2 = self._take_fingerprints(a2top) 

195 a2.info['fingerprints'] = self._json_encode(fp2, typedic2) 

196 

197 if sorted(fp1) != sorted(fp2): 

198 raise AssertionError('The two structures have fingerprints ' 

199 'with different compounds.') 

200 for key in typedic1: 

201 if not np.array_equal(typedic1[key], typedic2[key]): 

202 raise AssertionError('The two structures have a different ' 

203 'stoichiometry or ordering!') 

204 

205 cos_dist = self._cosine_distance(fp1, fp2, typedic1) 

206 return cos_dist 

207 

208 def _get_volume(self, a): 

209 ''' Calculates the normalizing value, and other parameters 

210 (pmin,pmax,qmin,qmax) that are used for surface area calculation 

211 in the case of 1 or 2-D periodicity.''' 

212 

213 cell = a.get_cell() 

214 scalpos = a.get_scaled_positions() 

215 

216 # defaults: 

217 volume = 1. 

218 pmin, pmax, qmin, qmax = [0.] * 4 

219 

220 if self.dimensions == 1 or self.dimensions == 2: 

221 for direction in range(3): 

222 if not self.pbc[direction]: 

223 if self.maxdims[direction] is None: 

224 maxdim = np.linalg.norm(cell[direction, :]) 

225 self.maxdims[direction] = maxdim 

226 

227 pbc_dirs = [i for i in range(3) if self.pbc[i]] 

228 non_pbc_dirs = [i for i in range(3) if not self.pbc[i]] 

229 

230 if self.dimensions == 3: 

231 volume = abs(np.dot(np.cross(cell[0, :], cell[1, :]), cell[2, :])) 

232 

233 elif self.dimensions == 2: 

234 non_pbc_dir = non_pbc_dirs[0] 

235 

236 a = np.cross(cell[pbc_dirs[0], :], cell[pbc_dirs[1], :]) 

237 b = self.maxdims[non_pbc_dir] 

238 b /= np.linalg.norm(cell[non_pbc_dir, :]) 

239 

240 volume = np.abs(np.dot(a, b * cell[non_pbc_dir, :])) 

241 

242 maxpos = np.max(scalpos[:, non_pbc_dir]) 

243 minpos = np.min(scalpos[:, non_pbc_dir]) 

244 pwidth = maxpos - minpos 

245 pmargin = 0.5 * (b - pwidth) 

246 # note: here is a place where we assume that the 

247 # non-periodic direction is orthogonal to the periodic ones: 

248 pmin = np.min(scalpos[:, non_pbc_dir]) - pmargin 

249 pmin *= np.linalg.norm(cell[non_pbc_dir, :]) 

250 pmax = np.max(scalpos[:, non_pbc_dir]) + pmargin 

251 pmax *= np.linalg.norm(cell[non_pbc_dir, :]) 

252 

253 elif self.dimensions == 1: 

254 pbc_dir = pbc_dirs[0] 

255 

256 v0 = cell[non_pbc_dirs[0], :] 

257 b0 = self.maxdims[non_pbc_dirs[0]] 

258 b0 /= np.linalg.norm(cell[non_pbc_dirs[0], :]) 

259 v1 = cell[non_pbc_dirs[1], :] 

260 b1 = self.maxdims[non_pbc_dirs[1]] 

261 b1 /= np.linalg.norm(cell[non_pbc_dirs[1], :]) 

262 

263 volume = np.abs(np.dot(np.cross(b0 * v0, b1 * v1), 

264 cell[pbc_dir, :])) 

265 

266 # note: here is a place where we assume that the 

267 # non-periodic direction is orthogonal to the periodic ones: 

268 maxpos = np.max(scalpos[:, non_pbc_dirs[0]]) 

269 minpos = np.min(scalpos[:, non_pbc_dirs[0]]) 

270 pwidth = maxpos - minpos 

271 pmargin = 0.5 * (b0 - pwidth) 

272 

273 pmin = np.min(scalpos[:, non_pbc_dirs[0]]) - pmargin 

274 pmin *= np.linalg.norm(cell[non_pbc_dirs[0], :]) 

275 pmax = np.max(scalpos[:, non_pbc_dirs[0]]) + pmargin 

276 pmax *= np.linalg.norm(cell[non_pbc_dirs[0], :]) 

277 

278 maxpos = np.max(scalpos[:, non_pbc_dirs[1]]) 

279 minpos = np.min(scalpos[:, non_pbc_dirs[1]]) 

280 qwidth = maxpos - minpos 

281 qmargin = 0.5 * (b1 - qwidth) 

282 

283 qmin = np.min(scalpos[:, non_pbc_dirs[1]]) - qmargin 

284 qmin *= np.linalg.norm(cell[non_pbc_dirs[1], :]) 

285 qmax = np.max(scalpos[:, non_pbc_dirs[1]]) + qmargin 

286 qmax *= np.linalg.norm(cell[non_pbc_dirs[1], :]) 

287 

288 elif self.dimensions == 0: 

289 volume = 1. 

290 

291 return [volume, pmin, pmax, qmin, qmax] 

292 

293 def _take_fingerprints(self, atoms, individual=False): 

294 """ Returns a [fingerprints,typedic] list, where fingerprints 

295 is a dictionary with the fingerprints, and typedic is a 

296 dictionary with the list of atom indices for each element 

297 (or "type") in the atoms object. 

298 The keys in the fingerprints dictionary are the (A,B) tuples, 

299 which are the different element-element combinations in the 

300 atoms object (A and B are the atomic numbers). 

301 When A != B, the (A,B) tuple is sorted (A < B). 

302 

303 If individual=True, a dict is returned, where each atom index 

304 has an {atomic_number:fingerprint} dict as value. 

305 If individual=False, the fingerprints from atoms of the same 

306 atomic number are added together.""" 

307 

308 pos = atoms.get_positions() 

309 num = atoms.get_atomic_numbers() 

310 cell = atoms.get_cell() 

311 

312 unique_types = np.unique(num) 

313 posdic = {} 

314 typedic = {} 

315 for t in unique_types: 

316 tlist = [i for i, atom in enumerate(atoms) if atom.number == t] 

317 typedic[t] = tlist 

318 posdic[t] = pos[tlist] 

319 

320 # determining the volume normalization and other parameters 

321 volume, pmin, pmax, qmin, qmax = self._get_volume(atoms) 

322 

323 # functions for calculating the surface area 

324 non_pbc_dirs = [i for i in range(3) if not self.pbc[i]] 

325 

326 def surface_area_0d(r): 

327 return 4 * np.pi * (r**2) 

328 

329 def surface_area_1d(r, pos): 

330 q0 = pos[non_pbc_dirs[1]] 

331 phi1 = np.lib.scimath.arccos((qmax - q0) / r).real 

332 phi2 = np.pi - np.lib.scimath.arccos((qmin - q0) / r).real 

333 factor = 1 - (phi1 + phi2) / np.pi 

334 return surface_area_2d(r, pos) * factor 

335 

336 def surface_area_2d(r, pos): 

337 p0 = pos[non_pbc_dirs[0]] 

338 area = np.minimum(pmax - p0, r) + np.minimum(p0 - pmin, r) 

339 area *= 2 * np.pi * r 

340 return area 

341 

342 def surface_area_3d(r): 

343 return 4 * np.pi * (r**2) 

344 

345 # build neighborlist 

346 # this is computationally the most intensive part 

347 a = atoms.copy() 

348 a.set_pbc(self.pbc) 

349 nl = NeighborList([self.rcut / 2.] * len(a), skin=0., 

350 self_interaction=False, bothways=True) 

351 nl.update(a) 

352 

353 # parameters for the binning: 

354 m = int(np.ceil(self.nsigma * self.sigma / self.binwidth)) 

355 x = 0.25 * np.sqrt(2) * self.binwidth * (2 * m + 1) * 1. / self.sigma 

356 smearing_norm = erf(x) 

357 nbins = int(np.ceil(self.rcut * 1. / self.binwidth)) 

358 bindist = self.binwidth * np.arange(1, nbins + 1) 

359 

360 def take_individual_rdf(index, unique_type): 

361 # Computes the radial distribution function of atoms 

362 # of type unique_type around the atom with index "index". 

363 rdf = np.zeros(nbins) 

364 

365 if self.dimensions == 3: 

366 weights = 1. / surface_area_3d(bindist) 

367 elif self.dimensions == 2: 

368 weights = 1. / surface_area_2d(bindist, pos[index]) 

369 elif self.dimensions == 1: 

370 weights = 1. / surface_area_1d(bindist, pos[index]) 

371 elif self.dimensions == 0: 

372 weights = 1. / surface_area_0d(bindist) 

373 weights /= self.binwidth 

374 

375 indices, offsets = nl.get_neighbors(index) 

376 valid = np.where(num[indices] == unique_type) 

377 p = pos[indices[valid]] + np.dot(offsets[valid], cell) 

378 r = cdist(p, [pos[index]]) 

379 bins = np.floor(r / self.binwidth) 

380 

381 for i in range(-m, m + 1): 

382 newbins = bins + i 

383 valid = np.where((newbins >= 0) & (newbins < nbins)) 

384 valid_bins = newbins[valid].astype(int) 

385 values = weights[valid_bins] 

386 

387 c = 0.25 * np.sqrt(2) * self.binwidth * 1. / self.sigma 

388 values *= 0.5 * erf(c * (2 * i + 1)) - \ 

389 0.5 * erf(c * (2 * i - 1)) 

390 values /= smearing_norm 

391 

392 for j, valid_bin in enumerate(valid_bins): 

393 rdf[valid_bin] += values[j] 

394 

395 rdf /= len(typedic[unique_type]) * 1. / volume 

396 return rdf 

397 

398 fingerprints = {} 

399 if individual: 

400 for i in range(len(atoms)): 

401 fingerprints[i] = {} 

402 for unique_type in unique_types: 

403 fingerprint = take_individual_rdf(i, unique_type) 

404 if self.dimensions > 0: 

405 fingerprint -= 1 

406 fingerprints[i][unique_type] = fingerprint 

407 else: 

408 for t1, t2 in combinations_with_replacement(unique_types, r=2): 

409 key = (t1, t2) 

410 fingerprint = np.zeros(nbins) 

411 for i in typedic[t1]: 

412 fingerprint += take_individual_rdf(i, t2) 

413 fingerprint /= len(typedic[t1]) 

414 if self.dimensions > 0: 

415 fingerprint -= 1 

416 fingerprints[key] = fingerprint 

417 

418 return [fingerprints, typedic] 

419 

420 def _calculate_local_orders(self, individual_fingerprints, typedic, 

421 volume): 

422 """ Returns a list with the local order for every atom, 

423 using the definition of local order from 

424 Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-1632 

425 :doi:`10.1016/j.cpc.2010.06.007`""" 

426 

427 # total number of atoms: 

428 n_tot = sum(len(typedic[key]) for key in typedic) 

429 inv_n_tot = 1. / n_tot 

430 

431 local_orders = [] 

432 for fingerprints in individual_fingerprints.values(): 

433 local_order = 0 

434 for unique_type, fingerprint in fingerprints.items(): 

435 term = np.linalg.norm(fingerprint)**2 

436 term *= self.binwidth 

437 term *= (volume * inv_n_tot)**(-1 / 3) 

438 term *= len(typedic[unique_type]) * inv_n_tot 

439 local_order += term 

440 local_orders.append(np.sqrt(local_order)) 

441 

442 return local_orders 

443 

444 def get_local_orders(self, a): 

445 """ Returns the local orders of all the atoms.""" 

446 

447 a_top = a[-self.n_top:] 

448 key = 'individual_fingerprints' 

449 

450 if key in a.info and not self.recalculate: 

451 fp, typedic = self._json_decode(*a.info[key]) 

452 else: 

453 fp, typedic = self._take_fingerprints(a_top, individual=True) 

454 a.info[key] = self._json_encode(fp, typedic) 

455 

456 volume, _pmin, _pmax, _qmin, _qmax = self._get_volume(a_top) 

457 return self._calculate_local_orders(fp, typedic, volume) 

458 

459 def _cosine_distance(self, fp1, fp2, typedic): 

460 """ Returns the cosine distance from two fingerprints. 

461 It also needs information about the number of atoms from 

462 each element, which is included in "typedic".""" 

463 

464 keys = sorted(fp1) 

465 

466 # calculating the weights: 

467 w = {} 

468 wtot = 0 

469 for key in keys: 

470 weight = len(typedic[key[0]]) * len(typedic[key[1]]) 

471 wtot += weight 

472 w[key] = weight 

473 for key in keys: 

474 w[key] *= 1. / wtot 

475 

476 # calculating the fingerprint norms: 

477 norm1 = 0 

478 norm2 = 0 

479 for key in keys: 

480 norm1 += (np.linalg.norm(fp1[key])**2) * w[key] 

481 norm2 += (np.linalg.norm(fp2[key])**2) * w[key] 

482 norm1 = np.sqrt(norm1) 

483 norm2 = np.sqrt(norm2) 

484 

485 # calculating the distance: 

486 distance = 0 

487 for key in keys: 

488 distance += np.sum(fp1[key] * fp2[key]) * w[key] / (norm1 * norm2) 

489 

490 distance = 0.5 * (1 - distance) 

491 return distance 

492 

493 def plot_fingerprints(self, a, prefix=''): 

494 """ Function for quickly plotting all the fingerprints. 

495 Prefix = a prefix you want to give to the resulting PNG file.""" 

496 

497 if 'fingerprints' in a.info and not self.recalculate: 

498 fp, typedic = a.info['fingerprints'] 

499 fp, typedic = self._json_decode(fp, typedic) 

500 else: 

501 a_top = a[-self.n_top:] 

502 fp, typedic = self._take_fingerprints(a_top) 

503 a.info['fingerprints'] = self._json_encode(fp, typedic) 

504 

505 npts = int(np.ceil(self.rcut * 1. / self.binwidth)) 

506 x = np.linspace(0, self.rcut, npts, endpoint=False) 

507 

508 for key, val in fp.items(): 

509 plt.plot(x, val) 

510 suffix = f"_fp_{key[0]}_{key[1]}.png" 

511 plt.savefig(prefix + suffix) 

512 plt.clf() 

513 

514 def plot_individual_fingerprints(self, a, prefix=''): 

515 """ Function for plotting all the individual fingerprints. 

516 Prefix = a prefix for the resulting PNG file.""" 

517 if 'individual_fingerprints' in a.info and not self.recalculate: 

518 fp, typedic = a.info['individual_fingerprints'] 

519 else: 

520 a_top = a[-self.n_top:] 

521 fp, typedic = self._take_fingerprints(a_top, individual=True) 

522 a.info['individual_fingerprints'] = [fp, typedic] 

523 

524 npts = int(np.ceil(self.rcut * 1. / self.binwidth)) 

525 x = np.linspace(0, self.rcut, npts, endpoint=False) 

526 

527 for key, val in fp.items(): 

528 for key2, val2 in val.items(): 

529 plt.plot(x, val2) 

530 plt.ylim([-1, 10]) 

531 suffix = f"_individual_fp_{key}_{key2}.png" 

532 plt.savefig(prefix + suffix) 

533 plt.clf()