Coverage for /builds/ase/ase/ase/ga/ofp_comparator.py: 50.33%
306 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-08-02 00:12 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2025-08-02 00:12 +0000
1# fmt: off
3from itertools import combinations_with_replacement
4from math import erf
6import matplotlib.pyplot as plt
7import numpy as np
8from scipy.spatial.distance import cdist
10from ase.neighborlist import NeighborList
11from ase.utils import pbc2pbc
14class OFPComparator:
15 """Implementation of comparison using Oganov's fingerprint (OFP)
16 functions, based on:
18 * :doi:`Oganov, Valle, J. Chem. Phys. 130, 104504 (2009)
19 <10.1063/1.3079326>`
21 * :doi:`Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-1632
22 <10.1016/j.cpc.2010.06.007>`
24 Parameters:
26 n_top: int or None
27 The number of atoms to optimize (None = include all).
29 dE: float
30 Energy difference above which two structures are
31 automatically considered to be different. (Default 1 eV)
33 cos_dist_max: float
34 Maximal cosine distance between two structures in
35 order to be still considered the same structure. Default 5e-3
37 rcut: float
38 Cutoff radius in Angstrom for the fingerprints.
39 (Default 20 Angstrom)
41 binwidth: float
42 Width in Angstrom of the bins over which the fingerprints
43 are discretized. (Default 0.05 Angstrom)
45 pbc: list of three booleans or None
46 Specifies whether to apply periodic boundary conditions
47 along each of the three unit cell vectors when calculating
48 the fingerprint. The default (None) is to apply PBCs in all
49 3 directions.
51 Note: for isolated systems (pbc = [False, False, False]),
52 the pair correlation function itself is always short-ranged
53 (decays to zero beyond a certain radius), so unity is not
54 subtracted for calculating the fingerprint. Also the
55 volume normalization disappears.
57 maxdims: list of three floats or None
58 If PBCs in only 1 or 2 dimensions are specified, the
59 maximal thicknesses along the non-periodic directions can
60 be specified here (the values given for the periodic
61 directions will not be used). If set to None (the
62 default), the length of the cell vector along the
63 non-periodic direction is used.
65 Note: in this implementation, the cell vectors are
66 assumed to be orthogonal.
68 sigma: float
69 Standard deviation of the gaussian smearing to be applied
70 in the calculation of the fingerprints (in
71 Angstrom). Default 0.02 Angstrom.
73 nsigma: int
74 Distance (as the number of standard deviations sigma) at
75 which the gaussian smearing is cut off (i.e. no smearing
76 beyond that distance). (Default 4)
78 recalculate: boolean
79 If True, ignores the fingerprints stored in
80 atoms.info and recalculates them. (Default False)
82 """
84 def __init__(self, n_top=None, dE=1.0, cos_dist_max=5e-3, rcut=20.,
85 binwidth=0.05, sigma=0.02, nsigma=4, pbc=True,
86 maxdims=None, recalculate=False):
87 self.n_top = n_top or 0
88 self.dE = dE
89 self.cos_dist_max = cos_dist_max
90 self.rcut = rcut
91 self.binwidth = binwidth
92 self.pbc = pbc2pbc(pbc)
94 if maxdims is None:
95 self.maxdims = [None] * 3
96 else:
97 self.maxdims = maxdims
99 self.sigma = sigma
100 self.nsigma = nsigma
101 self.recalculate = recalculate
102 self.dimensions = self.pbc.sum()
104 if self.dimensions == 1 or self.dimensions == 2:
105 for direction in range(3):
106 if not self.pbc[direction]:
107 if self.maxdims[direction] is not None:
108 if self.maxdims[direction] <= 0:
109 e = '''If a max thickness is specificed in maxdims
110 for a non-periodic direction, it has to be
111 strictly positive.'''
112 raise ValueError(e)
114 def looks_like(self, a1, a2):
115 """ Return if structure a1 or a2 are similar or not. """
116 if len(a1) != len(a2):
117 raise Exception('The two configurations are not the same size.')
119 # first we check the energy criteria
120 if a1.calc is not None and a2.calc is not None:
121 dE = abs(a1.get_potential_energy() - a2.get_potential_energy())
122 if dE >= self.dE:
123 return False
125 # then we check the structure
126 cos_dist = self._compare_structure(a1, a2)
127 verdict = cos_dist < self.cos_dist_max
128 return verdict
130 def _json_encode(self, fingerprints, typedic):
131 """ json does not accept tuples nor integers as dict keys,
132 so in order to write the fingerprints to atoms.info, we need
133 to convert them to strings """
134 fingerprints_encoded = {}
135 for key, val in fingerprints.items():
136 try:
137 newkey = "_".join(map(str, list(key)))
138 except TypeError:
139 newkey = str(key)
140 if isinstance(val, dict):
141 fingerprints_encoded[newkey] = {
142 str(key2): val2 for key2, val2 in val.items()}
143 else:
144 fingerprints_encoded[newkey] = val
145 typedic_encoded = {}
146 for key, val in typedic.items():
147 newkey = str(key)
148 typedic_encoded[newkey] = val
149 return [fingerprints_encoded, typedic_encoded]
151 def _json_decode(self, fingerprints, typedic):
152 """ This is the reverse operation of _json_encode """
153 fingerprints_decoded = {}
154 for key, val in fingerprints.items():
155 newkey = list(map(int, key.split("_")))
156 if len(newkey) > 1:
157 newkey = tuple(newkey)
158 else:
159 newkey = newkey[0]
161 if isinstance(val, dict):
162 fingerprints_decoded[newkey] = {
163 int(key2): np.array(val2) for key2, val2 in val.items()
164 }
165 else:
166 fingerprints_decoded[newkey] = np.array(val)
167 typedic_decoded = {}
168 for key, val in typedic.items():
169 newkey = int(key)
170 typedic_decoded[newkey] = val
171 return [fingerprints_decoded, typedic_decoded]
173 def _compare_structure(self, a1, a2):
174 """ Returns the cosine distance between the two structures,
175 using their fingerprints. """
177 if len(a1) != len(a2):
178 raise Exception('The two configurations are not the same size.')
180 a1top = a1[-self.n_top:]
181 a2top = a2[-self.n_top:]
183 if 'fingerprints' in a1.info and not self.recalculate:
184 fp1, typedic1 = a1.info['fingerprints']
185 fp1, typedic1 = self._json_decode(fp1, typedic1)
186 else:
187 fp1, typedic1 = self._take_fingerprints(a1top)
188 a1.info['fingerprints'] = self._json_encode(fp1, typedic1)
190 if 'fingerprints' in a2.info and not self.recalculate:
191 fp2, typedic2 = a2.info['fingerprints']
192 fp2, typedic2 = self._json_decode(fp2, typedic2)
193 else:
194 fp2, typedic2 = self._take_fingerprints(a2top)
195 a2.info['fingerprints'] = self._json_encode(fp2, typedic2)
197 if sorted(fp1) != sorted(fp2):
198 raise AssertionError('The two structures have fingerprints '
199 'with different compounds.')
200 for key in typedic1:
201 if not np.array_equal(typedic1[key], typedic2[key]):
202 raise AssertionError('The two structures have a different '
203 'stoichiometry or ordering!')
205 cos_dist = self._cosine_distance(fp1, fp2, typedic1)
206 return cos_dist
208 def _get_volume(self, a):
209 ''' Calculates the normalizing value, and other parameters
210 (pmin,pmax,qmin,qmax) that are used for surface area calculation
211 in the case of 1 or 2-D periodicity.'''
213 cell = a.get_cell()
214 scalpos = a.get_scaled_positions()
216 # defaults:
217 volume = 1.
218 pmin, pmax, qmin, qmax = [0.] * 4
220 if self.dimensions == 1 or self.dimensions == 2:
221 for direction in range(3):
222 if not self.pbc[direction]:
223 if self.maxdims[direction] is None:
224 maxdim = np.linalg.norm(cell[direction, :])
225 self.maxdims[direction] = maxdim
227 pbc_dirs = [i for i in range(3) if self.pbc[i]]
228 non_pbc_dirs = [i for i in range(3) if not self.pbc[i]]
230 if self.dimensions == 3:
231 volume = abs(np.dot(np.cross(cell[0, :], cell[1, :]), cell[2, :]))
233 elif self.dimensions == 2:
234 non_pbc_dir = non_pbc_dirs[0]
236 a = np.cross(cell[pbc_dirs[0], :], cell[pbc_dirs[1], :])
237 b = self.maxdims[non_pbc_dir]
238 b /= np.linalg.norm(cell[non_pbc_dir, :])
240 volume = np.abs(np.dot(a, b * cell[non_pbc_dir, :]))
242 maxpos = np.max(scalpos[:, non_pbc_dir])
243 minpos = np.min(scalpos[:, non_pbc_dir])
244 pwidth = maxpos - minpos
245 pmargin = 0.5 * (b - pwidth)
246 # note: here is a place where we assume that the
247 # non-periodic direction is orthogonal to the periodic ones:
248 pmin = np.min(scalpos[:, non_pbc_dir]) - pmargin
249 pmin *= np.linalg.norm(cell[non_pbc_dir, :])
250 pmax = np.max(scalpos[:, non_pbc_dir]) + pmargin
251 pmax *= np.linalg.norm(cell[non_pbc_dir, :])
253 elif self.dimensions == 1:
254 pbc_dir = pbc_dirs[0]
256 v0 = cell[non_pbc_dirs[0], :]
257 b0 = self.maxdims[non_pbc_dirs[0]]
258 b0 /= np.linalg.norm(cell[non_pbc_dirs[0], :])
259 v1 = cell[non_pbc_dirs[1], :]
260 b1 = self.maxdims[non_pbc_dirs[1]]
261 b1 /= np.linalg.norm(cell[non_pbc_dirs[1], :])
263 volume = np.abs(np.dot(np.cross(b0 * v0, b1 * v1),
264 cell[pbc_dir, :]))
266 # note: here is a place where we assume that the
267 # non-periodic direction is orthogonal to the periodic ones:
268 maxpos = np.max(scalpos[:, non_pbc_dirs[0]])
269 minpos = np.min(scalpos[:, non_pbc_dirs[0]])
270 pwidth = maxpos - minpos
271 pmargin = 0.5 * (b0 - pwidth)
273 pmin = np.min(scalpos[:, non_pbc_dirs[0]]) - pmargin
274 pmin *= np.linalg.norm(cell[non_pbc_dirs[0], :])
275 pmax = np.max(scalpos[:, non_pbc_dirs[0]]) + pmargin
276 pmax *= np.linalg.norm(cell[non_pbc_dirs[0], :])
278 maxpos = np.max(scalpos[:, non_pbc_dirs[1]])
279 minpos = np.min(scalpos[:, non_pbc_dirs[1]])
280 qwidth = maxpos - minpos
281 qmargin = 0.5 * (b1 - qwidth)
283 qmin = np.min(scalpos[:, non_pbc_dirs[1]]) - qmargin
284 qmin *= np.linalg.norm(cell[non_pbc_dirs[1], :])
285 qmax = np.max(scalpos[:, non_pbc_dirs[1]]) + qmargin
286 qmax *= np.linalg.norm(cell[non_pbc_dirs[1], :])
288 elif self.dimensions == 0:
289 volume = 1.
291 return [volume, pmin, pmax, qmin, qmax]
293 def _take_fingerprints(self, atoms, individual=False):
294 """ Returns a [fingerprints,typedic] list, where fingerprints
295 is a dictionary with the fingerprints, and typedic is a
296 dictionary with the list of atom indices for each element
297 (or "type") in the atoms object.
298 The keys in the fingerprints dictionary are the (A,B) tuples,
299 which are the different element-element combinations in the
300 atoms object (A and B are the atomic numbers).
301 When A != B, the (A,B) tuple is sorted (A < B).
303 If individual=True, a dict is returned, where each atom index
304 has an {atomic_number:fingerprint} dict as value.
305 If individual=False, the fingerprints from atoms of the same
306 atomic number are added together."""
308 pos = atoms.get_positions()
309 num = atoms.get_atomic_numbers()
310 cell = atoms.get_cell()
312 unique_types = np.unique(num)
313 posdic = {}
314 typedic = {}
315 for t in unique_types:
316 tlist = [i for i, atom in enumerate(atoms) if atom.number == t]
317 typedic[t] = tlist
318 posdic[t] = pos[tlist]
320 # determining the volume normalization and other parameters
321 volume, pmin, pmax, qmin, qmax = self._get_volume(atoms)
323 # functions for calculating the surface area
324 non_pbc_dirs = [i for i in range(3) if not self.pbc[i]]
326 def surface_area_0d(r):
327 return 4 * np.pi * (r**2)
329 def surface_area_1d(r, pos):
330 q0 = pos[non_pbc_dirs[1]]
331 phi1 = np.lib.scimath.arccos((qmax - q0) / r).real
332 phi2 = np.pi - np.lib.scimath.arccos((qmin - q0) / r).real
333 factor = 1 - (phi1 + phi2) / np.pi
334 return surface_area_2d(r, pos) * factor
336 def surface_area_2d(r, pos):
337 p0 = pos[non_pbc_dirs[0]]
338 area = np.minimum(pmax - p0, r) + np.minimum(p0 - pmin, r)
339 area *= 2 * np.pi * r
340 return area
342 def surface_area_3d(r):
343 return 4 * np.pi * (r**2)
345 # build neighborlist
346 # this is computationally the most intensive part
347 a = atoms.copy()
348 a.set_pbc(self.pbc)
349 nl = NeighborList([self.rcut / 2.] * len(a), skin=0.,
350 self_interaction=False, bothways=True)
351 nl.update(a)
353 # parameters for the binning:
354 m = int(np.ceil(self.nsigma * self.sigma / self.binwidth))
355 x = 0.25 * np.sqrt(2) * self.binwidth * (2 * m + 1) * 1. / self.sigma
356 smearing_norm = erf(x)
357 nbins = int(np.ceil(self.rcut * 1. / self.binwidth))
358 bindist = self.binwidth * np.arange(1, nbins + 1)
360 def take_individual_rdf(index, unique_type):
361 # Computes the radial distribution function of atoms
362 # of type unique_type around the atom with index "index".
363 rdf = np.zeros(nbins)
365 if self.dimensions == 3:
366 weights = 1. / surface_area_3d(bindist)
367 elif self.dimensions == 2:
368 weights = 1. / surface_area_2d(bindist, pos[index])
369 elif self.dimensions == 1:
370 weights = 1. / surface_area_1d(bindist, pos[index])
371 elif self.dimensions == 0:
372 weights = 1. / surface_area_0d(bindist)
373 weights /= self.binwidth
375 indices, offsets = nl.get_neighbors(index)
376 valid = np.where(num[indices] == unique_type)
377 p = pos[indices[valid]] + np.dot(offsets[valid], cell)
378 r = cdist(p, [pos[index]])
379 bins = np.floor(r / self.binwidth)
381 for i in range(-m, m + 1):
382 newbins = bins + i
383 valid = np.where((newbins >= 0) & (newbins < nbins))
384 valid_bins = newbins[valid].astype(int)
385 values = weights[valid_bins]
387 c = 0.25 * np.sqrt(2) * self.binwidth * 1. / self.sigma
388 values *= 0.5 * erf(c * (2 * i + 1)) - \
389 0.5 * erf(c * (2 * i - 1))
390 values /= smearing_norm
392 for j, valid_bin in enumerate(valid_bins):
393 rdf[valid_bin] += values[j]
395 rdf /= len(typedic[unique_type]) * 1. / volume
396 return rdf
398 fingerprints = {}
399 if individual:
400 for i in range(len(atoms)):
401 fingerprints[i] = {}
402 for unique_type in unique_types:
403 fingerprint = take_individual_rdf(i, unique_type)
404 if self.dimensions > 0:
405 fingerprint -= 1
406 fingerprints[i][unique_type] = fingerprint
407 else:
408 for t1, t2 in combinations_with_replacement(unique_types, r=2):
409 key = (t1, t2)
410 fingerprint = np.zeros(nbins)
411 for i in typedic[t1]:
412 fingerprint += take_individual_rdf(i, t2)
413 fingerprint /= len(typedic[t1])
414 if self.dimensions > 0:
415 fingerprint -= 1
416 fingerprints[key] = fingerprint
418 return [fingerprints, typedic]
420 def _calculate_local_orders(self, individual_fingerprints, typedic,
421 volume):
422 """ Returns a list with the local order for every atom,
423 using the definition of local order from
424 Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-1632
425 :doi:`10.1016/j.cpc.2010.06.007`"""
427 # total number of atoms:
428 n_tot = sum(len(typedic[key]) for key in typedic)
429 inv_n_tot = 1. / n_tot
431 local_orders = []
432 for fingerprints in individual_fingerprints.values():
433 local_order = 0
434 for unique_type, fingerprint in fingerprints.items():
435 term = np.linalg.norm(fingerprint)**2
436 term *= self.binwidth
437 term *= (volume * inv_n_tot)**(-1 / 3)
438 term *= len(typedic[unique_type]) * inv_n_tot
439 local_order += term
440 local_orders.append(np.sqrt(local_order))
442 return local_orders
444 def get_local_orders(self, a):
445 """ Returns the local orders of all the atoms."""
447 a_top = a[-self.n_top:]
448 key = 'individual_fingerprints'
450 if key in a.info and not self.recalculate:
451 fp, typedic = self._json_decode(*a.info[key])
452 else:
453 fp, typedic = self._take_fingerprints(a_top, individual=True)
454 a.info[key] = self._json_encode(fp, typedic)
456 volume, _pmin, _pmax, _qmin, _qmax = self._get_volume(a_top)
457 return self._calculate_local_orders(fp, typedic, volume)
459 def _cosine_distance(self, fp1, fp2, typedic):
460 """ Returns the cosine distance from two fingerprints.
461 It also needs information about the number of atoms from
462 each element, which is included in "typedic"."""
464 keys = sorted(fp1)
466 # calculating the weights:
467 w = {}
468 wtot = 0
469 for key in keys:
470 weight = len(typedic[key[0]]) * len(typedic[key[1]])
471 wtot += weight
472 w[key] = weight
473 for key in keys:
474 w[key] *= 1. / wtot
476 # calculating the fingerprint norms:
477 norm1 = 0
478 norm2 = 0
479 for key in keys:
480 norm1 += (np.linalg.norm(fp1[key])**2) * w[key]
481 norm2 += (np.linalg.norm(fp2[key])**2) * w[key]
482 norm1 = np.sqrt(norm1)
483 norm2 = np.sqrt(norm2)
485 # calculating the distance:
486 distance = 0
487 for key in keys:
488 distance += np.sum(fp1[key] * fp2[key]) * w[key] / (norm1 * norm2)
490 distance = 0.5 * (1 - distance)
491 return distance
493 def plot_fingerprints(self, a, prefix=''):
494 """ Function for quickly plotting all the fingerprints.
495 Prefix = a prefix you want to give to the resulting PNG file."""
497 if 'fingerprints' in a.info and not self.recalculate:
498 fp, typedic = a.info['fingerprints']
499 fp, typedic = self._json_decode(fp, typedic)
500 else:
501 a_top = a[-self.n_top:]
502 fp, typedic = self._take_fingerprints(a_top)
503 a.info['fingerprints'] = self._json_encode(fp, typedic)
505 npts = int(np.ceil(self.rcut * 1. / self.binwidth))
506 x = np.linspace(0, self.rcut, npts, endpoint=False)
508 for key, val in fp.items():
509 plt.plot(x, val)
510 suffix = f"_fp_{key[0]}_{key[1]}.png"
511 plt.savefig(prefix + suffix)
512 plt.clf()
514 def plot_individual_fingerprints(self, a, prefix=''):
515 """ Function for plotting all the individual fingerprints.
516 Prefix = a prefix for the resulting PNG file."""
517 if 'individual_fingerprints' in a.info and not self.recalculate:
518 fp, typedic = a.info['individual_fingerprints']
519 else:
520 a_top = a[-self.n_top:]
521 fp, typedic = self._take_fingerprints(a_top, individual=True)
522 a.info['individual_fingerprints'] = [fp, typedic]
524 npts = int(np.ceil(self.rcut * 1. / self.binwidth))
525 x = np.linspace(0, self.rcut, npts, endpoint=False)
527 for key, val in fp.items():
528 for key2, val2 in val.items():
529 plt.plot(x, val2)
530 plt.ylim([-1, 10])
531 suffix = f"_individual_fp_{key}_{key2}.png"
532 plt.savefig(prefix + suffix)
533 plt.clf()