Coverage for /builds/ase/ase/ase/io/lammpsrun.py: 88.24%
221 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-08-02 00:12 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2025-08-02 00:12 +0000
1# fmt: off
3import gzip
4import struct
5from collections import deque
6from os.path import splitext
8import numpy as np
10from ase.atoms import Atoms
11from ase.calculators.lammps import convert
12from ase.calculators.singlepoint import SinglePointCalculator
13from ase.data import atomic_masses, chemical_symbols
14from ase.parallel import paropen
17def read_lammps_dump(infileobj, **kwargs):
18 """Method which reads a LAMMPS dump file.
20 LAMMPS chooses output method depending on the given suffix:
21 - .bin : binary file
22 - .gz : output piped through gzip
23 - .mpiio: using mpiio (should be like cleartext,
24 with different ordering)
25 - else : normal clear-text format
27 :param infileobj: string to file, opened file or file-like stream
29 """
30 # !TODO: add support for lammps-regex naming schemes (output per
31 # processor and timestep wildcards)
33 opened = False
34 if isinstance(infileobj, str):
35 opened = True
36 suffix = splitext(infileobj)[-1]
37 if suffix == ".bin":
38 fileobj = paropen(infileobj, "rb")
39 elif suffix == ".gz":
40 # !TODO: save for parallel execution?
41 fileobj = gzip.open(infileobj, "rb")
42 else:
43 fileobj = paropen(infileobj)
44 else:
45 suffix = splitext(infileobj.name)[-1]
46 fileobj = infileobj
48 if suffix == ".bin":
49 out = read_lammps_dump_binary(fileobj, **kwargs)
50 if opened:
51 fileobj.close()
52 return out
54 out = read_lammps_dump_text(fileobj, **kwargs)
56 if opened:
57 fileobj.close()
59 return out
62def lammps_data_to_ase_atoms(
63 data,
64 colnames,
65 cell,
66 celldisp,
67 pbc=False,
68 atomsobj=Atoms,
69 order=True,
70 specorder=None,
71 prismobj=None,
72 units="metal",
73):
74 """Extract positions and other per-atom parameters and create Atoms
76 :param data: per atom data
77 :param colnames: index for data
78 :param cell: cell dimensions
79 :param celldisp: origin shift
80 :param pbc: periodic boundaries
81 :param atomsobj: function to create ase-Atoms object
82 :param order: sort atoms by id. Might be faster to turn off.
83 Disregarded in case `id` column is not given in file.
84 :param specorder: list of species to map lammps types to ase-species
85 (usually .dump files to not contain type to species mapping)
86 :param prismobj: Coordinate transformation between lammps and ase
87 :type prismobj: Prism
88 :param units: lammps units for unit transformation between lammps and ase
89 :returns: Atoms object
90 :rtype: Atoms
92 """
93 if len(data.shape) == 1:
94 data = data[np.newaxis, :]
96 # read IDs if given and order if needed
97 if "id" in colnames:
98 ids = data[:, colnames.index("id")].astype(int)
99 if order:
100 sort_order = np.argsort(ids)
101 data = data[sort_order, :]
103 # determine the elements
104 if "element" in colnames:
105 # priority to elements written in file
106 elements = data[:, colnames.index("element")]
107 elif "mass" in colnames:
108 # try to determine elements from masses
109 elements = [
110 _mass2element(m)
111 for m in data[:, colnames.index("mass")].astype(float)
112 ]
113 elif "type" in colnames:
114 # fall back to `types` otherwise
115 elements = data[:, colnames.index("type")].astype(int)
117 # reconstruct types from given specorder
118 if specorder:
119 elements = [specorder[t - 1] for t in elements]
120 else:
121 # todo: what if specorder give but no types?
122 # in principle the masses could work for atoms, but that needs
123 # lots of cases and new code I guess
124 raise ValueError("Cannot determine atom types form LAMMPS dump file")
126 def get_quantity(labels, quantity=None):
127 try:
128 cols = [colnames.index(label) for label in labels]
129 if quantity:
130 return convert(data[:, cols].astype(float), quantity,
131 units, "ASE")
133 return data[:, cols].astype(float)
134 except ValueError:
135 return None
137 # Positions
138 positions = None
139 scaled_positions = None
140 if "x" in colnames:
141 # doc: x, y, z = unscaled atom coordinates
142 positions = get_quantity(["x", "y", "z"], "distance")
143 elif "xs" in colnames:
144 # doc: xs,ys,zs = scaled atom coordinates
145 scaled_positions = get_quantity(["xs", "ys", "zs"])
146 elif "xu" in colnames:
147 # doc: xu,yu,zu = unwrapped atom coordinates
148 positions = get_quantity(["xu", "yu", "zu"], "distance")
149 elif "xsu" in colnames:
150 # xsu,ysu,zsu = scaled unwrapped atom coordinates
151 scaled_positions = get_quantity(["xsu", "ysu", "zsu"])
152 else:
153 raise ValueError("No atomic positions found in LAMMPS output")
155 velocities = get_quantity(["vx", "vy", "vz"], "velocity")
156 charges = get_quantity(["q"], "charge")
157 forces = get_quantity(["fx", "fy", "fz"], "force")
158 # !TODO: how need quaternions be converted?
159 quaternions = get_quantity(["c_q[1]", "c_q[2]", "c_q[3]", "c_q[4]"])
161 # convert cell
162 cell = convert(cell, "distance", units, "ASE")
163 celldisp = convert(celldisp, "distance", units, "ASE")
164 if prismobj:
165 celldisp = prismobj.vector_to_ase(celldisp)
166 cell = prismobj.update_cell(cell)
168 if quaternions is not None:
169 out_atoms = atomsobj(
170 symbols=elements,
171 positions=positions,
172 cell=cell,
173 celldisp=celldisp,
174 pbc=pbc,
175 )
176 out_atoms.new_array('quaternions', quaternions, dtype=float)
177 elif positions is not None:
178 # reverse coordinations transform to lammps system
179 # (for all vectors = pos, vel, force)
180 if prismobj:
181 positions = prismobj.vector_to_ase(positions, wrap=True)
183 out_atoms = atomsobj(
184 symbols=elements,
185 positions=positions,
186 pbc=pbc,
187 celldisp=celldisp,
188 cell=cell
189 )
190 elif scaled_positions is not None:
191 out_atoms = atomsobj(
192 symbols=elements,
193 scaled_positions=scaled_positions,
194 pbc=pbc,
195 celldisp=celldisp,
196 cell=cell,
197 )
199 if velocities is not None:
200 if prismobj:
201 velocities = prismobj.vector_to_ase(velocities)
202 out_atoms.set_velocities(velocities)
203 if charges is not None:
204 out_atoms.set_initial_charges([charge[0] for charge in charges])
205 if forces is not None:
206 if prismobj:
207 forces = prismobj.vector_to_ase(forces)
208 # !TODO: use another calculator if available (or move forces
209 # to atoms.property) (other problem: synchronizing
210 # parallel runs)
211 calculator = SinglePointCalculator(out_atoms, energy=0.0,
212 forces=forces)
213 out_atoms.calc = calculator
215 # process the extra columns of fixes, variables and computes
216 # that can be dumped, add as additional arrays to atoms object
217 for colname in colnames:
218 # determine if it is a compute, fix or
219 # custom property/atom (but not the quaternian)
220 if (colname.startswith('f_') or colname.startswith('v_') or
221 colname.startswith('d_') or colname.startswith('d2_') or
222 (colname.startswith('c_') and not colname.startswith('c_q['))):
223 out_atoms.new_array(colname, get_quantity([colname]),
224 dtype='float')
226 elif colname.startswith('i_') or colname.startswith('i2_'):
227 out_atoms.new_array(colname, get_quantity([colname]),
228 dtype='int')
230 return out_atoms
233def construct_cell(diagdisp, offdiag):
234 """Help function to create an ASE-cell with displacement vector from
235 the lammps coordination system parameters.
237 :param diagdisp: cell dimension convoluted with the displacement vector
238 :param offdiag: off-diagonal cell elements
239 :returns: cell and cell displacement vector
240 :rtype: tuple
241 """
242 xlo, xhi, ylo, yhi, zlo, zhi = diagdisp
243 xy, xz, yz = offdiag
245 # create ase-cell from lammps-box
246 xhilo = (xhi - xlo) - abs(xy) - abs(xz)
247 yhilo = (yhi - ylo) - abs(yz)
248 zhilo = zhi - zlo
249 celldispx = xlo - min(0, xy) - min(0, xz)
250 celldispy = ylo - min(0, yz)
251 celldispz = zlo
252 cell = np.array([[xhilo, 0, 0], [xy, yhilo, 0], [xz, yz, zhilo]])
253 celldisp = np.array([celldispx, celldispy, celldispz])
255 return cell, celldisp
258def get_max_index(index):
259 if np.isscalar(index):
260 return index
261 elif isinstance(index, slice):
262 return index.stop if (index.stop is not None) else float("inf")
265def read_lammps_dump_text(fileobj, index=-1, **kwargs):
266 """Process cleartext lammps dumpfiles
268 :param fileobj: filestream providing the trajectory data
269 :param index: integer or slice object (default: get the last timestep)
270 :returns: list of Atoms objects
271 :rtype: list
272 """
273 # Load all dumped timesteps into memory simultaneously
274 lines = deque(fileobj.readlines())
275 index_end = get_max_index(index)
277 n_atoms = 0
278 images = []
280 # avoid references before assignment in case of incorrect file structure
281 cell, celldisp, pbc, info = None, None, False, {}
283 while len(lines) > n_atoms:
284 line = lines.popleft()
286 if "ITEM: TIMESTEP" in line:
287 line = lines.popleft()
288 # !TODO: pyflakes complains about this line -> do something
289 ntimestep = int(line.split()[0]) # NOQA
290 info["timestep"] = ntimestep
292 if "ITEM: NUMBER OF ATOMS" in line:
293 line = lines.popleft()
294 n_atoms = int(line.split()[0])
296 if "ITEM: BOX BOUNDS" in line:
297 # save labels behind "ITEM: BOX BOUNDS" in triclinic case
298 # (>=lammps-7Jul09)
299 tilt_items = line.split()[3:]
300 celldatarows = [lines.popleft() for _ in range(3)]
301 celldata = np.loadtxt(celldatarows)
302 diagdisp = celldata[:, :2].reshape(6, 1).flatten()
304 # determine cell tilt (triclinic case!)
305 if len(celldata[0]) > 2:
306 # for >=lammps-7Jul09 use labels behind "ITEM: BOX BOUNDS"
307 # to assign tilt (vector) elements ...
308 offdiag = celldata[:, 2]
309 # ... otherwise assume default order in 3rd column
310 # (if the latter was present)
311 if len(tilt_items) >= 3:
312 sort_index = [tilt_items.index(i)
313 for i in ["xy", "xz", "yz"]]
314 offdiag = offdiag[sort_index]
315 else:
316 offdiag = (0.0,) * 3
318 cell, celldisp = construct_cell(diagdisp, offdiag)
320 # Handle pbc conditions
321 if len(tilt_items) == 3:
322 pbc_items = tilt_items
323 elif len(tilt_items) > 3:
324 pbc_items = tilt_items[3:6]
325 else:
326 pbc_items = ["f", "f", "f"]
327 pbc = ["p" in d.lower() for d in pbc_items]
329 if "ITEM: ATOMS" in line:
330 colnames = line.split()[2:]
331 datarows = [lines.popleft() for _ in range(n_atoms)]
332 data = np.loadtxt(datarows, dtype=str, ndmin=2)
333 out_atoms = lammps_data_to_ase_atoms(
334 data=data,
335 colnames=colnames,
336 cell=cell,
337 celldisp=celldisp,
338 atomsobj=Atoms,
339 pbc=pbc,
340 **kwargs,
341 )
342 out_atoms.info.update(info)
343 images.append(out_atoms)
345 if len(images) > index_end >= 0:
346 break
348 return images[index]
351def read_lammps_dump_binary(
352 fileobj, index=-1, colnames=None, intformat="SMALLBIG", **kwargs
353):
354 """Read binary dump-files (after binary2txt.cpp from lammps/tools)
356 :param fileobj: file-stream containing the binary lammps data
357 :param index: integer or slice object (default: get the last timestep)
358 :param colnames: data is columns and identified by a header
359 :param intformat: lammps support different integer size. Parameter set \
360 at compile-time and can unfortunately not derived from data file
361 :returns: list of Atoms-objects
362 :rtype: list
363 """
364 # depending on the chosen compilation flag lammps uses either normal
365 # integers or long long for its id or timestep numbering
366 # !TODO: tags are cast to double -> missing/double ids (add check?)
367 _tagformat, bigformat = dict(
368 SMALLSMALL=("i", "i"), SMALLBIG=("i", "q"), BIGBIG=("q", "q")
369 )[intformat]
371 index_end = get_max_index(index)
373 # Standard columns layout from lammpsrun
374 if not colnames:
375 colnames = ["id", "type", "x", "y", "z",
376 "vx", "vy", "vz", "fx", "fy", "fz"]
378 images = []
380 # wrap struct.unpack to raise EOFError
381 def read_variables(string):
382 obj_len = struct.calcsize(string)
383 data_obj = fileobj.read(obj_len)
384 if obj_len != len(data_obj):
385 raise EOFError
386 return struct.unpack(string, data_obj)
388 while True:
389 try:
390 # Assume that the binary dump file is in the old (pre-29Oct2020)
391 # format
392 magic_string = None
394 # read header
395 ntimestep, = read_variables("=" + bigformat)
397 # In the new LAMMPS binary dump format (version 29Oct2020 and
398 # onward), a negative timestep is used to indicate that the next
399 # few bytes will contain certain metadata
400 if ntimestep < 0:
401 # First bigint was actually encoding the negative of the format
402 # name string length (we call this 'magic_string' to
403 magic_string_len = -ntimestep
405 # The next `magic_string_len` bytes will hold a string
406 # indicating the format of the dump file
407 magic_string = b''.join(read_variables(
408 "=" + str(magic_string_len) + "c"))
410 # Read endianness (integer). For now, we'll disregard the value
411 # and simply use the host machine's endianness (via '='
412 # character used with struct.calcsize).
413 #
414 # TODO: Use the endianness of the dump file in subsequent
415 # read_variables rather than just assuming it will match
416 # that of the host
417 read_variables("=i")
419 # Read revision number (integer)
420 revision, = read_variables("=i")
422 # Finally, read the actual timestep (bigint)
423 ntimestep, = read_variables("=" + bigformat)
425 _n_atoms, triclinic = read_variables("=" + bigformat + "i")
426 boundary = read_variables("=6i")
427 diagdisp = read_variables("=6d")
428 if triclinic != 0:
429 offdiag = read_variables("=3d")
430 else:
431 offdiag = (0.0,) * 3
432 size_one, = read_variables("=i")
434 if len(colnames) != size_one:
435 raise ValueError("Provided columns do not match binary file")
437 if magic_string and revision > 1:
438 # New binary dump format includes units string,
439 # columns string, and time
440 units_str_len, = read_variables("=i")
442 if units_str_len > 0:
443 # Read lammps units style
444 _ = b''.join(
445 read_variables("=" + str(units_str_len) + "c"))
447 flag, = read_variables("=c")
448 if flag != b'\x00':
449 # Flag was non-empty string
450 read_variables("=d")
452 # Length of column string
453 columns_str_len, = read_variables("=i")
455 # Read column string (e.g., "id type x y z vx vy vz fx fy fz")
456 _ = b''.join(read_variables("=" + str(columns_str_len) + "c"))
458 nchunk, = read_variables("=i")
460 # lammps cells/boxes can have different boundary conditions on each
461 # sides (makes mainly sense for different non-periodic conditions
462 # (e.g. [f]ixed and [s]hrink for a irradiation simulation))
463 # periodic case: b 0 = 'p'
464 # non-peridic cases 1: 'f', 2 : 's', 3: 'm'
465 pbc = np.sum(np.array(boundary).reshape((3, 2)), axis=1) == 0
467 cell, celldisp = construct_cell(diagdisp, offdiag)
469 data = []
470 for _ in range(nchunk):
471 # number-of-data-entries
472 n_data, = read_variables("=i")
473 # retrieve per atom data
474 data += read_variables("=" + str(n_data) + "d")
475 data = np.array(data).reshape((-1, size_one))
477 # map data-chunk to ase atoms
478 out_atoms = lammps_data_to_ase_atoms(
479 data=data,
480 colnames=colnames,
481 cell=cell,
482 celldisp=celldisp,
483 pbc=pbc,
484 **kwargs
485 )
487 images.append(out_atoms)
489 # stop if requested index has been found
490 if len(images) > index_end >= 0:
491 break
493 except EOFError:
494 break
496 return images[index]
499def _mass2element(mass):
500 """
501 Guess the element corresponding to a given atomic mass.
503 :param mass: Atomic mass for searching.
504 :return: Element symbol as a string.
505 """
506 min_idx = np.argmin(np.abs(atomic_masses - mass))
507 element = chemical_symbols[min_idx]
508 return element