Coverage for /builds/ase/ase/ase/io/lammpsrun.py: 88.24%

221 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-08-02 00:12 +0000

1# fmt: off 

2 

3import gzip 

4import struct 

5from collections import deque 

6from os.path import splitext 

7 

8import numpy as np 

9 

10from ase.atoms import Atoms 

11from ase.calculators.lammps import convert 

12from ase.calculators.singlepoint import SinglePointCalculator 

13from ase.data import atomic_masses, chemical_symbols 

14from ase.parallel import paropen 

15 

16 

17def read_lammps_dump(infileobj, **kwargs): 

18 """Method which reads a LAMMPS dump file. 

19 

20 LAMMPS chooses output method depending on the given suffix: 

21 - .bin : binary file 

22 - .gz : output piped through gzip 

23 - .mpiio: using mpiio (should be like cleartext, 

24 with different ordering) 

25 - else : normal clear-text format 

26 

27 :param infileobj: string to file, opened file or file-like stream 

28 

29 """ 

30 # !TODO: add support for lammps-regex naming schemes (output per 

31 # processor and timestep wildcards) 

32 

33 opened = False 

34 if isinstance(infileobj, str): 

35 opened = True 

36 suffix = splitext(infileobj)[-1] 

37 if suffix == ".bin": 

38 fileobj = paropen(infileobj, "rb") 

39 elif suffix == ".gz": 

40 # !TODO: save for parallel execution? 

41 fileobj = gzip.open(infileobj, "rb") 

42 else: 

43 fileobj = paropen(infileobj) 

44 else: 

45 suffix = splitext(infileobj.name)[-1] 

46 fileobj = infileobj 

47 

48 if suffix == ".bin": 

49 out = read_lammps_dump_binary(fileobj, **kwargs) 

50 if opened: 

51 fileobj.close() 

52 return out 

53 

54 out = read_lammps_dump_text(fileobj, **kwargs) 

55 

56 if opened: 

57 fileobj.close() 

58 

59 return out 

60 

61 

62def lammps_data_to_ase_atoms( 

63 data, 

64 colnames, 

65 cell, 

66 celldisp, 

67 pbc=False, 

68 atomsobj=Atoms, 

69 order=True, 

70 specorder=None, 

71 prismobj=None, 

72 units="metal", 

73): 

74 """Extract positions and other per-atom parameters and create Atoms 

75 

76 :param data: per atom data 

77 :param colnames: index for data 

78 :param cell: cell dimensions 

79 :param celldisp: origin shift 

80 :param pbc: periodic boundaries 

81 :param atomsobj: function to create ase-Atoms object 

82 :param order: sort atoms by id. Might be faster to turn off. 

83 Disregarded in case `id` column is not given in file. 

84 :param specorder: list of species to map lammps types to ase-species 

85 (usually .dump files to not contain type to species mapping) 

86 :param prismobj: Coordinate transformation between lammps and ase 

87 :type prismobj: Prism 

88 :param units: lammps units for unit transformation between lammps and ase 

89 :returns: Atoms object 

90 :rtype: Atoms 

91 

92 """ 

93 if len(data.shape) == 1: 

94 data = data[np.newaxis, :] 

95 

96 # read IDs if given and order if needed 

97 if "id" in colnames: 

98 ids = data[:, colnames.index("id")].astype(int) 

99 if order: 

100 sort_order = np.argsort(ids) 

101 data = data[sort_order, :] 

102 

103 # determine the elements 

104 if "element" in colnames: 

105 # priority to elements written in file 

106 elements = data[:, colnames.index("element")] 

107 elif "mass" in colnames: 

108 # try to determine elements from masses 

109 elements = [ 

110 _mass2element(m) 

111 for m in data[:, colnames.index("mass")].astype(float) 

112 ] 

113 elif "type" in colnames: 

114 # fall back to `types` otherwise 

115 elements = data[:, colnames.index("type")].astype(int) 

116 

117 # reconstruct types from given specorder 

118 if specorder: 

119 elements = [specorder[t - 1] for t in elements] 

120 else: 

121 # todo: what if specorder give but no types? 

122 # in principle the masses could work for atoms, but that needs 

123 # lots of cases and new code I guess 

124 raise ValueError("Cannot determine atom types form LAMMPS dump file") 

125 

126 def get_quantity(labels, quantity=None): 

127 try: 

128 cols = [colnames.index(label) for label in labels] 

129 if quantity: 

130 return convert(data[:, cols].astype(float), quantity, 

131 units, "ASE") 

132 

133 return data[:, cols].astype(float) 

134 except ValueError: 

135 return None 

136 

137 # Positions 

138 positions = None 

139 scaled_positions = None 

140 if "x" in colnames: 

141 # doc: x, y, z = unscaled atom coordinates 

142 positions = get_quantity(["x", "y", "z"], "distance") 

143 elif "xs" in colnames: 

144 # doc: xs,ys,zs = scaled atom coordinates 

145 scaled_positions = get_quantity(["xs", "ys", "zs"]) 

146 elif "xu" in colnames: 

147 # doc: xu,yu,zu = unwrapped atom coordinates 

148 positions = get_quantity(["xu", "yu", "zu"], "distance") 

149 elif "xsu" in colnames: 

150 # xsu,ysu,zsu = scaled unwrapped atom coordinates 

151 scaled_positions = get_quantity(["xsu", "ysu", "zsu"]) 

152 else: 

153 raise ValueError("No atomic positions found in LAMMPS output") 

154 

155 velocities = get_quantity(["vx", "vy", "vz"], "velocity") 

156 charges = get_quantity(["q"], "charge") 

157 forces = get_quantity(["fx", "fy", "fz"], "force") 

158 # !TODO: how need quaternions be converted? 

159 quaternions = get_quantity(["c_q[1]", "c_q[2]", "c_q[3]", "c_q[4]"]) 

160 

161 # convert cell 

162 cell = convert(cell, "distance", units, "ASE") 

163 celldisp = convert(celldisp, "distance", units, "ASE") 

164 if prismobj: 

165 celldisp = prismobj.vector_to_ase(celldisp) 

166 cell = prismobj.update_cell(cell) 

167 

168 if quaternions is not None: 

169 out_atoms = atomsobj( 

170 symbols=elements, 

171 positions=positions, 

172 cell=cell, 

173 celldisp=celldisp, 

174 pbc=pbc, 

175 ) 

176 out_atoms.new_array('quaternions', quaternions, dtype=float) 

177 elif positions is not None: 

178 # reverse coordinations transform to lammps system 

179 # (for all vectors = pos, vel, force) 

180 if prismobj: 

181 positions = prismobj.vector_to_ase(positions, wrap=True) 

182 

183 out_atoms = atomsobj( 

184 symbols=elements, 

185 positions=positions, 

186 pbc=pbc, 

187 celldisp=celldisp, 

188 cell=cell 

189 ) 

190 elif scaled_positions is not None: 

191 out_atoms = atomsobj( 

192 symbols=elements, 

193 scaled_positions=scaled_positions, 

194 pbc=pbc, 

195 celldisp=celldisp, 

196 cell=cell, 

197 ) 

198 

199 if velocities is not None: 

200 if prismobj: 

201 velocities = prismobj.vector_to_ase(velocities) 

202 out_atoms.set_velocities(velocities) 

203 if charges is not None: 

204 out_atoms.set_initial_charges([charge[0] for charge in charges]) 

205 if forces is not None: 

206 if prismobj: 

207 forces = prismobj.vector_to_ase(forces) 

208 # !TODO: use another calculator if available (or move forces 

209 # to atoms.property) (other problem: synchronizing 

210 # parallel runs) 

211 calculator = SinglePointCalculator(out_atoms, energy=0.0, 

212 forces=forces) 

213 out_atoms.calc = calculator 

214 

215 # process the extra columns of fixes, variables and computes 

216 # that can be dumped, add as additional arrays to atoms object 

217 for colname in colnames: 

218 # determine if it is a compute, fix or 

219 # custom property/atom (but not the quaternian) 

220 if (colname.startswith('f_') or colname.startswith('v_') or 

221 colname.startswith('d_') or colname.startswith('d2_') or 

222 (colname.startswith('c_') and not colname.startswith('c_q['))): 

223 out_atoms.new_array(colname, get_quantity([colname]), 

224 dtype='float') 

225 

226 elif colname.startswith('i_') or colname.startswith('i2_'): 

227 out_atoms.new_array(colname, get_quantity([colname]), 

228 dtype='int') 

229 

230 return out_atoms 

231 

232 

233def construct_cell(diagdisp, offdiag): 

234 """Help function to create an ASE-cell with displacement vector from 

235 the lammps coordination system parameters. 

236 

237 :param diagdisp: cell dimension convoluted with the displacement vector 

238 :param offdiag: off-diagonal cell elements 

239 :returns: cell and cell displacement vector 

240 :rtype: tuple 

241 """ 

242 xlo, xhi, ylo, yhi, zlo, zhi = diagdisp 

243 xy, xz, yz = offdiag 

244 

245 # create ase-cell from lammps-box 

246 xhilo = (xhi - xlo) - abs(xy) - abs(xz) 

247 yhilo = (yhi - ylo) - abs(yz) 

248 zhilo = zhi - zlo 

249 celldispx = xlo - min(0, xy) - min(0, xz) 

250 celldispy = ylo - min(0, yz) 

251 celldispz = zlo 

252 cell = np.array([[xhilo, 0, 0], [xy, yhilo, 0], [xz, yz, zhilo]]) 

253 celldisp = np.array([celldispx, celldispy, celldispz]) 

254 

255 return cell, celldisp 

256 

257 

258def get_max_index(index): 

259 if np.isscalar(index): 

260 return index 

261 elif isinstance(index, slice): 

262 return index.stop if (index.stop is not None) else float("inf") 

263 

264 

265def read_lammps_dump_text(fileobj, index=-1, **kwargs): 

266 """Process cleartext lammps dumpfiles 

267 

268 :param fileobj: filestream providing the trajectory data 

269 :param index: integer or slice object (default: get the last timestep) 

270 :returns: list of Atoms objects 

271 :rtype: list 

272 """ 

273 # Load all dumped timesteps into memory simultaneously 

274 lines = deque(fileobj.readlines()) 

275 index_end = get_max_index(index) 

276 

277 n_atoms = 0 

278 images = [] 

279 

280 # avoid references before assignment in case of incorrect file structure 

281 cell, celldisp, pbc, info = None, None, False, {} 

282 

283 while len(lines) > n_atoms: 

284 line = lines.popleft() 

285 

286 if "ITEM: TIMESTEP" in line: 

287 line = lines.popleft() 

288 # !TODO: pyflakes complains about this line -> do something 

289 ntimestep = int(line.split()[0]) # NOQA 

290 info["timestep"] = ntimestep 

291 

292 if "ITEM: NUMBER OF ATOMS" in line: 

293 line = lines.popleft() 

294 n_atoms = int(line.split()[0]) 

295 

296 if "ITEM: BOX BOUNDS" in line: 

297 # save labels behind "ITEM: BOX BOUNDS" in triclinic case 

298 # (>=lammps-7Jul09) 

299 tilt_items = line.split()[3:] 

300 celldatarows = [lines.popleft() for _ in range(3)] 

301 celldata = np.loadtxt(celldatarows) 

302 diagdisp = celldata[:, :2].reshape(6, 1).flatten() 

303 

304 # determine cell tilt (triclinic case!) 

305 if len(celldata[0]) > 2: 

306 # for >=lammps-7Jul09 use labels behind "ITEM: BOX BOUNDS" 

307 # to assign tilt (vector) elements ... 

308 offdiag = celldata[:, 2] 

309 # ... otherwise assume default order in 3rd column 

310 # (if the latter was present) 

311 if len(tilt_items) >= 3: 

312 sort_index = [tilt_items.index(i) 

313 for i in ["xy", "xz", "yz"]] 

314 offdiag = offdiag[sort_index] 

315 else: 

316 offdiag = (0.0,) * 3 

317 

318 cell, celldisp = construct_cell(diagdisp, offdiag) 

319 

320 # Handle pbc conditions 

321 if len(tilt_items) == 3: 

322 pbc_items = tilt_items 

323 elif len(tilt_items) > 3: 

324 pbc_items = tilt_items[3:6] 

325 else: 

326 pbc_items = ["f", "f", "f"] 

327 pbc = ["p" in d.lower() for d in pbc_items] 

328 

329 if "ITEM: ATOMS" in line: 

330 colnames = line.split()[2:] 

331 datarows = [lines.popleft() for _ in range(n_atoms)] 

332 data = np.loadtxt(datarows, dtype=str, ndmin=2) 

333 out_atoms = lammps_data_to_ase_atoms( 

334 data=data, 

335 colnames=colnames, 

336 cell=cell, 

337 celldisp=celldisp, 

338 atomsobj=Atoms, 

339 pbc=pbc, 

340 **kwargs, 

341 ) 

342 out_atoms.info.update(info) 

343 images.append(out_atoms) 

344 

345 if len(images) > index_end >= 0: 

346 break 

347 

348 return images[index] 

349 

350 

351def read_lammps_dump_binary( 

352 fileobj, index=-1, colnames=None, intformat="SMALLBIG", **kwargs 

353): 

354 """Read binary dump-files (after binary2txt.cpp from lammps/tools) 

355 

356 :param fileobj: file-stream containing the binary lammps data 

357 :param index: integer or slice object (default: get the last timestep) 

358 :param colnames: data is columns and identified by a header 

359 :param intformat: lammps support different integer size. Parameter set \ 

360 at compile-time and can unfortunately not derived from data file 

361 :returns: list of Atoms-objects 

362 :rtype: list 

363 """ 

364 # depending on the chosen compilation flag lammps uses either normal 

365 # integers or long long for its id or timestep numbering 

366 # !TODO: tags are cast to double -> missing/double ids (add check?) 

367 _tagformat, bigformat = dict( 

368 SMALLSMALL=("i", "i"), SMALLBIG=("i", "q"), BIGBIG=("q", "q") 

369 )[intformat] 

370 

371 index_end = get_max_index(index) 

372 

373 # Standard columns layout from lammpsrun 

374 if not colnames: 

375 colnames = ["id", "type", "x", "y", "z", 

376 "vx", "vy", "vz", "fx", "fy", "fz"] 

377 

378 images = [] 

379 

380 # wrap struct.unpack to raise EOFError 

381 def read_variables(string): 

382 obj_len = struct.calcsize(string) 

383 data_obj = fileobj.read(obj_len) 

384 if obj_len != len(data_obj): 

385 raise EOFError 

386 return struct.unpack(string, data_obj) 

387 

388 while True: 

389 try: 

390 # Assume that the binary dump file is in the old (pre-29Oct2020) 

391 # format 

392 magic_string = None 

393 

394 # read header 

395 ntimestep, = read_variables("=" + bigformat) 

396 

397 # In the new LAMMPS binary dump format (version 29Oct2020 and 

398 # onward), a negative timestep is used to indicate that the next 

399 # few bytes will contain certain metadata 

400 if ntimestep < 0: 

401 # First bigint was actually encoding the negative of the format 

402 # name string length (we call this 'magic_string' to 

403 magic_string_len = -ntimestep 

404 

405 # The next `magic_string_len` bytes will hold a string 

406 # indicating the format of the dump file 

407 magic_string = b''.join(read_variables( 

408 "=" + str(magic_string_len) + "c")) 

409 

410 # Read endianness (integer). For now, we'll disregard the value 

411 # and simply use the host machine's endianness (via '=' 

412 # character used with struct.calcsize). 

413 # 

414 # TODO: Use the endianness of the dump file in subsequent 

415 # read_variables rather than just assuming it will match 

416 # that of the host 

417 read_variables("=i") 

418 

419 # Read revision number (integer) 

420 revision, = read_variables("=i") 

421 

422 # Finally, read the actual timestep (bigint) 

423 ntimestep, = read_variables("=" + bigformat) 

424 

425 _n_atoms, triclinic = read_variables("=" + bigformat + "i") 

426 boundary = read_variables("=6i") 

427 diagdisp = read_variables("=6d") 

428 if triclinic != 0: 

429 offdiag = read_variables("=3d") 

430 else: 

431 offdiag = (0.0,) * 3 

432 size_one, = read_variables("=i") 

433 

434 if len(colnames) != size_one: 

435 raise ValueError("Provided columns do not match binary file") 

436 

437 if magic_string and revision > 1: 

438 # New binary dump format includes units string, 

439 # columns string, and time 

440 units_str_len, = read_variables("=i") 

441 

442 if units_str_len > 0: 

443 # Read lammps units style 

444 _ = b''.join( 

445 read_variables("=" + str(units_str_len) + "c")) 

446 

447 flag, = read_variables("=c") 

448 if flag != b'\x00': 

449 # Flag was non-empty string 

450 read_variables("=d") 

451 

452 # Length of column string 

453 columns_str_len, = read_variables("=i") 

454 

455 # Read column string (e.g., "id type x y z vx vy vz fx fy fz") 

456 _ = b''.join(read_variables("=" + str(columns_str_len) + "c")) 

457 

458 nchunk, = read_variables("=i") 

459 

460 # lammps cells/boxes can have different boundary conditions on each 

461 # sides (makes mainly sense for different non-periodic conditions 

462 # (e.g. [f]ixed and [s]hrink for a irradiation simulation)) 

463 # periodic case: b 0 = 'p' 

464 # non-peridic cases 1: 'f', 2 : 's', 3: 'm' 

465 pbc = np.sum(np.array(boundary).reshape((3, 2)), axis=1) == 0 

466 

467 cell, celldisp = construct_cell(diagdisp, offdiag) 

468 

469 data = [] 

470 for _ in range(nchunk): 

471 # number-of-data-entries 

472 n_data, = read_variables("=i") 

473 # retrieve per atom data 

474 data += read_variables("=" + str(n_data) + "d") 

475 data = np.array(data).reshape((-1, size_one)) 

476 

477 # map data-chunk to ase atoms 

478 out_atoms = lammps_data_to_ase_atoms( 

479 data=data, 

480 colnames=colnames, 

481 cell=cell, 

482 celldisp=celldisp, 

483 pbc=pbc, 

484 **kwargs 

485 ) 

486 

487 images.append(out_atoms) 

488 

489 # stop if requested index has been found 

490 if len(images) > index_end >= 0: 

491 break 

492 

493 except EOFError: 

494 break 

495 

496 return images[index] 

497 

498 

499def _mass2element(mass): 

500 """ 

501 Guess the element corresponding to a given atomic mass. 

502 

503 :param mass: Atomic mass for searching. 

504 :return: Element symbol as a string. 

505 """ 

506 min_idx = np.argmin(np.abs(atomic_masses - mass)) 

507 element = chemical_symbols[min_idx] 

508 return element