Coverage for /builds/ase/ase/ase/io/lammpsrun.py: 88.24%

1# fmt: off

3import gzip

4import struct

5from collections import deque

6from os.path import splitext

8import numpy as np

10from ase.atoms import Atoms

11from ase.calculators.lammps import convert

12from ase.calculators.singlepoint import SinglePointCalculator

13from ase.data import atomic_masses, chemical_symbols

14from ase.parallel import paropen

17def read_lammps_dump(infileobj, **kwargs):

18 """Method which reads a LAMMPS dump file.

20 LAMMPS chooses output method depending on the given suffix:

21 - .bin : binary file

22 - .gz : output piped through gzip

23 - .mpiio: using mpiio (should be like cleartext,

24 with different ordering)

25 - else : normal clear-text format

27 :param infileobj: string to file, opened file or file-like stream

29 """

30 # !TODO: add support for lammps-regex naming schemes (output per

31 # processor and timestep wildcards)

33 opened = False

34 if isinstance(infileobj, str):

35 opened = True

36 suffix = splitext(infileobj)[-1]

37 if suffix == ".bin":

38 fileobj = paropen(infileobj, "rb")

39 elif suffix == ".gz":

40 # !TODO: save for parallel execution?

41 fileobj = gzip.open(infileobj, "rb")

42 else:

43 fileobj = paropen(infileobj)

44 else:

45 suffix = splitext(infileobj.name)[-1]

46 fileobj = infileobj

48 if suffix == ".bin":

49 out = read_lammps_dump_binary(fileobj, **kwargs)

50 if opened:

51 fileobj.close()

52 return out

54 out = read_lammps_dump_text(fileobj, **kwargs)

56 if opened:

57 fileobj.close()

59 return out

62def lammps_data_to_ase_atoms(

63 data,

64 colnames,

65 cell,

66 celldisp,

67 pbc=False,

68 atomsobj=Atoms,

69 order=True,

70 specorder=None,

71 prismobj=None,

72 units="metal",

73):

74 """Extract positions and other per-atom parameters and create Atoms

76 :param data: per atom data

77 :param colnames: index for data

78 :param cell: cell dimensions

79 :param celldisp: origin shift

80 :param pbc: periodic boundaries

81 :param atomsobj: function to create ase-Atoms object

82 :param order: sort atoms by id. Might be faster to turn off.

83 Disregarded in case `id` column is not given in file.

84 :param specorder: list of species to map lammps types to ase-species

85 (usually .dump files to not contain type to species mapping)

86 :param prismobj: Coordinate transformation between lammps and ase

87 :type prismobj: Prism

88 :param units: lammps units for unit transformation between lammps and ase

89 :returns: Atoms object

90 :rtype: Atoms

92 """

93 if len(data.shape) == 1:

94 data = data[np.newaxis, :]

96 # read IDs if given and order if needed

97 if "id" in colnames:

98 ids = data[:, colnames.index("id")].astype(int)

99 if order:

100 sort_order = np.argsort(ids)

101 data = data[sort_order, :]

102

103 # determine the elements

104 if "element" in colnames:

105 # priority to elements written in file

106 elements = data[:, colnames.index("element")]

107 elif "mass" in colnames:

108 # try to determine elements from masses

109 elements = [

110 _mass2element(m)

111 for m in data[:, colnames.index("mass")].astype(float)

112 ]

113 elif "type" in colnames:

114 # fall back to `types` otherwise

115 elements = data[:, colnames.index("type")].astype(int)

116

117 # reconstruct types from given specorder

118 if specorder:

119 elements = [specorder[t - 1] for t in elements]

120 else:

121 # todo: what if specorder give but no types?

122 # in principle the masses could work for atoms, but that needs

123 # lots of cases and new code I guess

124 raise ValueError("Cannot determine atom types form LAMMPS dump file")

125

126 def get_quantity(labels, quantity=None):

127 try:

128 cols = [colnames.index(label) for label in labels]

129 if quantity:

130 return convert(data[:, cols].astype(float), quantity,

131 units, "ASE")

132

133 return data[:, cols].astype(float)

134 except ValueError:

135 return None

136

137 # Positions

138 positions = None

139 scaled_positions = None

140 if "x" in colnames:

141 # doc: x, y, z = unscaled atom coordinates

142 positions = get_quantity(["x", "y", "z"], "distance")

143 elif "xs" in colnames:

144 # doc: xs,ys,zs = scaled atom coordinates

145 scaled_positions = get_quantity(["xs", "ys", "zs"])

146 elif "xu" in colnames:

147 # doc: xu,yu,zu = unwrapped atom coordinates

148 positions = get_quantity(["xu", "yu", "zu"], "distance")

149 elif "xsu" in colnames:

150 # xsu,ysu,zsu = scaled unwrapped atom coordinates

151 scaled_positions = get_quantity(["xsu", "ysu", "zsu"])

152 else:

153 raise ValueError("No atomic positions found in LAMMPS output")

154

155 velocities = get_quantity(["vx", "vy", "vz"], "velocity")

156 charges = get_quantity(["q"], "charge")

157 forces = get_quantity(["fx", "fy", "fz"], "force")

158 # !TODO: how need quaternions be converted?

159 quaternions = get_quantity(["c_q[1]", "c_q[2]", "c_q[3]", "c_q[4]"])

160

161 # convert cell

162 cell = convert(cell, "distance", units, "ASE")

163 celldisp = convert(celldisp, "distance", units, "ASE")

164 if prismobj:

165 celldisp = prismobj.vector_to_ase(celldisp)

166 cell = prismobj.update_cell(cell)

167

168 if quaternions is not None:

169 out_atoms = atomsobj(

170 symbols=elements,

171 positions=positions,

172 cell=cell,

173 celldisp=celldisp,

174 pbc=pbc,

175 )

176 out_atoms.new_array('quaternions', quaternions, dtype=float)

177 elif positions is not None:

178 # reverse coordinations transform to lammps system

179 # (for all vectors = pos, vel, force)

180 if prismobj:

181 positions = prismobj.vector_to_ase(positions, wrap=True)

182

183 out_atoms = atomsobj(

184 symbols=elements,

185 positions=positions,

186 pbc=pbc,

187 celldisp=celldisp,

188 cell=cell

189 )

190 elif scaled_positions is not None:

191 out_atoms = atomsobj(

192 symbols=elements,

193 scaled_positions=scaled_positions,

194 pbc=pbc,

195 celldisp=celldisp,

196 cell=cell,

197 )

198

199 if velocities is not None:

200 if prismobj:

201 velocities = prismobj.vector_to_ase(velocities)

202 out_atoms.set_velocities(velocities)

203 if charges is not None:

204 out_atoms.set_initial_charges([charge[0] for charge in charges])

205 if forces is not None:

206 if prismobj:

207 forces = prismobj.vector_to_ase(forces)

208 # !TODO: use another calculator if available (or move forces

209 # to atoms.property) (other problem: synchronizing

210 # parallel runs)

211 calculator = SinglePointCalculator(out_atoms, energy=0.0,

212 forces=forces)

213 out_atoms.calc = calculator

214

215 # process the extra columns of fixes, variables and computes

216 # that can be dumped, add as additional arrays to atoms object

217 for colname in colnames:

218 # determine if it is a compute, fix or

219 # custom property/atom (but not the quaternian)

220 if (colname.startswith('f_') or colname.startswith('v_') or

221 colname.startswith('d_') or colname.startswith('d2_') or

222 (colname.startswith('c_') and not colname.startswith('c_q['))):

223 out_atoms.new_array(colname, get_quantity([colname]),

224 dtype='float')

225

226 elif colname.startswith('i_') or colname.startswith('i2_'):

227 out_atoms.new_array(colname, get_quantity([colname]),

228 dtype='int')

229

230 return out_atoms

231

232

233def construct_cell(diagdisp, offdiag):

234 """Help function to create an ASE-cell with displacement vector from

235 the lammps coordination system parameters.

236

237 :param diagdisp: cell dimension convoluted with the displacement vector

238 :param offdiag: off-diagonal cell elements

239 :returns: cell and cell displacement vector

240 :rtype: tuple

241 """

242 xlo, xhi, ylo, yhi, zlo, zhi = diagdisp

243 xy, xz, yz = offdiag

244

245 # create ase-cell from lammps-box

246 xhilo = (xhi - xlo) - abs(xy) - abs(xz)

247 yhilo = (yhi - ylo) - abs(yz)

248 zhilo = zhi - zlo

249 celldispx = xlo - min(0, xy) - min(0, xz)

250 celldispy = ylo - min(0, yz)

251 celldispz = zlo

252 cell = np.array([[xhilo, 0, 0], [xy, yhilo, 0], [xz, yz, zhilo]])

253 celldisp = np.array([celldispx, celldispy, celldispz])

254

255 return cell, celldisp

256

257

258def get_max_index(index):

259 if np.isscalar(index):

260 return index

261 elif isinstance(index, slice):

262 return index.stop if (index.stop is not None) else float("inf")

263

264

265def read_lammps_dump_text(fileobj, index=-1, **kwargs):

266 """Process cleartext lammps dumpfiles

267

268 :param fileobj: filestream providing the trajectory data

269 :param index: integer or slice object (default: get the last timestep)

270 :returns: list of Atoms objects

271 :rtype: list

272 """

273 # Load all dumped timesteps into memory simultaneously

274 lines = deque(fileobj.readlines())

275 index_end = get_max_index(index)

276

277 n_atoms = 0

278 images = []

279

280 # avoid references before assignment in case of incorrect file structure

281 cell, celldisp, pbc, info = None, None, False, {}

282

283 while len(lines) > n_atoms:

284 line = lines.popleft()

285

286 if "ITEM: TIMESTEP" in line:

287 line = lines.popleft()

288 # !TODO: pyflakes complains about this line -> do something

289 ntimestep = int(line.split()[0]) # NOQA

290 info["timestep"] = ntimestep

291

292 if "ITEM: NUMBER OF ATOMS" in line:

293 line = lines.popleft()

294 n_atoms = int(line.split()[0])

295

296 if "ITEM: BOX BOUNDS" in line:

297 # save labels behind "ITEM: BOX BOUNDS" in triclinic case

298 # (>=lammps-7Jul09)

299 tilt_items = line.split()[3:]

300 celldatarows = [lines.popleft() for _ in range(3)]

301 celldata = np.loadtxt(celldatarows)

302 diagdisp = celldata[:, :2].reshape(6, 1).flatten()

303

304 # determine cell tilt (triclinic case!)

305 if len(celldata[0]) > 2:

306 # for >=lammps-7Jul09 use labels behind "ITEM: BOX BOUNDS"

307 # to assign tilt (vector) elements ...

308 offdiag = celldata[:, 2]

309 # ... otherwise assume default order in 3rd column

310 # (if the latter was present)

311 if len(tilt_items) >= 3:

312 sort_index = [tilt_items.index(i)

313 for i in ["xy", "xz", "yz"]]

314 offdiag = offdiag[sort_index]

315 else:

316 offdiag = (0.0,) * 3

317

318 cell, celldisp = construct_cell(diagdisp, offdiag)

319

320 # Handle pbc conditions

321 if len(tilt_items) == 3:

322 pbc_items = tilt_items

323 elif len(tilt_items) > 3:

324 pbc_items = tilt_items[3:6]

325 else:

326 pbc_items = ["f", "f", "f"]

327 pbc = ["p" in d.lower() for d in pbc_items]

328

329 if "ITEM: ATOMS" in line:

330 colnames = line.split()[2:]

331 datarows = [lines.popleft() for _ in range(n_atoms)]

332 data = np.loadtxt(datarows, dtype=str, ndmin=2)

333 out_atoms = lammps_data_to_ase_atoms(

334 data=data,

335 colnames=colnames,

336 cell=cell,

337 celldisp=celldisp,

338 atomsobj=Atoms,

339 pbc=pbc,

340 **kwargs,

341 )

342 out_atoms.info.update(info)

343 images.append(out_atoms)

344

345 if len(images) > index_end >= 0:

346 break

347

348 return images[index]

349

350

351def read_lammps_dump_binary(

352 fileobj, index=-1, colnames=None, intformat="SMALLBIG", **kwargs

353):

354 """Read binary dump-files (after binary2txt.cpp from lammps/tools)

355

356 :param fileobj: file-stream containing the binary lammps data

357 :param index: integer or slice object (default: get the last timestep)

358 :param colnames: data is columns and identified by a header

359 :param intformat: lammps support different integer size. Parameter set \

360 at compile-time and can unfortunately not derived from data file

361 :returns: list of Atoms-objects

362 :rtype: list

363 """

364 # depending on the chosen compilation flag lammps uses either normal

365 # integers or long long for its id or timestep numbering

366 # !TODO: tags are cast to double -> missing/double ids (add check?)

367 _tagformat, bigformat = dict(

368 SMALLSMALL=("i", "i"), SMALLBIG=("i", "q"), BIGBIG=("q", "q")

369 )[intformat]

370

371 index_end = get_max_index(index)

372

373 # Standard columns layout from lammpsrun

374 if not colnames:

375 colnames = ["id", "type", "x", "y", "z",

376 "vx", "vy", "vz", "fx", "fy", "fz"]

377

378 images = []

379

380 # wrap struct.unpack to raise EOFError

381 def read_variables(string):

382 obj_len = struct.calcsize(string)

383 data_obj = fileobj.read(obj_len)

384 if obj_len != len(data_obj):

385 raise EOFError

386 return struct.unpack(string, data_obj)

387

388 while True:

389 try:

390 # Assume that the binary dump file is in the old (pre-29Oct2020)

391 # format

392 magic_string = None

393

394 # read header

395 ntimestep, = read_variables("=" + bigformat)

396

397 # In the new LAMMPS binary dump format (version 29Oct2020 and

398 # onward), a negative timestep is used to indicate that the next

399 # few bytes will contain certain metadata

400 if ntimestep < 0:

401 # First bigint was actually encoding the negative of the format

402 # name string length (we call this 'magic_string' to

403 magic_string_len = -ntimestep

404

405 # The next `magic_string_len` bytes will hold a string

406 # indicating the format of the dump file

407 magic_string = b''.join(read_variables(

408 "=" + str(magic_string_len) + "c"))

409

410 # Read endianness (integer). For now, we'll disregard the value

411 # and simply use the host machine's endianness (via '='

412 # character used with struct.calcsize).

413 #

414 # TODO: Use the endianness of the dump file in subsequent

415 # read_variables rather than just assuming it will match

416 # that of the host

417 read_variables("=i")

418

419 # Read revision number (integer)

420 revision, = read_variables("=i")

421

422 # Finally, read the actual timestep (bigint)

423 ntimestep, = read_variables("=" + bigformat)

424

425 _n_atoms, triclinic = read_variables("=" + bigformat + "i")

426 boundary = read_variables("=6i")

427 diagdisp = read_variables("=6d")

428 if triclinic != 0:

429 offdiag = read_variables("=3d")

430 else:

431 offdiag = (0.0,) * 3

432 size_one, = read_variables("=i")

433

434 if len(colnames) != size_one:

435 raise ValueError("Provided columns do not match binary file")

436

437 if magic_string and revision > 1:

438 # New binary dump format includes units string,

439 # columns string, and time

440 units_str_len, = read_variables("=i")

441

442 if units_str_len > 0:

443 # Read lammps units style

444 _ = b''.join(

445 read_variables("=" + str(units_str_len) + "c"))

446

447 flag, = read_variables("=c")

448 if flag != b'\x00':

449 # Flag was non-empty string

450 read_variables("=d")

451

452 # Length of column string

453 columns_str_len, = read_variables("=i")

454

455 # Read column string (e.g., "id type x y z vx vy vz fx fy fz")

456 _ = b''.join(read_variables("=" + str(columns_str_len) + "c"))

457

458 nchunk, = read_variables("=i")

459

460 # lammps cells/boxes can have different boundary conditions on each

461 # sides (makes mainly sense for different non-periodic conditions

462 # (e.g. [f]ixed and [s]hrink for a irradiation simulation))

463 # periodic case: b 0 = 'p'

464 # non-peridic cases 1: 'f', 2 : 's', 3: 'm'

465 pbc = np.sum(np.array(boundary).reshape((3, 2)), axis=1) == 0

466

467 cell, celldisp = construct_cell(diagdisp, offdiag)

468

469 data = []

470 for _ in range(nchunk):

471 # number-of-data-entries

472 n_data, = read_variables("=i")

473 # retrieve per atom data

474 data += read_variables("=" + str(n_data) + "d")

475 data = np.array(data).reshape((-1, size_one))

476

477 # map data-chunk to ase atoms

478 out_atoms = lammps_data_to_ase_atoms(

479 data=data,

480 colnames=colnames,

481 cell=cell,

482 celldisp=celldisp,

483 pbc=pbc,

484 **kwargs

485 )

486

487 images.append(out_atoms)

488

489 # stop if requested index has been found

490 if len(images) > index_end >= 0:

491 break

492

493 except EOFError:

494 break

495

496 return images[index]

497

498

499def _mass2element(mass):

500 """

501 Guess the element corresponding to a given atomic mass.

502

503 :param mass: Atomic mass for searching.

504 :return: Element symbol as a string.

505 """

506 min_idx = np.argmin(np.abs(atomic_masses - mass))

507 element = chemical_symbols[min_idx]

508 return element