Coverage for /builds/ase/ase/ase/io/bundletrajectory.py: 74.13%
576 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-08-02 00:12 +0000
1# fmt: off
3"""bundletrajectory - a module for I/O from large MD simulations.
5The BundleTrajectory class writes trajectory into a directory with the
6following structure, except we now use JSON instead of pickle,
7so this text needs updating::
9 filename.bundle (dir)
10 metadata.json Data about the file format, and about which
11 data is present.
12 frames The number of frames (ascii file)
13 F0 (dir) Frame number 0
14 smalldata.ulm Small data structures in a dictionary
15 (pbc, cell, ...)
16 numbers.ulm Atomic numbers
17 positions.ulm Positions
18 momenta.ulm Momenta
19 ...
20 F1 (dir)
22There is a folder for each frame, and the data is in the ASE Ulm format.
23"""
25import os
26import shutil
27import sys
28import time
29from pathlib import Path
31import numpy as np
33from ase import Atoms
34from ase.calculators.singlepoint import (
35 PropertyNotImplementedError,
36 SinglePointCalculator,
37)
39# The system json module causes memory leaks! Use ase's own.
40# import json
41from ase.io import jsonio
42from ase.io.ulm import open as ulmopen
43from ase.parallel import barrier, paropen, world
class BundleTrajectory:
    """Reads and writes atoms into a .bundle directory.

    The BundleTrajectory is an alternative way of storing
    trajectories, intended for large-scale molecular dynamics
    simulations, where a single flat file becomes unwieldy.  Instead,
    the data is stored in directory, a 'bundle' (the name bundle is
    inspired from bundles in Mac OS, which are really just directories
    the user is supposed to think of as a single file-like unit).

    Parameters:

    filename:
        The name of the directory.  Preferably ending in .bundle.

    mode (optional):
        The file opening mode.  'r' means open for reading, 'w' for
        writing and 'a' for appending.  Default: 'r'.  If opening in
        write mode, and the filename already exists, the old file is
        renamed to .bak (any old .bak file is deleted), except if the
        existing file is empty.

    atoms (optional):
        The atoms that will be written.  Can only be specified in
        write or append mode.  If not specified, the atoms must be
        given as an argument to the .write() method instead.

    backup=True:
        Use backup=False to disable renaming of an existing file.

    backend='ulm':
        Request a backend.  Currently only 'ulm' is supported.
        Only honored when writing.

    singleprecision=False:
        Store floating point data in single precision (ulm backend only).
    """
    slavelog = True  # Log from all nodes

    def __init__(self, filename, mode='r', atoms=None, backup=True,
                 backend='ulm', singleprecision=False):
        self.state = 'constructing'
        self.filename = filename
        self.pre_observers = []  # callback functions before write is performed
        self.post_observers = []  # callback functions after write is performed
        self.master = world.rank == 0
        self.extra_data = []
        self.singleprecision = singleprecision
        self._set_defaults()
        if mode == 'r':
            if atoms is not None:
                raise ValueError('You cannot specify atoms in read mode.')
            self._open_read()
        elif mode == 'w':
            self._open_write(atoms, backup, backend)
        elif mode == 'a':
            self._open_append(atoms, backend)
        else:
            raise ValueError('Unknown mode: ' + str(mode))

    def _set_defaults(self):
        "Set default values for internal parameters."
        self.version = 1
        self.subtype = 'normal'
        # Which data to store per frame: True = every frame,
        # 'once' = first frame only, False = never.
        self.datatypes = {'positions': True,
                          'numbers': 'once',
                          'tags': 'once',
                          'masses': 'once',
                          'momenta': True,
                          'forces': True,
                          'energy': True,
                          'energies': False,
                          'stress': False,
                          'magmoms': True}

    def _set_backend(self, backend):
        """Set the backend doing the actual I/O."""
        if backend is not None:
            self.backend_name = backend

        if self.backend_name == 'ulm':
            self.backend = UlmBundleBackend(self.master, self.singleprecision)
        else:
            raise NotImplementedError(
                'This version of ASE cannot use BundleTrajectory '
                'with backend "%s"' % self.backend_name)

    def write(self, atoms=None):
        """Write the atoms to the file.

        If the atoms argument is not given, the atoms object specified
        when creating the trajectory object is used.
        """
        # Check that we are in write mode
        if self.state == 'prewrite':
            self.state = 'write'
            assert self.nframes == 0
        elif self.state != 'write':
            raise RuntimeError('Cannot write in ' + self.state + ' mode.')

        if atoms is None:
            atoms = self.atoms

        # Handle NEB etc.  If atoms is just a normal Atoms object, it is used
        # as-is.
        for image in atoms.iterimages():
            self._write_atoms(image)

    def _write_atoms(self, atoms):
        "Write a single atoms object to file."
        self._call_observers(self.pre_observers)
        self.log('Beginning to write frame ' + str(self.nframes))
        framedir = self._make_framedir(self.nframes)

        # Check which data should be written the first time:
        # Modify datatypes so any element of type 'once' becomes true
        # for the first frame but false for subsequent frames.
        datatypes = {}
        for k, v in self.datatypes.items():
            if v == 'once':
                v = (self.nframes == 0)
            datatypes[k] = v

        # Write 'small' data structures.  They are written jointly.
        smalldata = {'pbc': atoms.get_pbc(),
                     'cell': atoms.get_cell(),
                     'natoms': atoms.get_global_number_of_atoms(),
                     'constraints': atoms.constraints}
        if datatypes.get('energy'):
            try:
                smalldata['energy'] = atoms.get_potential_energy()
            except (RuntimeError, PropertyNotImplementedError):
                # No calculator or no energy: stop trying in later frames.
                self.datatypes['energy'] = False
        if datatypes.get('stress'):
            try:
                smalldata['stress'] = atoms.get_stress()
            except PropertyNotImplementedError:
                self.datatypes['stress'] = False
        self.backend.write_small(framedir, smalldata)

        # Write the large arrays.
        if datatypes.get('positions'):
            self.backend.write(framedir, 'positions', atoms.get_positions())
        if datatypes.get('numbers'):
            self.backend.write(framedir, 'numbers', atoms.get_atomic_numbers())
        if datatypes.get('tags'):
            if atoms.has('tags'):
                self.backend.write(framedir, 'tags', atoms.get_tags())
            else:
                self.datatypes['tags'] = False
        if datatypes.get('masses'):
            if atoms.has('masses'):
                self.backend.write(framedir, 'masses', atoms.get_masses())
            else:
                self.datatypes['masses'] = False
        if datatypes.get('momenta'):
            if atoms.has('momenta'):
                self.backend.write(framedir, 'momenta', atoms.get_momenta())
            else:
                self.datatypes['momenta'] = False
        if datatypes.get('magmoms'):
            if atoms.has('initial_magmoms'):
                self.backend.write(framedir, 'magmoms',
                                   atoms.get_initial_magnetic_moments())
            else:
                self.datatypes['magmoms'] = False
        if datatypes.get('forces'):
            try:
                x = atoms.get_forces()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['forces'] = False
            else:
                self.backend.write(framedir, 'forces', x)
                del x
        if datatypes.get('energies'):
            try:
                x = atoms.get_potential_energies()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['energies'] = False
            else:
                self.backend.write(framedir, 'energies', x)
                del x
        # Write any extra data
        for (label, source, once) in self.extra_data:
            if self.nframes == 0 or not once:
                if source is not None:
                    x = source()
                else:
                    x = atoms.get_array(label)
                self.backend.write(framedir, label, x)
                del x
                if once:
                    self.datatypes[label] = 'once'
                else:
                    self.datatypes[label] = True
        # Finally, write metadata if it is the first frame
        if self.nframes == 0:
            metadata = {'datatypes': self.datatypes}
            self._write_metadata(metadata)
        self._write_nframes(self.nframes + 1)
        self._call_observers(self.post_observers)
        self.log('Done writing frame ' + str(self.nframes))
        self.nframes += 1

    def select_data(self, data, value):
        """Selects if a given data type should be written.

        Data can be written in every frame (specify True), in the
        first frame only (specify 'once') or not at all (specify
        False).  Not all data types support the 'once' keyword, if not
        supported it is interpreted as True.

        The following data types are supported, the letter in parenthesis
        indicates the default:

        positions (T), numbers (O), tags (O), masses (O), momenta (T),
        forces (T), energy (T), energies (F), stress (F), magmoms (T)

        If a given property is not present during the first write, it
        will be not be saved at all.
        """
        if value not in (True, False, 'once'):
            raise ValueError('Unknown write mode')
        if data not in self.datatypes:
            raise ValueError('Unsupported data type: ' + data)
        self.datatypes[data] = value

    def set_extra_data(self, name, source=None, once=False):
        """Adds extra data to be written.

        Parameters:
        name:  The name of the data.

        source (optional): If specified, a callable object returning
        the data to be written.  If not specified it is instead
        assumed that the atoms contains the data as an array of the
        same name.

        once (optional): If specified and True, the data will only be
        written to the first frame.
        """
        self.extra_data.append((name, source, once))

    def close(self):
        "Closes the trajectory."
        self.state = 'closed'
        lf = getattr(self, 'logfile', None)
        self.backend.close(log=lf)
        if lf is not None:
            lf.close()
            del self.logfile

    def log(self, text):
        """Write to the log file in the bundle.

        Logging is only possible in write/append mode.

        This function is mainly for internal use, but can also be called by
        the user.
        """
        if not (self.master or self.slavelog):
            return
        text = time.asctime() + ': ' + text
        if hasattr(self, 'logfile'):
            # Logging enabled
            if self.logfile is None:
                # Logfile not yet open; buffer the messages until it is.
                try:
                    self.logdata.append(text)
                except AttributeError:
                    self.logdata = [text]
            else:
                self.logfile.write(text + '\n')
                self.logfile.flush()
        else:
            raise RuntimeError('Cannot write to log file in mode ' +
                               self.state)

    # __getitem__ is the main reading method.
    def __getitem__(self, n):
        return self._read(n)

    def _read(self, n):
        """Read an atoms object from the BundleTrajectory."""
        if self.state != 'read':
            raise OSError(f'Cannot read in {self.state} mode')
        if n < 0:
            n += self.nframes
        if n < 0 or n >= self.nframes:
            raise IndexError('Trajectory index %d out of range [0, %d['
                             % (n, self.nframes))

        framedir = os.path.join(self.filename, 'F' + str(n))
        framezero = os.path.join(self.filename, 'F0')
        smalldata = self.backend.read_small(framedir)
        data = {}
        data['pbc'] = smalldata['pbc']
        data['cell'] = smalldata['cell']
        data['constraint'] = smalldata['constraints']
        if self.subtype == 'split':
            self.backend.set_fragments(smalldata['fragments'])
            self.atom_id, _dummy = self.backend.read_split(framedir, 'ID')
        else:
            self.atom_id = None
        atoms = Atoms(**data)
        natoms = smalldata['natoms']
        for name in ('positions', 'numbers', 'tags', 'masses',
                     'momenta'):
            if self.datatypes.get(name):
                atoms.arrays[name] = self._read_data(framezero, framedir,
                                                     name, self.atom_id)
                assert len(atoms.arrays[name]) == natoms

        # Attach a calculator holding the stored results, if any.
        if self.datatypes.get('energy'):
            if self.datatypes.get('forces'):
                forces = self.backend.read(framedir, 'forces')
            else:
                forces = None
            if self.datatypes.get('magmoms'):
                magmoms = self.backend.read(framedir, 'magmoms')
            else:
                magmoms = None
            calc = SinglePointCalculator(atoms,
                                         energy=smalldata.get('energy'),
                                         forces=forces,
                                         stress=smalldata.get('stress'),
                                         magmoms=magmoms)
            atoms.calc = calc
        return atoms

    def read_extra_data(self, name, n=0):
        """Read extra data stored alongside the atoms.

        Currently only used to read data stored by an NPT dynamics object.
        The data is not associated with individual atoms.
        """
        if self.state != 'read':
            raise OSError(f'Cannot read extra data in {self.state} mode')
        # Handle negative n.
        if n < 0:
            n += self.nframes
        if n < 0 or n >= self.nframes:
            raise IndexError('Trajectory index %d out of range [0, %d['
                             % (n, self.nframes))
        framedir = os.path.join(self.filename, 'F' + str(n))
        framezero = os.path.join(self.filename, 'F0')
        return self._read_data(framezero, framedir, name, self.atom_id)

    def _read_data(self, f0, f, name, atom_id):
        "Read single data item."

        if self.subtype == 'normal':
            # 'once' data lives in frame zero only.
            if self.datatypes[name] == 'once':
                d = self.backend.read(f0, name)
            else:
                d = self.backend.read(f, name)
        elif self.subtype == 'split':
            if self.datatypes[name] == 'once':
                d, issplit = self.backend.read_split(f0, name)
                atom_id, _dummy = self.backend.read_split(f0, 'ID')
            else:
                d, issplit = self.backend.read_split(f, name)
            if issplit:
                # Reorder fragments into global atom order using the IDs.
                assert atom_id is not None
                assert len(d) == len(atom_id)
                d[atom_id] = np.array(d)
        return d

    def __len__(self):
        return self.nframes

    def _open_log(self):
        "Open the log file."
        if not (self.master or self.slavelog):
            return
        if self.master:
            lfn = os.path.join(self.filename, 'log.txt')
        else:
            lfn = os.path.join(self.filename, ('log-node%d.txt' %
                                               (world.rank,)))
        self.logfile = open(lfn, 'a', 1)  # Append to log if it exists.
        if hasattr(self, 'logdata'):
            # Flush messages buffered by log() before the file was open.
            for text in self.logdata:
                self.logfile.write(text + '\n')
            self.logfile.flush()
            del self.logdata

    def _open_write(self, atoms, backup, backend):
        """Open a bundle trajectory for writing.

        Parameters:
        atoms: Object to be written.
        backup: (bool) Whether a backup is kept if file already exists.
        backend: Which type of backend to use.

        Note that the file name of the bundle is already stored as an
        attribute.
        """
        self._set_backend(backend)
        self.logfile = None  # enable delayed logging
        self.atoms = atoms
        if os.path.exists(self.filename):
            # The output directory already exists.
            barrier()  # all must have time to see it exists
            if not self.is_bundle(self.filename, allowempty=True):
                raise OSError(
                    'Filename "' + self.filename +
                    '" already exists, but is not a BundleTrajectory.' +
                    ' Cowardly refusing to remove it.')
            if self.is_empty_bundle(self.filename):
                barrier()
                self.log(f'Deleting old "{self.filename}" as it is empty')
                self.delete_bundle(self.filename)
            elif not backup:
                barrier()
                self.log(
                    f'Deleting old "{self.filename}" as backup is turned off.')
                self.delete_bundle(self.filename)
            else:
                barrier()
                # Make a backup file
                bakname = self.filename + '.bak'
                if os.path.exists(bakname):
                    barrier()  # All must see it exists
                    self.log(f'Deleting old backup file "{bakname}"')
                    self.delete_bundle(bakname)
                self.log(f'Renaming "{self.filename}" to "{bakname}"')
                self._rename_bundle(self.filename, bakname)
        # Ready to create a new bundle.
        barrier()
        self.log(f'Creating new "{self.filename}"')
        self._make_bundledir(self.filename)
        self.state = 'prewrite'
        self._write_metadata({})
        self._write_nframes(0)  # Mark new bundle as empty
        self._open_log()
        self.nframes = 0

    def _open_read(self):
        "Open a bundle trajectory for reading."
        if not os.path.exists(self.filename):
            raise OSError('File not found: ' + self.filename)
        if not self.is_bundle(self.filename):
            raise OSError('Not a BundleTrajectory: ' + self.filename)
        self.state = 'read'
        # Read the metadata
        metadata = self._read_metadata()
        self.metadata = metadata
        if metadata['version'] > self.version:
            raise NotImplementedError(
                'This version of ASE cannot read a BundleTrajectory version ' +
                str(metadata['version']))
        if metadata['subtype'] not in ('normal', 'split'):
            raise NotImplementedError(
                'This version of ASE cannot read BundleTrajectory subtype ' +
                metadata['subtype'])
        self.subtype = metadata['subtype']
        if metadata['backend'] == 'ulm':
            self.singleprecision = metadata['ulm.singleprecision']
        self._set_backend(metadata['backend'])
        self.nframes = self._read_nframes()
        if self.nframes == 0:
            raise OSError('Empty BundleTrajectory')
        self.datatypes = metadata['datatypes']
        try:
            self.pythonmajor = metadata['python_ver'][0]
        except KeyError:
            self.pythonmajor = 2  # Assume written with Python 2.
        # We need to know if we are running Python 3.X and try to read
        # a bundle written with Python 2.X
        self.backend.readpy2 = (self.pythonmajor == 2)
        self.state = 'read'

    def _open_append(self, atoms, backend):
        """Open a trajectory for writing in append mode.

        If there is no pre-existing bundle, it is just opened in write mode
        instead.

        Parameters:
        atoms: Object to be written.
        backend: The backend to be used if a new bundle is opened.  Ignored
            if we append to existing bundle, as the backend cannot be
            changed.

        The filename is already stored as an attribute.
        """
        if not os.path.exists(self.filename):
            # OK, no old bundle.  Open as for write instead.
            barrier()
            self._open_write(atoms, False, backend)
            return
        if not self.is_bundle(self.filename):
            raise OSError('Not a BundleTrajectory: ' + self.filename)
        self.state = 'read'
        metadata = self._read_metadata()
        self.metadata = metadata
        if metadata['version'] != self.version:
            raise NotImplementedError(
                'Cannot append to a BundleTrajectory version '
                '%s (supported version is %s)' % (str(metadata['version']),
                                                  str(self.version)))
        if metadata['subtype'] not in ('normal', 'split'):
            raise NotImplementedError(
                'This version of ASE cannot append to BundleTrajectory '
                'subtype ' + metadata['subtype'])
        self.subtype = metadata['subtype']
        if metadata['backend'] == 'ulm':
            self.singleprecision = metadata['ulm.singleprecision']
        self._set_backend(metadata['backend'])
        self.nframes = self._read_nframes()
        self._open_log()
        self.log('Opening "%s" in append mode (nframes=%i)' % (self.filename,
                                                               self.nframes))
        self.state = 'write'
        self.atoms = atoms

    @property
    def path(self):
        return Path(self.filename)

    @property
    def metadata_path(self):
        return self.path / 'metadata.json'

    def _write_nframes(self, n):
        "Write the number of frames in the bundle."
        assert self.state == 'write' or self.state == 'prewrite'
        with paropen(self.path / 'frames', 'w') as fd:
            fd.write(str(n) + '\n')

    def _read_nframes(self):
        "Read the number of frames."
        return int((self.path / 'frames').read_text())

    def _write_metadata(self, metadata):
        """Write the metadata file of the bundle.

        Modifies the metadata dictionary!
        """
        # Add standard fields that must always be present.
        assert self.state == 'write' or self.state == 'prewrite'
        metadata['format'] = 'BundleTrajectory'
        metadata['version'] = self.version
        metadata['subtype'] = self.subtype
        metadata['backend'] = self.backend_name
        if self.backend_name == 'ulm':
            metadata['ulm.singleprecision'] = self.singleprecision
        metadata['python_ver'] = tuple(sys.version_info)
        encode = jsonio.MyEncoder(indent=4).encode
        fido = encode(metadata)
        with paropen(self.metadata_path, 'w') as fd:
            fd.write(fido)

    def _read_metadata(self):
        """Read the metadata."""
        assert self.state == 'read'
        return jsonio.decode(self.metadata_path.read_text())

    @staticmethod
    def is_bundle(filename, allowempty=False):
        """Check if a filename exists and is a BundleTrajectory.

        If allowempty=True, an empty folder is regarded as an
        empty BundleTrajectory."""
        filename = Path(filename)
        if not filename.is_dir():
            return False
        if allowempty and not os.listdir(filename):
            return True  # An empty BundleTrajectory
        metaname = filename / 'metadata.json'
        if metaname.is_file():
            mdata = jsonio.decode(metaname.read_text())
        else:
            return False

        try:
            return mdata['format'] == 'BundleTrajectory'
        except KeyError:
            return False

    @staticmethod
    def is_empty_bundle(filename):
        """Check if a filename is an empty bundle.

        Assumes that it is a bundle."""
        if not os.listdir(filename):
            return True  # Empty folders are empty bundles.
        with open(os.path.join(filename, 'frames'), 'rb') as fd:
            nframes = int(fd.read())

        # File may be removed by the master immediately after this.
        barrier()
        return nframes == 0

    @classmethod
    def delete_bundle(cls, filename):
        "Deletes a bundle."
        if world.rank == 0:
            # Only the master deletes
            if not cls.is_bundle(filename, allowempty=True):
                # BUGFIX: the message previously never interpolated the
                # offending filename.
                raise OSError(
                    f'Cannot remove "{filename}" as it is not a bundle '
                    'trajectory.'
                )
            if os.path.islink(filename):
                # A symbolic link to a bundle.  Only remove the link.
                os.remove(filename)
            else:
                # A real bundle
                shutil.rmtree(filename)
        else:
            # All other tasks wait for the directory to go away.
            while os.path.exists(filename):
                time.sleep(1)
        # The master may not proceed before all tasks have seen the
        # directory go away, as it might otherwise create a new bundle
        # with the same name, fooling the wait loop in _make_bundledir.
        barrier()

    def _rename_bundle(self, oldname, newname):
        "Rename a bundle.  Used to create the .bak"
        if self.master:
            os.rename(oldname, newname)
        else:
            while os.path.exists(oldname):
                time.sleep(1)
        # The master may not proceed before all tasks have seen the
        # directory go away.
        barrier()

    def _make_bundledir(self, filename):
        """Make the main bundle directory.

        Since all MPI tasks might write to it, all tasks must wait for
        the directory to appear.
        """
        self.log('Making directory ' + filename)
        assert not os.path.isdir(filename)
        barrier()
        if self.master:
            os.mkdir(filename)
        else:
            i = 0
            while not os.path.isdir(filename):
                time.sleep(1)
                i += 1
                if i > 10:
                    self.log('Waiting %d seconds for %s to appear!'
                             % (i, filename))

    def _make_framedir(self, frame):
        """Make subdirectory for the frame.

        As only the master writes to it, no synchronization between
        MPI tasks is necessary.
        """
        framedir = os.path.join(self.filename, 'F' + str(frame))
        if self.master:
            self.log('Making directory ' + framedir)
            os.mkdir(framedir)
        return framedir

    def pre_write_attach(self, function, interval=1, *args, **kwargs):
        """Attach a function to be called before writing begins.

        function: The function or callable object to be called.

        interval: How often the function is called.  Default: every time (1).

        All other arguments are stored, and passed to the function.
        """
        if not callable(function):
            raise ValueError('Callback object must be callable.')
        self.pre_observers.append((function, interval, args, kwargs))

    def post_write_attach(self, function, interval=1, *args, **kwargs):
        """Attach a function to be called after writing ends.

        function: The function or callable object to be called.

        interval: How often the function is called.  Default: every time (1).

        All other arguments are stored, and passed to the function.
        """
        if not callable(function):
            raise ValueError('Callback object must be callable.')
        self.post_observers.append((function, interval, args, kwargs))

    def _call_observers(self, obs):
        "Call pre/post write observers."
        for function, interval, args, kwargs in obs:
            if (self.nframes + 1) % interval == 0:
                function(*args, **kwargs)
class UlmBundleBackend:
    """Backend for BundleTrajectories stored as ASE Ulm files."""

    def __init__(self, master, singleprecision):
        # Store if this backend will actually write anything
        # (only the MPI master rank writes to disk).
        self.writesmall = master
        self.writelarge = master
        self.singleprecision = singleprecision

        # Integer data may be downconverted to the following types
        # (ordered smallest-first, so the first match is the tightest fit).
        self.integral_dtypes = ['uint8', 'int8', 'uint16', 'int16',
                                'uint32', 'int32', 'uint64', 'int64']
        # Dict comprehensions not supported in Python 2.6 :-(
        self.int_dtype = {k: getattr(np, k)
                          for k in self.integral_dtypes}
        # Per-dtype value ranges and byte sizes used by write() when
        # deciding whether an array can be stored more compactly.
        self.int_minval = {k: np.iinfo(self.int_dtype[k]).min
                           for k in self.integral_dtypes}
        self.int_maxval = {k: np.iinfo(self.int_dtype[k]).max
                           for k in self.integral_dtypes}
        self.int_itemsize = {k: np.dtype(self.int_dtype[k]).itemsize
                             for k in self.integral_dtypes}

    def write_small(self, framedir, smalldata):
        "Write small data to be written jointly."
        if self.writesmall:
            with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'w') as fd:
                fd.write(**smalldata)

    def write(self, framedir, name, data):
        "Write data to separate file."
        if self.writelarge:
            # Remember the original shape/dtype so read() can restore them.
            shape = data.shape
            dtype = str(data.dtype)
            stored_as = dtype
            all_identical = False
            # Check if it is a type that can be stored with less space
            if np.issubdtype(data.dtype, np.integer):
                # An integer type, we may want to convert
                minval = data.min()
                maxval = data.max()

                # ulm cannot write np.bool_:
                all_identical = bool(minval == maxval)
                if all_identical:
                    data = int(data.flat[0])  # Convert to standard integer
                else:
                    for typ in self.integral_dtypes:
                        if (minval >= self.int_minval[typ] and
                            maxval <= self.int_maxval[typ] and
                                data.itemsize > self.int_itemsize[typ]):

                            # Convert to smaller type
                            stored_as = typ
                            data = data.astype(self.int_dtype[typ])
            elif data.dtype == np.float32 or data.dtype == np.float64:
                # A single repeated float is stored as one scalar value.
                all_identical = bool(data.min() == data.max())
                if all_identical:
                    data = float(data.flat[0])  # Convert to standard float
                elif data.dtype == np.float64 and self.singleprecision:
                    # Downconvert double to single precision
                    stored_as = 'float32'
                    data = data.astype(np.float32)
            fn = os.path.join(framedir, name + '.ulm')
            with ulmopen(fn, 'w') as fd:
                fd.write(shape=shape,
                         dtype=dtype,
                         stored_as=stored_as,
                         all_identical=all_identical,
                         data=data)

    def read_small(self, framedir):
        "Read small data."
        with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'r') as fd:
            return fd.asdict()

    def read(self, framedir, name):
        "Read data from separate file."
        fn = os.path.join(framedir, name + '.ulm')
        with ulmopen(fn, 'r') as fd:
            if fd.all_identical:
                # Only a single data value; rebuild the full array from it.
                data = np.zeros(fd.shape,
                                dtype=getattr(np, fd.dtype)) + fd.data
            elif fd.dtype == fd.stored_as:
                # Easy, the array can be returned as-is.
                data = fd.data
            else:
                # Cast the data back to the original (larger) dtype.
                data = fd.data.astype(getattr(np, fd.dtype))
        return data

    def read_info(self, framedir, name, split=None):
        """Read information about file contents without reading the data.

        Information is a dictionary containing as a minimum the shape and
        type.
        """
        fn = os.path.join(framedir, name + '.ulm')
        if split is None or os.path.exists(fn):
            # Single-file storage: report the metadata directly.
            with ulmopen(fn, 'r') as fd:
                info = {}
                info['shape'] = fd.shape
                info['type'] = fd.dtype
                info['stored_as'] = fd.stored_as
                info['identical'] = fd.all_identical
                return info
        else:
            # Split storage: aggregate shape/flags over all fragments.
            info = {}
            for i in range(split):
                fn = os.path.join(framedir, name + '_' + str(i) + '.ulm')
                with ulmopen(fn, 'r') as fd:
                    if i == 0:
                        info['shape'] = list(fd.shape)
                        info['type'] = fd.dtype
                        info['stored_as'] = fd.stored_as
                        info['identical'] = fd.all_identical
                    else:
                        info['shape'][0] += fd.shape[0]
                        assert info['type'] == fd.dtype
                        info['identical'] = (info['identical']
                                             and fd.all_identical)
            info['shape'] = tuple(info['shape'])
            return info

    def set_fragments(self, nfrag):
        # Number of fragments used by read_split for split bundles.
        self.nfrag = nfrag

    def read_split(self, framedir, name):
        """Read data from multiple files.

        Falls back to reading from single file if that is how data is stored.

        Returns the data and an object indicating if the data was really
        read from split files.  The latter object is False if not
        read from split files, but is an array of the segment length if
        split files were used.
        """
        data = []
        if os.path.exists(os.path.join(framedir, name + '.ulm')):
            # Not stored in split form!
            return (self.read(framedir, name), False)
        for i in range(self.nfrag):
            suf = '_%d' % (i,)
            data.append(self.read(framedir, name + suf))
        seglengths = [len(d) for d in data]
        return (np.concatenate(data), seglengths)

    def close(self, log=None):
        """Close anything that needs to be closed by the backend.

        The default backend does nothing here.
        """
def read_bundletrajectory(filename, index=-1):
    """Reads one or more atoms objects from a BundleTrajectory.

    Arguments:

    filename: str
        The name of the bundle (really a directory!)
    index: int
        An integer specifying which frame to read, or an index object
        for reading multiple frames.  Default: -1 (reads the last
        frame).
    """
    traj = BundleTrajectory(filename, mode='r')
    if isinstance(index, int):
        yield traj[index]
        # BUGFIX: an integer index selects a single frame; without this
        # return, advancing the generator fell through to
        # index.indices(), which raises AttributeError for an int.
        return

    for i in range(*index.indices(len(traj))):
        yield traj[i]
def write_bundletrajectory(filename, images, append=False):
    """Write image(s) to a BundleTrajectory.

    Write also energy, forces, and stress if they are already
    calculated.
    """
    traj = BundleTrajectory(filename, mode=('a' if append else 'w'))

    if hasattr(images, 'get_positions'):
        # A single Atoms object was given; treat it as a one-image list.
        images = [images]

    for atoms in images:
        # Avoid potentially expensive calculations:
        calc = atoms.calc
        if hasattr(calc, 'calculation_required'):
            for quantity in ('energy', 'forces', 'stress', 'magmoms'):
                required = calc.calculation_required(atoms, [quantity])
                traj.select_data(quantity, not required)
        traj.write(atoms)
    traj.close()
def print_bundletrajectory_info(filename):
    """Prints information about a BundleTrajectory.

    Mainly intended to be called from a command line tool.
    """
    if not BundleTrajectory.is_bundle(filename):
        raise ValueError('Not a BundleTrajectory!')
    if BundleTrajectory.is_empty_bundle(filename):
        print(filename, 'is an empty BundleTrajectory.')
        return
    # Read the metadata
    fn = os.path.join(filename, 'metadata.json')
    with open(fn) as fd:
        metadata = jsonio.decode(fd.read())

    # BUGFIX: the f-string previously contained no placeholder, so the
    # bundle name was never printed.
    print(f'Metadata information of BundleTrajectory "{filename}":')
    for k, v in metadata.items():
        if k != 'datatypes':
            print(f" {k}: {v}")
    with open(os.path.join(filename, 'frames'), 'rb') as fd:
        nframes = int(fd.read())
    print('Number of frames: %i' % (nframes,))
    print('Data types:')
    for k, v in metadata['datatypes'].items():
        if v == 'once':
            print(f' {k}: First frame only.')
        elif v:
            print(f' {k}: All frames.')
    # Look at first frame
    if metadata['backend'] == 'ulm':
        backend = UlmBundleBackend(True, False)
    else:
        raise NotImplementedError(
            f'Backend {metadata["backend"]} not supported.')
    frame = os.path.join(filename, 'F0')
    small = backend.read_small(frame)
    print('Contents of first frame:')
    for k, v in small.items():
        if k == 'constraints':
            if v:
                print(f' {len(v)} constraints are present')
            else:
                print(' Constraints are absent.')
        elif k == 'pbc':
            print(f' Periodic boundary conditions: {v!s}')
        elif k == 'natoms':
            print(' Number of atoms: %i' % (v,))
        elif hasattr(v, 'shape'):
            print(f' {k}: shape = {v.shape!s}, type = {v.dtype!s}')
            if k == 'cell':
                print(' [[%12.6f, %12.6f, %12.6f],' % tuple(v[0]))
                print(' [%12.6f, %12.6f, %12.6f],' % tuple(v[1]))
                print(' [%12.6f, %12.6f, %12.6f]]' % tuple(v[2]))
        else:
            print(f' {k}: {v!s}')
    # Read info from separate files.
    if metadata['subtype'] == 'split':
        nsplit = small['fragments']
    else:
        nsplit = False
    for k, v in metadata['datatypes'].items():
        if v and k not in small:
            info = backend.read_info(frame, k, nsplit)
            infoline = f' {k}: '
            # Use distinct loop names so the outer k/v are not shadowed.
            for infokey, infoval in info.items():
                infoline += f'{infokey} = {infoval!s}, '
            infoline = infoline[:-2] + '.'  # Fix punctuation.
            print(infoline)
def main():
    """Command-line entry point: print info about each bundle listed."""
    import optparse
    parser = optparse.OptionParser(
        usage='python -m ase.io.bundletrajectory '
        'a.bundle [b.bundle ...]',
        description='Print information about '
        'the contents of one or more bundletrajectories.')
    _opts, bundle_names = parser.parse_args()
    for bundle_name in bundle_names:
        print_bundletrajectory_info(bundle_name)
# Allow use as a script: python -m ase.io.bundletrajectory <bundle> [...]
if __name__ == '__main__':
    main()