Coverage for ase / io / bundletrajectory.py: 74.04%
574 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-30 08:22 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-30 08:22 +0000
1# fmt: off
3"""bundletrajectory - a module for I/O from large MD simulations.
5The BundleTrajectory class writes trajectory into a directory with the
6following structure, except we now use JSON instead of pickle,
7so this text needs updating::
9 filename.bundle (dir)
10 metadata.json Data about the file format, and about which
11 data is present.
12 frames The number of frames (ascii file)
13 F0 (dir) Frame number 0
14 smalldata.ulm Small data structures in a dictionary
15 (pbc, cell, ...)
16 numbers.ulm Atomic numbers
17 positions.ulm Positions
18 momenta.ulm Momenta
19 ...
20 F1 (dir)
22There is a folder for each frame, and the data is in the ASE Ulm format.
23"""
25import os
26import shutil
27import sys
28import time
29from pathlib import Path
31import numpy as np
33from ase import Atoms
34from ase.calculators.singlepoint import (
35 PropertyNotImplementedError,
36 SinglePointCalculator,
37)
39# The system json module causes memory leaks! Use ase's own.
40# import json
41from ase.io import jsonio
42from ase.io.ulm import open as ulmopen
43from ase.parallel import barrier, paropen, world
class BundleTrajectory:
    """Reads and writes atoms into a .bundle directory.

    The BundleTrajectory is an alternative way of storing
    trajectories, intended for large-scale molecular dynamics
    simulations, where a single flat file becomes unwieldy.  Instead,
    the data is stored in directory, a 'bundle' (the name bundle is
    inspired from bundles in Mac OS, which are really just directories
    the user is supposed to think of as a single file-like unit).

    Parameters
    ----------
    filename:
        The name of the directory.  Preferably ending in .bundle.

    mode (optional):
        The file opening mode.  'r' means open for reading, 'w' for
        writing and 'a' for appending.  Default: 'r'.  If opening in
        write mode, and the filename already exists, the old file is
        renamed to .bak (any old .bak file is deleted), except if the
        existing file is empty.

    atoms (optional):
        The atoms that will be written.  Can only be specified in
        write or append mode.  If not specified, the atoms must be
        given as an argument to the .write() method instead.

    backup=True:
        Use backup=False to disable renaming of an existing file.

    backend='ulm':
        Request a backend.  Currently only 'ulm' is supported.
        Only honored when writing.

    singleprecision=False:
        Store floating point data in single precision (ulm backend only).
    """

    slavelog = True  # Log from all nodes

    def __init__(self, filename, mode='r', atoms=None, backup=True,
                 backend='ulm', singleprecision=False):
        self.state = 'constructing'
        self.filename = filename
        self.pre_observers = []  # callback functions before write is performed
        self.post_observers = []  # callback functions after write is performed
        self.master = world.rank == 0
        self.extra_data = []
        self.singleprecision = singleprecision
        self._set_defaults()
        if mode == 'r':
            if atoms is not None:
                raise ValueError('You cannot specify atoms in read mode.')
            self._open_read()
        elif mode == 'w':
            self._open_write(atoms, backup, backend)
        elif mode == 'a':
            self._open_append(atoms, backend)
        else:
            raise ValueError('Unknown mode: ' + str(mode))

    def _set_defaults(self):
        "Set default values for internal parameters."
        self.version = 1
        self.subtype = 'normal'
        # Which data is written per frame.  'once' means first frame only.
        self.datatypes = {'positions': True,
                          'numbers': 'once',
                          'tags': 'once',
                          'masses': 'once',
                          'momenta': True,
                          'forces': True,
                          'energy': True,
                          'energies': False,
                          'stress': False,
                          'magmoms': True}

    def _set_backend(self, backend):
        """Set the backed doing the actual I/O."""
        if backend is not None:
            self.backend_name = backend

        if self.backend_name == 'ulm':
            self.backend = UlmBundleBackend(self.master, self.singleprecision)
        else:
            raise NotImplementedError(
                'This version of ASE cannot use BundleTrajectory '
                'with backend "%s"' % self.backend_name)

    def write(self, atoms=None):
        """Write the atoms to the file.

        If the atoms argument is not given, the atoms object specified
        when creating the trajectory object is used.
        """
        # Check that we are in write mode
        if self.state == 'prewrite':
            self.state = 'write'
            assert self.nframes == 0
        elif self.state != 'write':
            raise RuntimeError('Cannot write in ' + self.state + ' mode.')

        if atoms is None:
            atoms = self.atoms

        # Handle NEB etc.  If atoms is just a normal Atoms object, it is used
        # as-is.
        for image in atoms.iterimages():
            self._write_atoms(image)

    def _write_atoms(self, atoms):
        "Write a single atoms object to file."
        self._call_observers(self.pre_observers)
        self.log('Beginning to write frame ' + str(self.nframes))
        framedir = self._make_framedir(self.nframes)

        # Check which data should be written the first time:
        # Modify datatypes so any element of type 'once' becomes true
        # for the first frame but false for subsequent frames.
        datatypes = {}
        for k, v in self.datatypes.items():
            if v == 'once':
                v = (self.nframes == 0)
            datatypes[k] = v

        # Write 'small' data structures.  They are written jointly.
        smalldata = {'pbc': atoms.get_pbc(),
                     'cell': atoms.get_cell(),
                     'natoms': atoms.get_global_number_of_atoms(),
                     'constraints': atoms.constraints}
        if datatypes.get('energy'):
            try:
                smalldata['energy'] = atoms.get_potential_energy()
            except (RuntimeError, PropertyNotImplementedError):
                # No energy available; stop trying in later frames too.
                self.datatypes['energy'] = False
        if datatypes.get('stress'):
            try:
                smalldata['stress'] = atoms.get_stress()
            except PropertyNotImplementedError:
                self.datatypes['stress'] = False
        self.backend.write_small(framedir, smalldata)

        # Write the large arrays.
        if datatypes.get('positions'):
            self.backend.write(framedir, 'positions', atoms.get_positions())
        if datatypes.get('numbers'):
            self.backend.write(framedir, 'numbers', atoms.get_atomic_numbers())
        if datatypes.get('tags'):
            if atoms.has('tags'):
                self.backend.write(framedir, 'tags', atoms.get_tags())
            else:
                self.datatypes['tags'] = False
        if datatypes.get('masses'):
            if atoms.has('masses'):
                self.backend.write(framedir, 'masses', atoms.get_masses())
            else:
                self.datatypes['masses'] = False
        if datatypes.get('momenta'):
            if atoms.has('momenta'):
                self.backend.write(framedir, 'momenta', atoms.get_momenta())
            else:
                self.datatypes['momenta'] = False
        if datatypes.get('magmoms'):
            if atoms.has('initial_magmoms'):
                self.backend.write(framedir, 'magmoms',
                                   atoms.get_initial_magnetic_moments())
            else:
                self.datatypes['magmoms'] = False
        if datatypes.get('forces'):
            try:
                x = atoms.get_forces()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['forces'] = False
            else:
                self.backend.write(framedir, 'forces', x)
                del x
        if datatypes.get('energies'):
            try:
                x = atoms.get_potential_energies()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['energies'] = False
            else:
                self.backend.write(framedir, 'energies', x)
                del x
        # Write any extra data
        for (label, source, once) in self.extra_data:
            if self.nframes == 0 or not once:
                if source is not None:
                    x = source()
                else:
                    x = atoms.get_array(label)
                self.backend.write(framedir, label, x)
                del x
                if once:
                    self.datatypes[label] = 'once'
                else:
                    self.datatypes[label] = True
        # Finally, write metadata if it is the first frame
        if self.nframes == 0:
            metadata = {'datatypes': self.datatypes}
            self._write_metadata(metadata)
        self._write_nframes(self.nframes + 1)
        self._call_observers(self.post_observers)
        self.log('Done writing frame ' + str(self.nframes))
        self.nframes += 1

    def select_data(self, data, value):
        """Selects if a given data type should be written.

        Data can be written in every frame (specify True), in the
        first frame only (specify 'only') or not at all (specify
        False).  Not all data types support the 'only' keyword, if not
        supported it is interpreted as True.

        The following data types are supported, the letter in parenthesis
        indicates the default:

        positions (T), numbers (O), tags (O), masses (O), momenta (T),
        forces (T), energy (T), energies (F), stress (F), magmoms (T)

        If a given property is not present during the first write, it
        will be not be saved at all.
        """
        if value not in (True, False, 'once'):
            raise ValueError('Unknown write mode')
        if data not in self.datatypes:
            raise ValueError('Unsupported data type: ' + data)
        self.datatypes[data] = value

    def set_extra_data(self, name, source=None, once=False):
        """Adds extra data to be written.

        Parameters
        ----------
        name: The name of the data.

        source (optional): If specified, a callable object returning
        the data to be written.  If not specified it is instead
        assumed that the atoms contains the data as an array of the
        same name.

        once (optional): If specified and True, the data will only be
        written to the first frame.
        """
        self.extra_data.append((name, source, once))

    def close(self):
        "Closes the trajectory."
        self.state = 'closed'
        lf = getattr(self, 'logfile', None)
        self.backend.close(log=lf)
        if lf is not None:
            lf.close()
            del self.logfile

    def log(self, text):
        """Write to the log file in the bundle.

        Logging is only possible in write/append mode.

        This function is mainly for internal use, but can also be called by
        the user.
        """
        if not (self.master or self.slavelog):
            return
        text = time.asctime() + ': ' + text
        if hasattr(self, 'logfile'):
            # Logging enabled
            if self.logfile is None:
                # Logfile not yet open, buffer the message until it is.
                try:
                    self.logdata.append(text)
                except AttributeError:
                    self.logdata = [text]
            else:
                self.logfile.write(text + '\n')
                self.logfile.flush()
        else:
            raise RuntimeError('Cannot write to log file in mode ' +
                               self.state)

    # __getitem__ is the main reading method.
    def __getitem__(self, n):
        return self._read(n)

    def _read(self, n):
        """Read an atoms object from the BundleTrajectory."""
        if self.state != 'read':
            raise OSError(f'Cannot read in {self.state} mode')
        if n < 0:
            n += self.nframes
        if n < 0 or n >= self.nframes:
            raise IndexError('Trajectory index %d out of range [0, %d['
                             % (n, self.nframes))

        framedir = os.path.join(self.filename, 'F' + str(n))
        framezero = os.path.join(self.filename, 'F0')
        smalldata = self.backend.read_small(framedir)
        data = {}
        data['pbc'] = smalldata['pbc']
        data['cell'] = smalldata['cell']
        data['constraint'] = smalldata['constraints']
        if self.subtype == 'split':
            self.backend.set_fragments(smalldata['fragments'])
            self.atom_id, _dummy = self.backend.read_split(framedir, 'ID')
        else:
            self.atom_id = None
        atoms = Atoms(**data)
        natoms = smalldata['natoms']
        for name in ('positions', 'numbers', 'tags', 'masses',
                     'momenta'):
            if self.datatypes.get(name):
                atoms.arrays[name] = self._read_data(framezero, framedir,
                                                     name, self.atom_id)
                assert len(atoms.arrays[name]) == natoms

        # Attach a calculator if an energy was stored.
        if self.datatypes.get('energy'):
            if self.datatypes.get('forces'):
                forces = self.backend.read(framedir, 'forces')
            else:
                forces = None
            if self.datatypes.get('magmoms'):
                magmoms = self.backend.read(framedir, 'magmoms')
            else:
                magmoms = None
            calc = SinglePointCalculator(atoms,
                                         energy=smalldata.get('energy'),
                                         forces=forces,
                                         stress=smalldata.get('stress'),
                                         magmoms=magmoms)
            atoms.calc = calc
        return atoms

    def read_extra_data(self, name, n=0):
        """Read extra data stored alongside the atoms.

        Currently only used to read data stored by an NPT dynamics object.
        The data is not associated with individual atoms.
        """
        if self.state != 'read':
            raise OSError(f'Cannot read extra data in {self.state} mode')
        # Handle negative n.
        if n < 0:
            n += self.nframes
        if n < 0 or n >= self.nframes:
            raise IndexError('Trajectory index %d out of range [0, %d['
                             % (n, self.nframes))
        framedir = os.path.join(self.filename, 'F' + str(n))
        framezero = os.path.join(self.filename, 'F0')
        return self._read_data(framezero, framedir, name, self.atom_id)

    def _read_data(self, f0, f, name, atom_id):
        "Read single data item."

        if self.subtype == 'normal':
            # 'once' data lives in frame zero only.
            if self.datatypes[name] == 'once':
                d = self.backend.read(f0, name)
            else:
                d = self.backend.read(f, name)
        elif self.subtype == 'split':
            if self.datatypes[name] == 'once':
                d, issplit = self.backend.read_split(f0, name)
                atom_id, _dummy = self.backend.read_split(f0, 'ID')
            else:
                d, issplit = self.backend.read_split(f, name)
            if issplit:
                assert atom_id is not None
                assert len(d) == len(atom_id)
                # Scatter data back into atom-id order.
                d[atom_id] = np.array(d)
        return d

    def __len__(self):
        return self.nframes

    def _open_log(self):
        "Open the log file."
        if not (self.master or self.slavelog):
            return
        if self.master:
            lfn = os.path.join(self.filename, 'log.txt')
        else:
            lfn = os.path.join(self.filename, ('log-node%d.txt' %
                                               (world.rank,)))
        self.logfile = open(lfn, 'a', 1)   # Append to log if it exists.
        if hasattr(self, 'logdata'):
            # Flush messages buffered before the file could be opened.
            for text in self.logdata:
                self.logfile.write(text + '\n')
            self.logfile.flush()
            del self.logdata

    def _open_write(self, atoms, backup, backend):
        """Open a bundle trajectory for writing.

        Parameters
        ----------
        atoms: Object to be written.
        backup: (bool) Whether a backup is kept if file already exists.
        backend: Which type of backend to use.

        Note that the file name of the bundle is already stored as an
        attribute.
        """
        self._set_backend(backend)
        self.logfile = None  # enable delayed logging
        self.atoms = atoms
        if os.path.exists(self.filename):
            # The output directory already exists.
            barrier()  # all must have time to see it exists
            if not self.is_bundle(self.filename, allowempty=True):
                raise OSError(
                    'Filename "' + self.filename +
                    '" already exists, but is not a BundleTrajectory.' +
                    ' Cowardly refusing to remove it.')
            if self.is_empty_bundle(self.filename):
                barrier()
                self.log(f'Deleting old "{self.filename}" as it is empty')
                self.delete_bundle(self.filename)
            elif not backup:
                barrier()
                self.log(
                    f'Deleting old "{self.filename}" as backup is turned off.')
                self.delete_bundle(self.filename)
            else:
                barrier()
                # Make a backup file
                bakname = self.filename + '.bak'
                if os.path.exists(bakname):
                    barrier()  # All must see it exists
                    self.log(f'Deleting old backup file "{bakname}"')
                    self.delete_bundle(bakname)
                self.log(f'Renaming "{self.filename}" to "{bakname}"')
                self._rename_bundle(self.filename, bakname)
        # Ready to create a new bundle.
        barrier()
        self.log(f'Creating new "{self.filename}"')
        self._make_bundledir(self.filename)
        self.state = 'prewrite'
        self._write_metadata({})
        self._write_nframes(0)    # Mark new bundle as empty
        self._open_log()
        self.nframes = 0

    def _open_read(self):
        "Open a bundle trajectory for reading."
        if not os.path.exists(self.filename):
            raise OSError('File not found: ' + self.filename)
        if not self.is_bundle(self.filename):
            raise OSError('Not a BundleTrajectory: ' + self.filename)
        self.state = 'read'
        # Read the metadata
        metadata = self._read_metadata()
        self.metadata = metadata
        if metadata['version'] > self.version:
            raise NotImplementedError(
                'This version of ASE cannot read a BundleTrajectory version ' +
                str(metadata['version']))
        if metadata['subtype'] not in ('normal', 'split'):
            raise NotImplementedError(
                'This version of ASE cannot read BundleTrajectory subtype ' +
                metadata['subtype'])
        self.subtype = metadata['subtype']
        if metadata['backend'] == 'ulm':
            self.singleprecision = metadata['ulm.singleprecision']
        self._set_backend(metadata['backend'])
        self.nframes = self._read_nframes()
        if self.nframes == 0:
            raise OSError('Empty BundleTrajectory')
        self.datatypes = metadata['datatypes']
        try:
            self.pythonmajor = metadata['python_ver'][0]
        except KeyError:
            self.pythonmajor = 2   # Assume written with Python 2.
        # We need to know if we are running Python 3.X and try to read
        # a bundle written with Python 2.X
        self.backend.readpy2 = (self.pythonmajor == 2)
        self.state = 'read'

    def _open_append(self, atoms, backend):
        """Open a trajectory for writing in append mode.

        If there is no pre-existing bundle, it is just opened in write mode
        instead.

        Parameters
        ----------
        atoms: Object to be written.
        backend: The backend to be used if a new bundle is opened.  Ignored
            if we append to existing bundle, as the backend cannot be
            changed.

        The filename is already stored as an attribute.
        """
        if not os.path.exists(self.filename):
            # OK, no old bundle.  Open as for write instead.
            barrier()
            self._open_write(atoms, False, backend)
            return
        if not self.is_bundle(self.filename):
            raise OSError('Not a BundleTrajectory: ' + self.filename)
        self.state = 'read'
        metadata = self._read_metadata()
        self.metadata = metadata
        if metadata['version'] != self.version:
            raise NotImplementedError(
                'Cannot append to a BundleTrajectory version '
                '%s (supported version is %s)' % (str(metadata['version']),
                                                  str(self.version)))
        if metadata['subtype'] not in ('normal', 'split'):
            raise NotImplementedError(
                'This version of ASE cannot append to BundleTrajectory '
                'subtype ' + metadata['subtype'])
        self.subtype = metadata['subtype']
        if metadata['backend'] == 'ulm':
            self.singleprecision = metadata['ulm.singleprecision']
        self._set_backend(metadata['backend'])
        self.nframes = self._read_nframes()
        self._open_log()
        self.log('Opening "%s" in append mode (nframes=%i)' % (self.filename,
                                                               self.nframes))
        self.state = 'write'
        self.atoms = atoms

    @property
    def path(self):
        return Path(self.filename)

    @property
    def metadata_path(self):
        return self.path / 'metadata.json'

    def _write_nframes(self, n):
        "Write the number of frames in the bundle."
        assert self.state == 'write' or self.state == 'prewrite'
        with paropen(self.path / 'frames', 'w') as fd:
            fd.write(str(n) + '\n')

    def _read_nframes(self):
        "Read the number of frames."
        return int((self.path / 'frames').read_text())

    def _write_metadata(self, metadata):
        """Write the metadata file of the bundle.

        Modifies the medadata dictionary!
        """
        # Add standard fields that must always be present.
        assert self.state == 'write' or self.state == 'prewrite'
        metadata['format'] = 'BundleTrajectory'
        metadata['version'] = self.version
        metadata['subtype'] = self.subtype
        metadata['backend'] = self.backend_name
        if self.backend_name == 'ulm':
            metadata['ulm.singleprecision'] = self.singleprecision
        metadata['python_ver'] = tuple(sys.version_info)
        encode = jsonio.MyEncoder(indent=4).encode
        fido = encode(metadata)
        with paropen(self.metadata_path, 'w') as fd:
            fd.write(fido)

    def _read_metadata(self):
        """Read the metadata."""
        assert self.state == 'read'
        return jsonio.decode(self.metadata_path.read_text())

    @staticmethod
    def is_bundle(filename, allowempty=False):
        """Check if a filename exists and is a BundleTrajectory.

        If allowempty=True, an empty folder is regarded as an
        empty BundleTrajectory."""
        filename = Path(filename)
        if not filename.is_dir():
            return False
        if allowempty and not os.listdir(filename):
            return True   # An empty BundleTrajectory
        metaname = filename / 'metadata.json'
        if metaname.is_file():
            mdata = jsonio.decode(metaname.read_text())
        else:
            return False

        try:
            return mdata['format'] == 'BundleTrajectory'
        except KeyError:
            return False

    @staticmethod
    def is_empty_bundle(filename):
        """Check if a filename is an empty bundle.

        Assumes that it is a bundle."""
        if not os.listdir(filename):
            return True  # Empty folders are empty bundles.
        with open(os.path.join(filename, 'frames'), 'rb') as fd:
            nframes = int(fd.read())

        # File may be removed by the master immediately after this.
        barrier()
        return nframes == 0

    @classmethod
    def delete_bundle(cls, filename):
        "Deletes a bundle."
        if world.rank == 0:
            # Only the master deletes
            if not cls.is_bundle(filename, allowempty=True):
                # BUGFIX: the message previously contained the literal
                # placeholder "(unknown)" instead of the actual filename.
                raise OSError(
                    f'Cannot remove "{filename}" as it is not a bundle '
                    'trajectory.'
                )
            if os.path.islink(filename):
                # A symbolic link to a bundle.  Only remove the link.
                os.remove(filename)
            else:
                # A real bundle
                shutil.rmtree(filename)
        else:
            # All other tasks wait for the directory to go away.
            while os.path.exists(filename):
                time.sleep(1)
        # The master may not proceed before all tasks have seen the
        # directory go away, as it might otherwise create a new bundle
        # with the same name, fooling the wait loop in _make_bundledir.
        barrier()

    def _rename_bundle(self, oldname, newname):
        "Rename a bundle.  Used to create the .bak"
        if self.master:
            os.rename(oldname, newname)
        else:
            while os.path.exists(oldname):
                time.sleep(1)
        # The master may not proceed before all tasks have seen the
        # directory go away.
        barrier()

    def _make_bundledir(self, filename):
        """Make the main bundle directory.

        Since all MPI tasks might write to it, all tasks must wait for
        the directory to appear.
        """
        self.log('Making directory ' + filename)
        assert not os.path.isdir(filename)
        barrier()
        if self.master:
            os.mkdir(filename)
        else:
            i = 0
            while not os.path.isdir(filename):
                time.sleep(1)
                i += 1
                if i > 10:
                    self.log('Waiting %d seconds for %s to appear!'
                             % (i, filename))

    def _make_framedir(self, frame):
        """Make subdirectory for the frame.

        As only the master writes to it, no synchronization between
        MPI tasks is necessary.
        """
        framedir = os.path.join(self.filename, 'F' + str(frame))
        if self.master:
            self.log('Making directory ' + framedir)
            os.mkdir(framedir)
        return framedir

    def pre_write_attach(self, function, interval=1, *args, **kwargs):
        """Attach a function to be called before writing begins.

        function: The function or callable object to be called.

        interval: How often the function is called.  Default: every time (1).

        All other arguments are stored, and passed to the function.
        """
        if not callable(function):
            raise ValueError('Callback object must be callable.')
        self.pre_observers.append((function, interval, args, kwargs))

    def post_write_attach(self, function, interval=1, *args, **kwargs):
        """Attach a function to be called after writing ends.

        function: The function or callable object to be called.

        interval: How often the function is called.  Default: every time (1).

        All other arguments are stored, and passed to the function.
        """
        if not callable(function):
            raise ValueError('Callback object must be callable.')
        self.post_observers.append((function, interval, args, kwargs))

    def _call_observers(self, obs):
        "Call pre/post write observers."
        for function, interval, args, kwargs in obs:
            if (self.nframes + 1) % interval == 0:
                function(*args, **kwargs)
class UlmBundleBackend:
    """Backend for BundleTrajectories stored as ASE Ulm files.

    Handles all per-frame file I/O for the bundle: the joint
    'smalldata.ulm' file and one '<name>.ulm' file per large array.
    Integer and float arrays may be stored downconverted to save space;
    the original dtype is recorded so reads can cast back.
    """

    def __init__(self, master, singleprecision):
        # Store if this backend will actually write anything
        # (only the MPI master task writes).
        self.writesmall = master
        self.writelarge = master
        # If True, float64 arrays are stored as float32.
        self.singleprecision = singleprecision

        # Integer data may be downconverted to the following types
        # (ordered by increasing item size, so the first match below
        # is the smallest type that can hold the data).
        self.integral_dtypes = ['uint8', 'int8', 'uint16', 'int16',
                                'uint32', 'int32', 'uint64', 'int64']
        # Lookup tables: dtype object, value range and item size per name.
        self.int_dtype = {k: getattr(np, k)
                          for k in self.integral_dtypes}
        self.int_minval = {k: np.iinfo(self.int_dtype[k]).min
                           for k in self.integral_dtypes}
        self.int_maxval = {k: np.iinfo(self.int_dtype[k]).max
                           for k in self.integral_dtypes}
        self.int_itemsize = {k: np.dtype(self.int_dtype[k]).itemsize
                             for k in self.integral_dtypes}

    def write_small(self, framedir, smalldata):
        "Write small data to be written jointly."
        if self.writesmall:
            with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'w') as fd:
                fd.write(**smalldata)

    def write(self, framedir, name, data):
        "Write data to separate file."
        if self.writelarge:
            shape = data.shape
            dtype = str(data.dtype)
            stored_as = dtype
            all_identical = False
            # Check if it a type that can be stored with less space
            if np.issubdtype(data.dtype, np.integer):
                # An integer type, we may want to convert
                minval = data.min()
                maxval = data.max()

                # ulm cannot write np.bool_:
                all_identical = bool(minval == maxval)
                if all_identical:
                    # All elements equal: store a single scalar instead.
                    data = int(data.flat[0])  # Convert to standard integer
                else:
                    # Find a smaller integer type that can hold the range.
                    # After a conversion, data.itemsize shrinks, so the
                    # size test fails for all later (larger) candidates.
                    for typ in self.integral_dtypes:
                        if (minval >= self.int_minval[typ] and
                            maxval <= self.int_maxval[typ] and
                                data.itemsize > self.int_itemsize[typ]):

                            # Convert to smaller type
                            stored_as = typ
                            data = data.astype(self.int_dtype[typ])
            elif data.dtype == np.float32 or data.dtype == np.float64:
                all_identical = bool(data.min() == data.max())
                if all_identical:
                    data = float(data.flat[0])  # Convert to standard float
                elif data.dtype == np.float64 and self.singleprecision:
                    # Downconvert double to single precision
                    stored_as = 'float32'
                    data = data.astype(np.float32)
            fn = os.path.join(framedir, name + '.ulm')
            with ulmopen(fn, 'w') as fd:
                # shape/dtype record the logical array; stored_as and
                # all_identical describe the on-disk representation.
                fd.write(shape=shape,
                         dtype=dtype,
                         stored_as=stored_as,
                         all_identical=all_identical,
                         data=data)

    def read_small(self, framedir):
        "Read small data."
        with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'r') as fd:
            return fd.asdict()

    def read(self, framedir, name):
        "Read data from separate file."
        fn = os.path.join(framedir, name + '.ulm')
        with ulmopen(fn, 'r') as fd:
            if fd.all_identical:
                # Only a single data value: broadcast it back to full shape.
                data = np.zeros(fd.shape,
                                dtype=getattr(np, fd.dtype)) + fd.data
            elif fd.dtype == fd.stored_as:
                # Easy, the array can be returned as-is.
                data = fd.data
            else:
                # Cast the data back to the original dtype.
                data = fd.data.astype(getattr(np, fd.dtype))
        return data

    def read_info(self, framedir, name, split=None):
        """Read information about file contents without reading the data.

        Information is a dictionary containing as aminimum the shape and
        type.
        """
        fn = os.path.join(framedir, name + '.ulm')
        if split is None or os.path.exists(fn):
            # Single-file storage.
            with ulmopen(fn, 'r') as fd:
                info = {}
                info['shape'] = fd.shape
                info['type'] = fd.dtype
                info['stored_as'] = fd.stored_as
                info['identical'] = fd.all_identical
                return info
        else:
            # Split storage: combine info from all fragment files.
            info = {}
            for i in range(split):
                fn = os.path.join(framedir, name + '_' + str(i) + '.ulm')
                with ulmopen(fn, 'r') as fd:
                    if i == 0:
                        info['shape'] = list(fd.shape)
                        info['type'] = fd.dtype
                        info['stored_as'] = fd.stored_as
                        info['identical'] = fd.all_identical
                    else:
                        # Fragments concatenate along the first axis.
                        info['shape'][0] += fd.shape[0]
                        assert info['type'] == fd.dtype
                        info['identical'] = (info['identical']
                                             and fd.all_identical)
            info['shape'] = tuple(info['shape'])
            return info

    def set_fragments(self, nfrag):
        # Number of fragment files per array in 'split' bundles.
        self.nfrag = nfrag

    def read_split(self, framedir, name):
        """Read data from multiple files.

        Falls back to reading from single file if that is how data is stored.

        Returns the data and an object indicating if the data was really
        read from split files.  The latter object is False if not
        read from split files, but is an array of the segment length if
        split files were used.
        """
        data = []
        if os.path.exists(os.path.join(framedir, name + '.ulm')):
            # Not stored in split form!
            return (self.read(framedir, name), False)
        for i in range(self.nfrag):
            suf = '_%d' % (i,)
            data.append(self.read(framedir, name + suf))
        seglengths = [len(d) for d in data]
        return (np.concatenate(data), seglengths)

    def close(self, log=None):
        """Close anything that needs to be closed by the backend.

        The default backend does nothing here.
        """
def read_bundletrajectory(filename, index=-1):
    """Reads one or more atoms objects from a BundleTrajectory.

    Arguments:

    filename: str
        The name of the bundle (really a directory!)
    index: int
        An integer specifying which frame to read, or an index object
        for reading multiple frames.  Default: -1 (reads the last
        frame).
    """
    traj = BundleTrajectory(filename, mode='r')
    if isinstance(index, int):
        yield traj[index]
        # BUGFIX: must stop here; an int has no .indices() method, so
        # falling through to the loop below raised AttributeError if the
        # caller iterated past the first frame.
        return

    for i in range(*index.indices(len(traj))):
        yield traj[i]
def write_bundletrajectory(filename, images, append=False):
    """Write image(s) to a BundleTrajectory.

    Write also energy, forces, and stress if they are already
    calculated.
    """
    traj = BundleTrajectory(filename, mode='a' if append else 'w')

    # A single Atoms object is wrapped in a list.
    if hasattr(images, 'get_positions'):
        images = [images]

    for atoms in images:
        # Avoid potentially expensive calculations:
        calc = atoms.calc
        if hasattr(calc, 'calculation_required'):
            for quantity in ('energy', 'forces', 'stress', 'magmoms'):
                already_done = not calc.calculation_required(atoms,
                                                             [quantity])
                traj.select_data(quantity, already_done)
        traj.write(atoms)
    traj.close()
def print_bundletrajectory_info(filename):
    """Prints information about a BundleTrajectory.

    Mainly intended to be called from a command line tool.
    """
    if not BundleTrajectory.is_bundle(filename):
        raise ValueError('Not a BundleTrajectory!')
    if BundleTrajectory.is_empty_bundle(filename):
        print(filename, 'is an empty BundleTrajectory.')
        return
    # Read the metadata
    fn = os.path.join(filename, 'metadata.json')
    with open(fn) as fd:
        metadata = jsonio.decode(fd.read())

    # BUGFIX: the f-string previously contained the literal text
    # "(unknown)" instead of the {filename} placeholder.
    print(f'Metadata information of BundleTrajectory "{filename}":')
    for k, v in metadata.items():
        if k != 'datatypes':
            print(f"  {k}: {v}")
    with open(os.path.join(filename, 'frames'), 'rb') as fd:
        nframes = int(fd.read())
    print('Number of frames: %i' % (nframes,))
    print('Data types:')
    for k, v in metadata['datatypes'].items():
        if v == 'once':
            print(f'  {k}: First frame only.')
        elif v:
            print(f'  {k}: All frames.')
    # Look at first frame
    if metadata['backend'] == 'ulm':
        backend = UlmBundleBackend(True, False)
    else:
        raise NotImplementedError(
            f'Backend {metadata["backend"]} not supported.')
    frame = os.path.join(filename, 'F0')
    small = backend.read_small(frame)
    print('Contents of first frame:')
    for k, v in small.items():
        if k == 'constraints':
            if v:
                print(f'  {len(v)} constraints are present')
            else:
                print('  Constraints are absent.')
        elif k == 'pbc':
            print(f'  Periodic boundary conditions: {v!s}')
        elif k == 'natoms':
            print('  Number of atoms: %i' % (v,))
        elif hasattr(v, 'shape'):
            print(f'  {k}: shape = {v.shape!s}, type = {v.dtype!s}')
            if k == 'cell':
                print('    [[%12.6f, %12.6f, %12.6f],' % tuple(v[0]))
                print('     [%12.6f, %12.6f, %12.6f],' % tuple(v[1]))
                print('     [%12.6f, %12.6f, %12.6f]]' % tuple(v[2]))
        else:
            print(f'  {k}: {v!s}')
    # Read info from separate files.
    if metadata['subtype'] == 'split':
        nsplit = small['fragments']
    else:
        nsplit = False
    for k, v in metadata['datatypes'].items():
        if v and k not in small:
            info = backend.read_info(frame, k, nsplit)
            infoline = f'  {k}: '
            # Use distinct names; the original inner loop shadowed k, v.
            for ik, iv in info.items():
                infoline += f'{ik} = {iv!s}, '
            infoline = infoline[:-2] + '.'  # Fix punctuation.
            print(infoline)
def main():
    """Command-line entry point.

    Prints information about the contents of one or more
    bundletrajectories given as command-line arguments.
    """
    # optparse is deprecated since Python 3.2; use argparse instead.
    import argparse
    parser = argparse.ArgumentParser(
        prog='python -m ase.io.bundletrajectory',
        description='Print information about '
        'the contents of one or more bundletrajectories.')
    # nargs='*' keeps the old behavior of doing nothing for zero args.
    parser.add_argument('bundles', nargs='*', metavar='a.bundle')
    args = parser.parse_args()
    for name in args.bundles:
        print_bundletrajectory_info(name)


if __name__ == '__main__':
    main()