Coverage for /builds/ase/ase/ase/io/bundletrajectory.py: 74.13% (576 statements)


# fmt: off

"""bundletrajectory - a module for I/O from large MD simulations.

The BundleTrajectory class writes a trajectory into a directory with the
following structure::

    filename.bundle (dir)
        metadata.json        Data about the file format, and about which
                             data is present.
        frames               The number of frames (ascii file)
        F0 (dir)             Frame number 0
            smalldata.ulm    Small data structures in a dictionary
                             (pbc, cell, ...)
            numbers.ulm      Atomic numbers
            positions.ulm    Positions
            momenta.ulm      Momenta
            ...
        F1 (dir)

There is a folder for each frame, and the data is in the ASE Ulm format.
"""

import os
import shutil
import sys
import time
from pathlib import Path

import numpy as np

from ase import Atoms
from ase.calculators.singlepoint import (
    PropertyNotImplementedError,
    SinglePointCalculator,
)

# The system json module causes memory leaks! Use ase's own.
# import json
from ase.io import jsonio
from ase.io.ulm import open as ulmopen
from ase.parallel import barrier, paropen, world


class BundleTrajectory:
    """Reads and writes atoms into a .bundle directory.

    The BundleTrajectory is an alternative way of storing
    trajectories, intended for large-scale molecular dynamics
    simulations, where a single flat file becomes unwieldy.  Instead,
    the data is stored in a directory, a 'bundle' (the name is
    inspired by bundles in macOS, which are really just directories
    the user is supposed to think of as a single file-like unit).

    Parameters:

    filename:
        The name of the directory.  Preferably ending in .bundle.

    mode (optional):
        The file opening mode.  'r' means open for reading, 'w' for
        writing and 'a' for appending.  Default: 'r'.  If opening in
        write mode and the filename already exists, the old file is
        renamed to .bak (any old .bak file is deleted), unless the
        existing file is empty.

    atoms (optional):
        The atoms that will be written.  Can only be specified in
        write or append mode.  If not specified, the atoms must be
        given as an argument to the .write() method instead.

    backup=True:
        Use backup=False to disable renaming of an existing file.

    backend='ulm':
        Request a backend.  Currently only 'ulm' is supported.
        Only honored when writing.

    singleprecision=False:
        Store floating point data in single precision (ulm backend only).
    """
    slavelog = True  # Log from all nodes

    def __init__(self, filename, mode='r', atoms=None, backup=True,
                 backend='ulm', singleprecision=False):
        self.state = 'constructing'
        self.filename = filename
        self.pre_observers = []  # callback functions before write is performed
        self.post_observers = []  # callback functions after write is performed
        self.master = world.rank == 0
        self.extra_data = []
        self.singleprecision = singleprecision
        self._set_defaults()
        if mode == 'r':
            if atoms is not None:
                raise ValueError('You cannot specify atoms in read mode.')
            self._open_read()
        elif mode == 'w':
            self._open_write(atoms, backup, backend)
        elif mode == 'a':
            self._open_append(atoms, backend)
        else:
            raise ValueError('Unknown mode: ' + str(mode))

    def _set_defaults(self):
        "Set default values for internal parameters."
        self.version = 1
        self.subtype = 'normal'
        self.datatypes = {'positions': True,
                          'numbers': 'once',
                          'tags': 'once',
                          'masses': 'once',
                          'momenta': True,
                          'forces': True,
                          'energy': True,
                          'energies': False,
                          'stress': False,
                          'magmoms': True}

    def _set_backend(self, backend):
        """Set the backend doing the actual I/O."""
        if backend is not None:
            self.backend_name = backend

        if self.backend_name == 'ulm':
            self.backend = UlmBundleBackend(self.master, self.singleprecision)
        else:
            raise NotImplementedError(
                'This version of ASE cannot use BundleTrajectory '
                'with backend "%s"' % self.backend_name)

    def write(self, atoms=None):
        """Write the atoms to the file.

        If the atoms argument is not given, the atoms object specified
        when creating the trajectory object is used.
        """
        # Check that we are in write mode
        if self.state == 'prewrite':
            self.state = 'write'
            assert self.nframes == 0
        elif self.state != 'write':
            raise RuntimeError('Cannot write in ' + self.state + ' mode.')

        if atoms is None:
            atoms = self.atoms

        # Handle NEB etc.  If atoms is just a normal Atoms object, it is
        # used as-is.
        for image in atoms.iterimages():
            self._write_atoms(image)
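
    # Typical MD usage (a sketch; 'dyn' is assumed to be an ASE dynamics
    # object such as Langevin): let the dynamics call write() at a fixed
    # interval.
    #
    #     traj = BundleTrajectory('md.bundle', mode='w', atoms=atoms)
    #     dyn.attach(traj.write, interval=10)
    #     dyn.run(1000)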

    def _write_atoms(self, atoms):
        "Write a single atoms object to file."
        self._call_observers(self.pre_observers)
        self.log('Beginning to write frame ' + str(self.nframes))
        framedir = self._make_framedir(self.nframes)

        # Check which data should be written the first time:
        # Modify datatypes so any element of type 'once' becomes true
        # for the first frame but false for subsequent frames.
        datatypes = {}
        for k, v in self.datatypes.items():
            if v == 'once':
                v = (self.nframes == 0)
            datatypes[k] = v

        # Write 'small' data structures.  They are written jointly.
        smalldata = {'pbc': atoms.get_pbc(),
                     'cell': atoms.get_cell(),
                     'natoms': atoms.get_global_number_of_atoms(),
                     'constraints': atoms.constraints}
        if datatypes.get('energy'):
            try:
                smalldata['energy'] = atoms.get_potential_energy()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['energy'] = False
        if datatypes.get('stress'):
            try:
                smalldata['stress'] = atoms.get_stress()
            except PropertyNotImplementedError:
                self.datatypes['stress'] = False
        self.backend.write_small(framedir, smalldata)

        # Write the large arrays.
        if datatypes.get('positions'):
            self.backend.write(framedir, 'positions', atoms.get_positions())
        if datatypes.get('numbers'):
            self.backend.write(framedir, 'numbers', atoms.get_atomic_numbers())
        if datatypes.get('tags'):
            if atoms.has('tags'):
                self.backend.write(framedir, 'tags', atoms.get_tags())
            else:
                self.datatypes['tags'] = False
        if datatypes.get('masses'):
            if atoms.has('masses'):
                self.backend.write(framedir, 'masses', atoms.get_masses())
            else:
                self.datatypes['masses'] = False
        if datatypes.get('momenta'):
            if atoms.has('momenta'):
                self.backend.write(framedir, 'momenta', atoms.get_momenta())
            else:
                self.datatypes['momenta'] = False
        if datatypes.get('magmoms'):
            if atoms.has('initial_magmoms'):
                self.backend.write(framedir, 'magmoms',
                                   atoms.get_initial_magnetic_moments())
            else:
                self.datatypes['magmoms'] = False
        if datatypes.get('forces'):
            try:
                x = atoms.get_forces()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['forces'] = False
            else:
                self.backend.write(framedir, 'forces', x)
                del x
        if datatypes.get('energies'):
            try:
                x = atoms.get_potential_energies()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['energies'] = False
            else:
                self.backend.write(framedir, 'energies', x)
                del x
        # Write any extra data
        for (label, source, once) in self.extra_data:
            if self.nframes == 0 or not once:
                if source is not None:
                    x = source()
                else:
                    x = atoms.get_array(label)
                self.backend.write(framedir, label, x)
                del x
                if once:
                    self.datatypes[label] = 'once'
                else:
                    self.datatypes[label] = True
        # Finally, write metadata if it is the first frame
        if self.nframes == 0:
            metadata = {'datatypes': self.datatypes}
            self._write_metadata(metadata)
        self._write_nframes(self.nframes + 1)
        self._call_observers(self.post_observers)
        self.log('Done writing frame ' + str(self.nframes))
        self.nframes += 1

    def select_data(self, data, value):
        """Selects if a given data type should be written.

        Data can be written in every frame (specify True), in the
        first frame only (specify 'once') or not at all (specify
        False).  Not all data types support 'once'; if not supported,
        it is interpreted as True.

        The following data types are supported; the letter in
        parentheses indicates the default:

        positions (T), numbers (O), tags (O), masses (O), momenta (T),
        forces (T), energy (T), energies (F), stress (F), magmoms (T)

        If a given property is not present during the first write, it
        will not be saved at all.
        """
        if value not in (True, False, 'once'):
            raise ValueError('Unknown write mode')
        if data not in self.datatypes:
            raise ValueError('Unsupported data type: ' + data)
        self.datatypes[data] = value
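
    # Sketch: tune what gets stored before the first frame is written.
    # Here magnetic moments are stored only for the first frame, and
    # stresses are skipped entirely.
    #
    #     traj = BundleTrajectory('md.bundle', mode='w')
    #     traj.select_data('magmoms', 'once')
    #     traj.select_data('stress', False)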

    def set_extra_data(self, name, source=None, once=False):
        """Adds extra data to be written.

        Parameters:
        name: The name of the data.

        source (optional): If specified, a callable object returning
        the data to be written.  If not specified, it is instead
        assumed that the atoms object contains the data as an array
        of the same name.

        once (optional): If specified and True, the data will only be
        written to the first frame.
        """
        self.extra_data.append((name, source, once))
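
    # Sketch: store a custom per-atom array each frame, plus data from a
    # callable ('myflags' and get_custom_array are hypothetical names;
    # the callable must return a NumPy array, since the backend stores
    # arrays).
    #
    #     atoms.new_array('myflags', flags)      # per-atom integer array
    #     traj.set_extra_data('myflags')         # taken from the atoms
    #     traj.set_extra_data('custom', source=get_custom_array)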

    def close(self):
        "Closes the trajectory."
        self.state = 'closed'
        lf = getattr(self, 'logfile', None)
        self.backend.close(log=lf)
        if lf is not None:
            lf.close()
            del self.logfile

    def log(self, text):
        """Write to the log file in the bundle.

        Logging is only possible in write/append mode.

        This function is mainly for internal use, but can also be called by
        the user.
        """
        if not (self.master or self.slavelog):
            return
        text = time.asctime() + ': ' + text
        if hasattr(self, 'logfile'):
            # Logging enabled
            if self.logfile is None:
                # Logfile not yet open
                try:
                    self.logdata.append(text)
                except AttributeError:
                    self.logdata = [text]
            else:
                self.logfile.write(text + '\n')
                self.logfile.flush()
        else:
            raise RuntimeError('Cannot write to log file in mode ' +
                               self.state)

    # __getitem__ is the main reading method.
    def __getitem__(self, n):
        return self._read(n)

    def _read(self, n):
        """Read an atoms object from the BundleTrajectory."""
        if self.state != 'read':
            raise OSError(f'Cannot read in {self.state} mode')
        if n < 0:
            n += self.nframes
        if n < 0 or n >= self.nframes:
            raise IndexError('Trajectory index %d out of range [0, %d['
                             % (n, self.nframes))

        framedir = os.path.join(self.filename, 'F' + str(n))
        framezero = os.path.join(self.filename, 'F0')
        smalldata = self.backend.read_small(framedir)
        data = {}
        data['pbc'] = smalldata['pbc']
        data['cell'] = smalldata['cell']
        data['constraint'] = smalldata['constraints']
        if self.subtype == 'split':
            self.backend.set_fragments(smalldata['fragments'])
            self.atom_id, _dummy = self.backend.read_split(framedir, 'ID')
        else:
            self.atom_id = None
        atoms = Atoms(**data)
        natoms = smalldata['natoms']
        for name in ('positions', 'numbers', 'tags', 'masses',
                     'momenta'):
            if self.datatypes.get(name):
                atoms.arrays[name] = self._read_data(framezero, framedir,
                                                     name, self.atom_id)
                assert len(atoms.arrays[name]) == natoms

        # Create the calculator holding the stored results.
        if self.datatypes.get('energy'):
            if self.datatypes.get('forces'):
                forces = self.backend.read(framedir, 'forces')
            else:
                forces = None
            if self.datatypes.get('magmoms'):
                magmoms = self.backend.read(framedir, 'magmoms')
            else:
                magmoms = None
            calc = SinglePointCalculator(atoms,
                                         energy=smalldata.get('energy'),
                                         forces=forces,
                                         stress=smalldata.get('stress'),
                                         magmoms=magmoms)
            atoms.calc = calc
        return atoms

    def read_extra_data(self, name, n=0):
        """Read extra data stored alongside the atoms.

        Currently only used to read data stored by an NPT dynamics object.
        The data is not associated with individual atoms.
        """
        if self.state != 'read':
            raise OSError(f'Cannot read extra data in {self.state} mode')
        # Handle negative n.
        if n < 0:
            n += self.nframes
        if n < 0 or n >= self.nframes:
            raise IndexError('Trajectory index %d out of range [0, %d['
                             % (n, self.nframes))
        framedir = os.path.join(self.filename, 'F' + str(n))
        framezero = os.path.join(self.filename, 'F0')
        return self._read_data(framezero, framedir, name, self.atom_id)

    def _read_data(self, f0, f, name, atom_id):
        "Read single data item."

        if self.subtype == 'normal':
            if self.datatypes[name] == 'once':
                d = self.backend.read(f0, name)
            else:
                d = self.backend.read(f, name)
        elif self.subtype == 'split':
            if self.datatypes[name] == 'once':
                d, issplit = self.backend.read_split(f0, name)
                atom_id, _dummy = self.backend.read_split(f0, 'ID')
            else:
                d, issplit = self.backend.read_split(f, name)
            if issplit:
                assert atom_id is not None
                assert len(d) == len(atom_id)
                d[atom_id] = np.array(d)
        return d
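
    # The split-bundle reordering above uses NumPy fancy assignment:
    # d[atom_id] = np.array(d) first copies d, then writes each row to
    # the slot given by its atom ID.  A minimal sketch:
    #
    #     d = np.array([10., 20., 30.])     # data in on-disk order
    #     atom_id = np.array([2, 0, 1])     # global index of each row
    #     d[atom_id] = np.array(d)
    #     # d is now [20., 30., 10.]; row i holds the data of atom i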

    def __len__(self):
        return self.nframes
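
    # Reading sketch: a BundleTrajectory supports len() and indexing
    # (including negative indices), so frames can be walked like a list.
    # get_potential_energy() works if the energy was stored.
    #
    #     traj = BundleTrajectory('md.bundle')
    #     for i in range(len(traj)):
    #         atoms = traj[i]
    #         print(i, atoms.get_potential_energy())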

    def _open_log(self):
        "Open the log file."
        if not (self.master or self.slavelog):
            return
        if self.master:
            lfn = os.path.join(self.filename, 'log.txt')
        else:
            lfn = os.path.join(self.filename, ('log-node%d.txt' %
                                               (world.rank,)))
        self.logfile = open(lfn, 'a', 1)  # Append to log if it exists.
        if hasattr(self, 'logdata'):
            for text in self.logdata:
                self.logfile.write(text + '\n')
            self.logfile.flush()
            del self.logdata

    def _open_write(self, atoms, backup, backend):
        """Open a bundle trajectory for writing.

        Parameters:
        atoms: Object to be written.
        backup: (bool) Whether a backup is kept if the file already exists.
        backend: Which type of backend to use.

        Note that the file name of the bundle is already stored as an
        attribute.
        """
        self._set_backend(backend)
        self.logfile = None  # enable delayed logging
        self.atoms = atoms
        if os.path.exists(self.filename):
            # The output directory already exists.
            barrier()  # all must have time to see it exists
            if not self.is_bundle(self.filename, allowempty=True):
                raise OSError(
                    'Filename "' + self.filename +
                    '" already exists, but is not a BundleTrajectory.' +
                    ' Cowardly refusing to remove it.')
            if self.is_empty_bundle(self.filename):
                barrier()
                self.log(f'Deleting old "{self.filename}" as it is empty')
                self.delete_bundle(self.filename)
            elif not backup:
                barrier()
                self.log(
                    f'Deleting old "{self.filename}" as backup is turned off.')
                self.delete_bundle(self.filename)
            else:
                barrier()
                # Make a backup file
                bakname = self.filename + '.bak'
                if os.path.exists(bakname):
                    barrier()  # All must see it exists
                    self.log(f'Deleting old backup file "{bakname}"')
                    self.delete_bundle(bakname)
                self.log(f'Renaming "{self.filename}" to "{bakname}"')
                self._rename_bundle(self.filename, bakname)
        # Ready to create a new bundle.
        barrier()
        self.log(f'Creating new "{self.filename}"')
        self._make_bundledir(self.filename)
        self.state = 'prewrite'
        self._write_metadata({})
        self._write_nframes(0)  # Mark new bundle as empty
        self._open_log()
        self.nframes = 0

    def _open_read(self):
        "Open a bundle trajectory for reading."
        if not os.path.exists(self.filename):
            raise OSError('File not found: ' + self.filename)
        if not self.is_bundle(self.filename):
            raise OSError('Not a BundleTrajectory: ' + self.filename)
        self.state = 'read'
        # Read the metadata
        metadata = self._read_metadata()
        self.metadata = metadata
        if metadata['version'] > self.version:
            raise NotImplementedError(
                'This version of ASE cannot read a BundleTrajectory version ' +
                str(metadata['version']))
        if metadata['subtype'] not in ('normal', 'split'):
            raise NotImplementedError(
                'This version of ASE cannot read BundleTrajectory subtype ' +
                metadata['subtype'])
        self.subtype = metadata['subtype']
        if metadata['backend'] == 'ulm':
            self.singleprecision = metadata['ulm.singleprecision']
        self._set_backend(metadata['backend'])
        self.nframes = self._read_nframes()
        if self.nframes == 0:
            raise OSError('Empty BundleTrajectory')
        self.datatypes = metadata['datatypes']
        try:
            self.pythonmajor = metadata['python_ver'][0]
        except KeyError:
            self.pythonmajor = 2  # Assume written with Python 2.
        # We need to know if we are running Python 3.X and try to read
        # a bundle written with Python 2.X
        self.backend.readpy2 = (self.pythonmajor == 2)
        self.state = 'read'

    def _open_append(self, atoms, backend):
        """Open a trajectory for writing in append mode.

        If there is no pre-existing bundle, it is just opened in write mode
        instead.

        Parameters:
        atoms: Object to be written.
        backend: The backend to be used if a new bundle is opened.  Ignored
            if we append to an existing bundle, as the backend cannot be
            changed.

        The filename is already stored as an attribute.
        """
        if not os.path.exists(self.filename):
            # OK, no old bundle.  Open as for write instead.
            barrier()
            self._open_write(atoms, False, backend)
            return
        if not self.is_bundle(self.filename):
            raise OSError('Not a BundleTrajectory: ' + self.filename)
        self.state = 'read'
        metadata = self._read_metadata()
        self.metadata = metadata
        if metadata['version'] != self.version:
            raise NotImplementedError(
                'Cannot append to a BundleTrajectory version '
                '%s (supported version is %s)' % (str(metadata['version']),
                                                  str(self.version)))
        if metadata['subtype'] not in ('normal', 'split'):
            raise NotImplementedError(
                'This version of ASE cannot append to BundleTrajectory '
                'subtype ' + metadata['subtype'])
        self.subtype = metadata['subtype']
        if metadata['backend'] == 'ulm':
            self.singleprecision = metadata['ulm.singleprecision']
        self._set_backend(metadata['backend'])
        self.nframes = self._read_nframes()
        self._open_log()
        self.log('Opening "%s" in append mode (nframes=%i)' % (self.filename,
                                                               self.nframes))
        self.state = 'write'
        self.atoms = atoms

    @property
    def path(self):
        return Path(self.filename)

    @property
    def metadata_path(self):
        return self.path / 'metadata.json'

    def _write_nframes(self, n):
        "Write the number of frames in the bundle."
        assert self.state == 'write' or self.state == 'prewrite'
        with paropen(self.path / 'frames', 'w') as fd:
            fd.write(str(n) + '\n')

    def _read_nframes(self):
        "Read the number of frames."
        return int((self.path / 'frames').read_text())

    def _write_metadata(self, metadata):
        """Write the metadata file of the bundle.

        Modifies the metadata dictionary!
        """
        # Add standard fields that must always be present.
        assert self.state == 'write' or self.state == 'prewrite'
        metadata['format'] = 'BundleTrajectory'
        metadata['version'] = self.version
        metadata['subtype'] = self.subtype
        metadata['backend'] = self.backend_name
        if self.backend_name == 'ulm':
            metadata['ulm.singleprecision'] = self.singleprecision
        metadata['python_ver'] = tuple(sys.version_info)
        encode = jsonio.MyEncoder(indent=4).encode
        fido = encode(metadata)
        with paropen(self.metadata_path, 'w') as fd:
            fd.write(fido)

    def _read_metadata(self):
        """Read the metadata."""
        assert self.state == 'read'
        return jsonio.decode(self.metadata_path.read_text())

    @staticmethod
    def is_bundle(filename, allowempty=False):
        """Check if a filename exists and is a BundleTrajectory.

        If allowempty=True, an empty folder is regarded as an
        empty BundleTrajectory."""
        filename = Path(filename)
        if not filename.is_dir():
            return False
        if allowempty and not os.listdir(filename):
            return True  # An empty BundleTrajectory
        metaname = filename / 'metadata.json'
        if metaname.is_file():
            mdata = jsonio.decode(metaname.read_text())
        else:
            return False

        try:
            return mdata['format'] == 'BundleTrajectory'
        except KeyError:
            return False

    @staticmethod
    def is_empty_bundle(filename):
        """Check if a filename is an empty bundle.

        Assumes that it is a bundle."""
        if not os.listdir(filename):
            return True  # Empty folders are empty bundles.
        with open(os.path.join(filename, 'frames'), 'rb') as fd:
            nframes = int(fd.read())

        # File may be removed by the master immediately after this.
        barrier()
        return nframes == 0

    @classmethod
    def delete_bundle(cls, filename):
        "Deletes a bundle."
        if world.rank == 0:
            # Only the master deletes
            if not cls.is_bundle(filename, allowempty=True):
                raise OSError(
                    f'Cannot remove "{filename}" as it is not a bundle '
                    'trajectory.'
                )
            if os.path.islink(filename):
                # A symbolic link to a bundle.  Only remove the link.
                os.remove(filename)
            else:
                # A real bundle
                shutil.rmtree(filename)
        else:
            # All other tasks wait for the directory to go away.
            while os.path.exists(filename):
                time.sleep(1)
        # The master may not proceed before all tasks have seen the
        # directory go away, as it might otherwise create a new bundle
        # with the same name, fooling the wait loop in _make_bundledir.
        barrier()

    def _rename_bundle(self, oldname, newname):
        "Rename a bundle.  Used to create the .bak"
        if self.master:
            os.rename(oldname, newname)
        else:
            while os.path.exists(oldname):
                time.sleep(1)
        # The master may not proceed before all tasks have seen the
        # directory go away.
        barrier()

    def _make_bundledir(self, filename):
        """Make the main bundle directory.

        Since all MPI tasks might write to it, all tasks must wait for
        the directory to appear.
        """
        self.log('Making directory ' + filename)
        assert not os.path.isdir(filename)
        barrier()
        if self.master:
            os.mkdir(filename)
        else:
            i = 0
            while not os.path.isdir(filename):
                time.sleep(1)
                i += 1
                if i > 10:
                    self.log('Waiting %d seconds for %s to appear!'
                             % (i, filename))

    def _make_framedir(self, frame):
        """Make subdirectory for the frame.

        As only the master writes to it, no synchronization between
        MPI tasks is necessary.
        """
        framedir = os.path.join(self.filename, 'F' + str(frame))
        if self.master:
            self.log('Making directory ' + framedir)
            os.mkdir(framedir)
        return framedir

    def pre_write_attach(self, function, interval=1, *args, **kwargs):
        """Attach a function to be called before writing begins.

        function: The function or callable object to be called.

        interval: How often the function is called.  Default: every time (1).

        All other arguments are stored, and passed to the function.
        """
        if not callable(function):
            raise ValueError('Callback object must be callable.')
        self.pre_observers.append((function, interval, args, kwargs))

    def post_write_attach(self, function, interval=1, *args, **kwargs):
        """Attach a function to be called after writing ends.

        function: The function or callable object to be called.

        interval: How often the function is called.  Default: every time (1).

        All other arguments are stored, and passed to the function.
        """
        if not callable(function):
            raise ValueError('Callback object must be callable.')
        self.post_observers.append((function, interval, args, kwargs))
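
    # Observer sketch ('flush_buffers' is a hypothetical callback): call
    # a function after every fifth frame has been written.
    #
    #     traj.post_write_attach(flush_buffers, interval=5)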

    def _call_observers(self, obs):
        "Call pre/post write observers."
        for function, interval, args, kwargs in obs:
            if (self.nframes + 1) % interval == 0:
                function(*args, **kwargs)


class UlmBundleBackend:
    """Backend for BundleTrajectories stored as ASE Ulm files."""

    def __init__(self, master, singleprecision):
        # Store if this backend will actually write anything
        self.writesmall = master
        self.writelarge = master
        self.singleprecision = singleprecision

        # Integer data may be downconverted to the following types
        self.integral_dtypes = ['uint8', 'int8', 'uint16', 'int16',
                                'uint32', 'int32', 'uint64', 'int64']
        self.int_dtype = {k: getattr(np, k)
                          for k in self.integral_dtypes}
        self.int_minval = {k: np.iinfo(self.int_dtype[k]).min
                           for k in self.integral_dtypes}
        self.int_maxval = {k: np.iinfo(self.int_dtype[k]).max
                           for k in self.integral_dtypes}
        self.int_itemsize = {k: np.dtype(self.int_dtype[k]).itemsize
                             for k in self.integral_dtypes}

    def write_small(self, framedir, smalldata):
        "Write small data to be written jointly."
        if self.writesmall:
            with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'w') as fd:
                fd.write(**smalldata)

    def write(self, framedir, name, data):
        "Write data to separate file."
        if self.writelarge:
            shape = data.shape
            dtype = str(data.dtype)
            stored_as = dtype
            all_identical = False
            # Check if it is a type that can be stored with less space
            if np.issubdtype(data.dtype, np.integer):
                # An integer type, we may want to convert
                minval = data.min()
                maxval = data.max()

                # ulm cannot write np.bool_:
                all_identical = bool(minval == maxval)
                if all_identical:
                    data = int(data.flat[0])  # Convert to standard integer
                else:
                    for typ in self.integral_dtypes:
                        if (minval >= self.int_minval[typ] and
                                maxval <= self.int_maxval[typ] and
                                data.itemsize > self.int_itemsize[typ]):
                            # Convert to smaller type
                            stored_as = typ
                            data = data.astype(self.int_dtype[typ])
            elif data.dtype == np.float32 or data.dtype == np.float64:
                all_identical = bool(data.min() == data.max())
                if all_identical:
                    data = float(data.flat[0])  # Convert to standard float
                elif data.dtype == np.float64 and self.singleprecision:
                    # Downconvert double to single precision
                    stored_as = 'float32'
                    data = data.astype(np.float32)
            fn = os.path.join(framedir, name + '.ulm')
            with ulmopen(fn, 'w') as fd:
                fd.write(shape=shape,
                         dtype=dtype,
                         stored_as=stored_as,
                         all_identical=all_identical,
                         data=data)
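
    # Downconversion sketch: an int64 array whose values all fit in uint8
    # is stored on disk as uint8 and cast back on reading; 'dtype' records
    # the original type and 'stored_as' the on-disk type.
    #
    #     data = np.arange(100, dtype=np.int64)   # min 0, max 99
    #     # fits uint8 (0..255) and 1 byte < 8 bytes, so stored_as='uint8'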

    def read_small(self, framedir):
        "Read small data."
        with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'r') as fd:
            return fd.asdict()

    def read(self, framedir, name):
        "Read data from separate file."
        fn = os.path.join(framedir, name + '.ulm')
        with ulmopen(fn, 'r') as fd:
            if fd.all_identical:
                # Only a single data value
                data = np.zeros(fd.shape,
                                dtype=getattr(np, fd.dtype)) + fd.data
            elif fd.dtype == fd.stored_as:
                # Easy, the array can be returned as-is.
                data = fd.data
            else:
                # Cast the data back
                data = fd.data.astype(getattr(np, fd.dtype))
            return data

    def read_info(self, framedir, name, split=None):
        """Read information about file contents without reading the data.

        Information is a dictionary containing as a minimum the shape and
        type.
        """
        fn = os.path.join(framedir, name + '.ulm')
        if split is None or os.path.exists(fn):
            with ulmopen(fn, 'r') as fd:
                info = {}
                info['shape'] = fd.shape
                info['type'] = fd.dtype
                info['stored_as'] = fd.stored_as
                info['identical'] = fd.all_identical
                return info
        else:
            info = {}
            for i in range(split):
                fn = os.path.join(framedir, name + '_' + str(i) + '.ulm')
                with ulmopen(fn, 'r') as fd:
                    if i == 0:
                        info['shape'] = list(fd.shape)
                        info['type'] = fd.dtype
                        info['stored_as'] = fd.stored_as
                        info['identical'] = fd.all_identical
                    else:
                        info['shape'][0] += fd.shape[0]
                        assert info['type'] == fd.dtype
                        info['identical'] = (info['identical']
                                             and fd.all_identical)
            info['shape'] = tuple(info['shape'])
            return info

    def set_fragments(self, nfrag):
        self.nfrag = nfrag

    def read_split(self, framedir, name):
        """Read data from multiple files.

        Falls back to reading from a single file if that is how the data
        is stored.

        Returns the data and an object indicating if the data was really
        read from split files.  The latter object is False if not read
        from split files, but is a list of the segment lengths if split
        files were used.
        """
        data = []
        if os.path.exists(os.path.join(framedir, name + '.ulm')):
            # Not stored in split form!
            return (self.read(framedir, name), False)
        for i in range(self.nfrag):
            suf = '_%d' % (i,)
            data.append(self.read(framedir, name + suf))
        seglengths = [len(d) for d in data]
        return (np.concatenate(data), seglengths)

    def close(self, log=None):
        """Close anything that needs to be closed by the backend.

        The default backend does nothing here.
        """


def read_bundletrajectory(filename, index=-1):
    """Reads one or more atoms objects from a BundleTrajectory.

    Arguments:

    filename: str
        The name of the bundle (really a directory!)
    index: int
        An integer specifying which frame to read, or an index object
        for reading multiple frames.  Default: -1 (reads the last
        frame).
    """
    traj = BundleTrajectory(filename, mode='r')
    if isinstance(index, int):
        yield traj[index]
        return

    for i in range(*index.indices(len(traj))):
        yield traj[i]
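
# Sketch: read_bundletrajectory is a generator.  It is normally driven
# through ase.io.read, but it can also be used directly:
#
#     last = next(read_bundletrajectory('md.bundle'))      # index=-1
#     every_tenth = list(read_bundletrajectory('md.bundle',
#                                              slice(0, None, 10)))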


def write_bundletrajectory(filename, images, append=False):
    """Write image(s) to a BundleTrajectory.

    Also writes energy, forces, and stress if they are already
    calculated.
    """

    if append:
        mode = 'a'
    else:
        mode = 'w'
    traj = BundleTrajectory(filename, mode=mode)

    if hasattr(images, 'get_positions'):
        images = [images]

    for atoms in images:
        # Avoid potentially expensive calculations:
        calc = atoms.calc
        if hasattr(calc, 'calculation_required'):
            for quantity in ('energy', 'forces', 'stress', 'magmoms'):
                traj.select_data(quantity,
                                 not calc.calculation_required(atoms,
                                                               [quantity]))
        traj.write(atoms)
    traj.close()
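
# Sketch: write a list of images, then append one more frame later
# ('images' and 'final_image' are hypothetical variables):
#
#     write_bundletrajectory('relax.bundle', images)
#     write_bundletrajectory('relax.bundle', final_image, append=True)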


def print_bundletrajectory_info(filename):
    """Prints information about a BundleTrajectory.

    Mainly intended to be called from a command line tool.
    """
    if not BundleTrajectory.is_bundle(filename):
        raise ValueError('Not a BundleTrajectory!')
    if BundleTrajectory.is_empty_bundle(filename):
        print(filename, 'is an empty BundleTrajectory.')
        return
    # Read the metadata
    fn = os.path.join(filename, 'metadata.json')
    with open(fn) as fd:
        metadata = jsonio.decode(fd.read())

    print(f'Metadata information of BundleTrajectory "{filename}":')
    for k, v in metadata.items():
        if k != 'datatypes':
            print(f'  {k}: {v}')
    with open(os.path.join(filename, 'frames'), 'rb') as fd:
        nframes = int(fd.read())
    print('Number of frames: %i' % (nframes,))
    print('Data types:')
    for k, v in metadata['datatypes'].items():
        if v == 'once':
            print(f'  {k}: First frame only.')
        elif v:
            print(f'  {k}: All frames.')
    # Look at first frame
    if metadata['backend'] == 'ulm':
        backend = UlmBundleBackend(True, False)
    else:
        raise NotImplementedError(
            f'Backend {metadata["backend"]} not supported.')
    frame = os.path.join(filename, 'F0')
    small = backend.read_small(frame)
    print('Contents of first frame:')
    for k, v in small.items():
        if k == 'constraints':
            if v:
                print(f'  {len(v)} constraints are present')
            else:
                print('  Constraints are absent.')
        elif k == 'pbc':
            print(f'  Periodic boundary conditions: {v!s}')
        elif k == 'natoms':
            print('  Number of atoms: %i' % (v,))
        elif hasattr(v, 'shape'):
            print(f'  {k}: shape = {v.shape!s}, type = {v.dtype!s}')
            if k == 'cell':
                print('    [[%12.6f, %12.6f, %12.6f],' % tuple(v[0]))
                print('     [%12.6f, %12.6f, %12.6f],' % tuple(v[1]))
                print('     [%12.6f, %12.6f, %12.6f]]' % tuple(v[2]))
        else:
            print(f'  {k}: {v!s}')
    # Read info from separate files.
    if metadata['subtype'] == 'split':
        nsplit = small['fragments']
    else:
        nsplit = False
    for k, v in metadata['datatypes'].items():
        if v and k not in small:
            info = backend.read_info(frame, k, nsplit)
            infoline = f'  {k}: '
            for ik, iv in info.items():
                infoline += f'{ik} = {iv!s}, '
            infoline = infoline[:-2] + '.'  # Fix punctuation.
            print(infoline)


def main():
    import optparse
    parser = optparse.OptionParser(
        usage='python -m ase.io.bundletrajectory '
        'a.bundle [b.bundle ...]',
        description='Print information about '
        'the contents of one or more bundletrajectories.')
    _opts, args = parser.parse_args()
    for name in args:
        print_bundletrajectory_info(name)


if __name__ == '__main__':
    main()