Coverage for ase/io/bundletrajectory.py: 74.04%

574 statements  

coverage.py v7.13.5, created at 2026-03-30 08:22 +0000

1# fmt: off 

2 

3"""bundletrajectory - a module for I/O from large MD simulations. 

4 

5The BundleTrajectory class writes trajectories into a directory with the

6following structure (small data is stored as JSON, per-frame arrays in

7the ASE Ulm format)::

8 

9     filename.bundle (dir)

10        metadata.json          Data about the file format, and about which

11                               data is present.

12        frames                 The number of frames (ascii file)

13        F0 (dir)               Frame number 0

14            smalldata.ulm      Small data structures in a dictionary

15                               (pbc, cell, ...)

16            numbers.ulm        Atomic numbers

17            positions.ulm      Positions

18            momenta.ulm        Momenta

19            ...

20        F1 (dir)

21 

22There is a folder for each frame, and the data is in the ASE Ulm format. 

23""" 

24 

25import os 

26import shutil 

27import sys 

28import time 

29from pathlib import Path 

30 

31import numpy as np 

32 

33from ase import Atoms 

34from ase.calculators.singlepoint import ( 

35 PropertyNotImplementedError, 

36 SinglePointCalculator, 

37) 

38 

39# The system json module causes memory leaks! Use ase's own. 

40# import json 

41from ase.io import jsonio 

42from ase.io.ulm import open as ulmopen 

43from ase.parallel import barrier, paropen, world 

44 

45 

46class BundleTrajectory: 

47 """Reads and writes atoms into a .bundle directory. 

48 

49 The BundleTrajectory is an alternative way of storing 

50 trajectories, intended for large-scale molecular dynamics 

51 simulations, where a single flat file becomes unwieldy. Instead, 

52 the data is stored in a directory, a 'bundle' (the name bundle is

53 inspired by bundles in Mac OS, which are really just directories

54 the user is supposed to think of as a single file-like unit). 

55 

56 Parameters 

57 ---------- 

58 

59 filename: 

60 The name of the directory. Preferably ending in .bundle. 

61 

62 mode (optional): 

63 The file opening mode. 'r' means open for reading, 'w' for 

64 writing and 'a' for appending. Default: 'r'. If opening in 

65 write mode, and the filename already exists, the old file is 

66 renamed to .bak (any old .bak file is deleted), except if the 

67 existing file is empty. 

68 

69 atoms (optional): 

70 The atoms that will be written. Can only be specified in 

71 write or append mode. If not specified, the atoms must be 

72 given as an argument to the .write() method instead. 

73 

74 backup=True: 

75 Use backup=False to disable renaming of an existing file. 

76 

77 backend='ulm': 

78 Request a backend. Currently only 'ulm' is supported. 

79 Only honored when writing. 

80 

81 singleprecision=False: 

82 Store floating point data in single precision (ulm backend only). 

83 """ 

84 slavelog = True # Log from all nodes 

85 

86 def __init__(self, filename, mode='r', atoms=None, backup=True, 

87 backend='ulm', singleprecision=False): 

88 self.state = 'constructing' 

89 self.filename = filename 

90 self.pre_observers = [] # callback functions before write is performed 

91 self.post_observers = [] # callback functions after write is performed 

92 self.master = world.rank == 0 

93 self.extra_data = [] 

94 self.singleprecision = singleprecision 

95 self._set_defaults() 

96 if mode == 'r': 

97 if atoms is not None: 

98 raise ValueError('You cannot specify atoms in read mode.') 

99 self._open_read() 

100 elif mode == 'w': 

101 self._open_write(atoms, backup, backend) 

102 elif mode == 'a': 

103 self._open_append(atoms, backend) 

104 else: 

105 raise ValueError('Unknown mode: ' + str(mode)) 
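# A minimal write-mode sketch, assuming an Atoms object 'atoms' and using
# VelocityVerlet with a 5 fs timestep purely as an example driver:
#
#     from ase import units
#     from ase.md.verlet import VelocityVerlet
#     traj = BundleTrajectory('md.bundle', mode='w', atoms=atoms)
#     dyn = VelocityVerlet(atoms, timestep=5 * units.fs)
#     dyn.attach(traj.write, interval=10)   # write every 10th MD step
#     dyn.run(1000)
#     traj.close()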

106 

107 def _set_defaults(self): 

108 "Set default values for internal parameters." 

109 self.version = 1 

110 self.subtype = 'normal' 

111 self.datatypes = {'positions': True, 

112 'numbers': 'once', 

113 'tags': 'once', 

114 'masses': 'once', 

115 'momenta': True, 

116 'forces': True, 

117 'energy': True, 

118 'energies': False, 

119 'stress': False, 

120 'magmoms': True} 

121 

122 def _set_backend(self, backend): 

123 """Set the backed doing the actual I/O.""" 

124 if backend is not None: 

125 self.backend_name = backend 

126 

127 if self.backend_name == 'ulm': 

128 self.backend = UlmBundleBackend(self.master, self.singleprecision) 

129 else: 

130 raise NotImplementedError( 

131 'This version of ASE cannot use BundleTrajectory ' 

132 'with backend "%s"' % self.backend_name) 

133 

134 def write(self, atoms=None): 

135 """Write the atoms to the file. 

136 

137 If the atoms argument is not given, the atoms object specified 

138 when creating the trajectory object is used. 

139 """ 

140 # Check that we are in write mode 

141 if self.state == 'prewrite': 

142 self.state = 'write' 

143 assert self.nframes == 0 

144 elif self.state != 'write': 

145 raise RuntimeError('Cannot write in ' + self.state + ' mode.') 

146 

147 if atoms is None: 

148 atoms = self.atoms 

149 

150 # Handle NEB etc. If atoms is just a normal Atoms object, it is used 

151 # as-is. 

152 for image in atoms.iterimages(): 

153 self._write_atoms(image) 

154 

155 def _write_atoms(self, atoms): 

156 "Write a single atoms object to file." 

157 self._call_observers(self.pre_observers) 

158 self.log('Beginning to write frame ' + str(self.nframes)) 

159 framedir = self._make_framedir(self.nframes) 

160 

161 # Check which data should be written the first time: 

162 # Modify datatypes so any element of type 'once' becomes true 

163 # for the first frame but false for subsequent frames. 

164 datatypes = {} 

165 for k, v in self.datatypes.items(): 

166 if v == 'once': 

167 v = (self.nframes == 0) 

168 datatypes[k] = v 

169 

170 # Write 'small' data structures. They are written jointly. 

171 smalldata = {'pbc': atoms.get_pbc(), 

172 'cell': atoms.get_cell(), 

173 'natoms': atoms.get_global_number_of_atoms(), 

174 'constraints': atoms.constraints} 

175 if datatypes.get('energy'): 

176 try: 

177 smalldata['energy'] = atoms.get_potential_energy() 

178 except (RuntimeError, PropertyNotImplementedError): 

179 self.datatypes['energy'] = False 

180 if datatypes.get('stress'): 

181 try: 

182 smalldata['stress'] = atoms.get_stress() 

183 except PropertyNotImplementedError: 

184 self.datatypes['stress'] = False 

185 self.backend.write_small(framedir, smalldata) 

186 

187 # Write the large arrays. 

188 if datatypes.get('positions'): 

189 self.backend.write(framedir, 'positions', atoms.get_positions()) 

190 if datatypes.get('numbers'): 

191 self.backend.write(framedir, 'numbers', atoms.get_atomic_numbers()) 

192 if datatypes.get('tags'): 

193 if atoms.has('tags'): 

194 self.backend.write(framedir, 'tags', atoms.get_tags()) 

195 else: 

196 self.datatypes['tags'] = False 

197 if datatypes.get('masses'): 

198 if atoms.has('masses'): 

199 self.backend.write(framedir, 'masses', atoms.get_masses()) 

200 else: 

201 self.datatypes['masses'] = False 

202 if datatypes.get('momenta'): 

203 if atoms.has('momenta'): 

204 self.backend.write(framedir, 'momenta', atoms.get_momenta()) 

205 else: 

206 self.datatypes['momenta'] = False 

207 if datatypes.get('magmoms'): 

208 if atoms.has('initial_magmoms'): 

209 self.backend.write(framedir, 'magmoms', 

210 atoms.get_initial_magnetic_moments()) 

211 else: 

212 self.datatypes['magmoms'] = False 

213 if datatypes.get('forces'): 

214 try: 

215 x = atoms.get_forces() 

216 except (RuntimeError, PropertyNotImplementedError): 

217 self.datatypes['forces'] = False 

218 else: 

219 self.backend.write(framedir, 'forces', x) 

220 del x 

221 if datatypes.get('energies'): 

222 try: 

223 x = atoms.get_potential_energies() 

224 except (RuntimeError, PropertyNotImplementedError): 

225 self.datatypes['energies'] = False 

226 else: 

227 self.backend.write(framedir, 'energies', x) 

228 del x 

229 # Write any extra data 

230 for (label, source, once) in self.extra_data: 

231 if self.nframes == 0 or not once: 

232 if source is not None: 

233 x = source() 

234 else: 

235 x = atoms.get_array(label) 

236 self.backend.write(framedir, label, x) 

237 del x 

238 if once: 

239 self.datatypes[label] = 'once' 

240 else: 

241 self.datatypes[label] = True 

242 # Finally, write metadata if it is the first frame 

243 if self.nframes == 0: 

244 metadata = {'datatypes': self.datatypes} 

245 self._write_metadata(metadata) 

246 self._write_nframes(self.nframes + 1) 

247 self._call_observers(self.post_observers) 

248 self.log('Done writing frame ' + str(self.nframes)) 

249 self.nframes += 1 

250 

251 def select_data(self, data, value): 

252 """Selects if a given data type should be written. 

253 

254 Data can be written in every frame (specify True), in the 

255 first frame only (specify 'once') or not at all (specify

256 False). Not all data types support the 'once' keyword; if not

257 supported, it is interpreted as True.

258 

259 The following data types are supported, the letter in parenthesis 

260 indicates the default: 

261 

262 positions (T), numbers (O), tags (O), masses (O), momenta (T), 

263 forces (T), energy (T), energies (F), stress (F), magmoms (T) 

264 

265 If a given property is not present during the first write, it 

266 will not be saved at all.

267 """ 

268 if value not in (True, False, 'once'): 

269 raise ValueError('Unknown write mode') 

270 if data not in self.datatypes: 

271 raise ValueError('Unsupported data type: ' + data) 

272 self.datatypes[data] = value 
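# Example, assuming 'traj' is a BundleTrajectory opened for writing: store
# per-atom energies in every frame and skip magnetic moments entirely.
#
#     traj.select_data('energies', True)
#     traj.select_data('magmoms', False)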

273 

274 def set_extra_data(self, name, source=None, once=False): 

275 """Adds extra data to be written. 

276 

277 Parameters 

278 ---------- 

279 name: The name of the data. 

280 

281 source (optional): If specified, a callable object returning 

282 the data to be written. If not specified it is instead 

283 assumed that the atoms contains the data as an array of the 

284 same name. 

285 

286 once (optional): If specified and True, the data will only be 

287 written to the first frame. 

288 """ 

289 self.extra_data.append((name, source, once)) 
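# Example, assuming 'traj' is open for writing and 'atoms' is the system being
# simulated: store velocities via a callable, plus a custom per-atom array
# named 'local_order' (hypothetical) taken from atoms.get_array().
#
#     traj.set_extra_data('velocities', source=atoms.get_velocities)
#     traj.set_extra_data('local_order')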

290 

291 def close(self): 

292 "Closes the trajectory." 

293 self.state = 'closed' 

294 lf = getattr(self, 'logfile', None) 

295 self.backend.close(log=lf) 

296 if lf is not None: 

297 lf.close() 

298 del self.logfile 

299 

300 def log(self, text): 

301 """Write to the log file in the bundle. 

302 

303 Logging is only possible in write/append mode. 

304 

305 This function is mainly for internal use, but can also be called by 

306 the user. 

307 """ 

308 if not (self.master or self.slavelog): 

309 return 

310 text = time.asctime() + ': ' + text 

311 if hasattr(self, 'logfile'): 

312 # Logging enabled 

313 if self.logfile is None: 

314 # Logfile not yet open 

315 try: 

316 self.logdata.append(text) 

317 except AttributeError: 

318 self.logdata = [text] 

319 else: 

320 self.logfile.write(text + '\n') 

321 self.logfile.flush() 

322 else: 

323 raise RuntimeError('Cannot write to log file in mode ' + 

324 self.state) 

325 

326 # __getitem__ is the main reading method. 

327 def __getitem__(self, n): 

328 return self._read(n) 

329 

330 def _read(self, n): 

331 """Read an atoms object from the BundleTrajectory.""" 

332 if self.state != 'read': 

333 raise OSError(f'Cannot read in {self.state} mode') 

334 if n < 0: 

335 n += self.nframes 

336 if n < 0 or n >= self.nframes: 

337 raise IndexError('Trajectory index %d out of range [0, %d[' 

338 % (n, self.nframes)) 

339 

340 framedir = os.path.join(self.filename, 'F' + str(n)) 

341 framezero = os.path.join(self.filename, 'F0') 

342 smalldata = self.backend.read_small(framedir) 

343 data = {} 

344 data['pbc'] = smalldata['pbc'] 

345 data['cell'] = smalldata['cell'] 

346 data['constraint'] = smalldata['constraints'] 

347 if self.subtype == 'split': 

348 self.backend.set_fragments(smalldata['fragments']) 

349 self.atom_id, _dummy = self.backend.read_split(framedir, 'ID') 

350 else: 

351 self.atom_id = None 

352 atoms = Atoms(**data) 

353 natoms = smalldata['natoms'] 

354 for name in ('positions', 'numbers', 'tags', 'masses', 

355 'momenta'): 

356 if self.datatypes.get(name): 

357 atoms.arrays[name] = self._read_data(framezero, framedir, 

358 name, self.atom_id) 

359 assert len(atoms.arrays[name]) == natoms 

360 

361 # Attach the stored results to the atoms via a SinglePointCalculator

362 if self.datatypes.get('energy'): 

363 if self.datatypes.get('forces'): 

364 forces = self.backend.read(framedir, 'forces') 

365 else: 

366 forces = None 

367 if self.datatypes.get('magmoms'): 

368 magmoms = self.backend.read(framedir, 'magmoms') 

369 else: 

370 magmoms = None 

371 calc = SinglePointCalculator(atoms, 

372 energy=smalldata.get('energy'), 

373 forces=forces, 

374 stress=smalldata.get('stress'), 

375 magmoms=magmoms) 

376 atoms.calc = calc 

377 return atoms 

378 

379 def read_extra_data(self, name, n=0): 

380 """Read extra data stored alongside the atoms. 

381 

382 Currently only used to read data stored by an NPT dynamics object. 

383 The data is not associated with individual atoms. 

384 """ 

385 if self.state != 'read': 

386 raise OSError(f'Cannot read extra data in {self.state} mode') 

387 # Handle negative n. 

388 if n < 0: 

389 n += self.nframes 

390 if n < 0 or n >= self.nframes: 

391 raise IndexError('Trajectory index %d out of range [0, %d[' 

392 % (n, self.nframes)) 

393 framedir = os.path.join(self.filename, 'F' + str(n)) 

394 framezero = os.path.join(self.filename, 'F0') 

395 return self._read_data(framezero, framedir, name, self.atom_id) 

396 

397 def _read_data(self, f0, f, name, atom_id): 

398 "Read single data item." 

399 

400 if self.subtype == 'normal': 

401 if self.datatypes[name] == 'once': 

402 d = self.backend.read(f0, name) 

403 else: 

404 d = self.backend.read(f, name) 

405 elif self.subtype == 'split': 

406 if self.datatypes[name] == 'once': 

407 d, issplit = self.backend.read_split(f0, name) 

408 atom_id, _dummy = self.backend.read_split(f0, 'ID') 

409 else: 

410 d, issplit = self.backend.read_split(f, name) 

411 if issplit: 

412 assert atom_id is not None 

413 assert len(d) == len(atom_id) 

414 d[atom_id] = np.array(d) 

415 return d 

416 

417 def __len__(self): 

418 return self.nframes 

419 

420 def _open_log(self): 

421 "Open the log file." 

422 if not (self.master or self.slavelog): 

423 return 

424 if self.master: 

425 lfn = os.path.join(self.filename, 'log.txt') 

426 else: 

427 lfn = os.path.join(self.filename, ('log-node%d.txt' % 

428 (world.rank,))) 

429 self.logfile = open(lfn, 'a', 1) # Append to log if it exists. 

430 if hasattr(self, 'logdata'): 

431 for text in self.logdata: 

432 self.logfile.write(text + '\n') 

433 self.logfile.flush() 

434 del self.logdata 

435 

436 def _open_write(self, atoms, backup, backend): 

437 """Open a bundle trajectory for writing. 

438 

439 Parameters 

440 ---------- 

441 atoms: Object to be written. 

442 backup: (bool) Whether a backup is kept if file already exists. 

443 backend: Which type of backend to use. 

444 

445 Note that the file name of the bundle is already stored as an attribute. 

446 """ 

447 self._set_backend(backend) 

448 self.logfile = None # enable delayed logging 

449 self.atoms = atoms 

450 if os.path.exists(self.filename): 

451 # The output directory already exists. 

452 barrier() # all must have time to see it exists 

453 if not self.is_bundle(self.filename, allowempty=True): 

454 raise OSError( 

455 'Filename "' + self.filename + 

456 '" already exists, but is not a BundleTrajectory.' + 

457 ' Cowardly refusing to remove it.') 

458 if self.is_empty_bundle(self.filename): 

459 barrier() 

460 self.log(f'Deleting old "{self.filename}" as it is empty') 

461 self.delete_bundle(self.filename) 

462 elif not backup: 

463 barrier() 

464 self.log( 

465 f'Deleting old "{self.filename}" as backup is turned off.') 

466 self.delete_bundle(self.filename) 

467 else: 

468 barrier() 

469 # Make a backup file 

470 bakname = self.filename + '.bak' 

471 if os.path.exists(bakname): 

472 barrier() # All must see it exists 

473 self.log(f'Deleting old backup file "{bakname}"') 

474 self.delete_bundle(bakname) 

475 self.log(f'Renaming "{self.filename}" to "{bakname}"') 

476 self._rename_bundle(self.filename, bakname) 

477 # Ready to create a new bundle. 

478 barrier() 

479 self.log(f'Creating new "{self.filename}"') 

480 self._make_bundledir(self.filename) 

481 self.state = 'prewrite' 

482 self._write_metadata({}) 

483 self._write_nframes(0) # Mark new bundle as empty 

484 self._open_log() 

485 self.nframes = 0 

486 

487 def _open_read(self): 

488 "Open a bundle trajectory for reading." 

489 if not os.path.exists(self.filename): 

490 raise OSError('File not found: ' + self.filename) 

491 if not self.is_bundle(self.filename): 

492 raise OSError('Not a BundleTrajectory: ' + self.filename) 

493 self.state = 'read' 

494 # Read the metadata 

495 metadata = self._read_metadata() 

496 self.metadata = metadata 

497 if metadata['version'] > self.version: 

498 raise NotImplementedError( 

499 'This version of ASE cannot read a BundleTrajectory version ' + 

500 str(metadata['version'])) 

501 if metadata['subtype'] not in ('normal', 'split'): 

502 raise NotImplementedError( 

503 'This version of ASE cannot read BundleTrajectory subtype ' + 

504 metadata['subtype']) 

505 self.subtype = metadata['subtype'] 

506 if metadata['backend'] == 'ulm': 

507 self.singleprecision = metadata['ulm.singleprecision'] 

508 self._set_backend(metadata['backend']) 

509 self.nframes = self._read_nframes() 

510 if self.nframes == 0: 

511 raise OSError('Empty BundleTrajectory') 

512 self.datatypes = metadata['datatypes'] 

513 try: 

514 self.pythonmajor = metadata['python_ver'][0] 

515 except KeyError: 

516 self.pythonmajor = 2 # Assume written with Python 2. 

517 # We need to know if we are running Python 3.X and try to read 

518 # a bundle written with Python 2.X 

519 self.backend.readpy2 = (self.pythonmajor == 2) 

520 self.state = 'read' 

521 

522 def _open_append(self, atoms, backend): 

523 """Open a trajectory for writing in append mode. 

524 

525 If there is no pre-existing bundle, it is just opened in write mode 

526 instead. 

527 

528 Parameters 

529 ---------- 

530 atoms: Object to be written. 

531 backend: The backend to be used if a new bundle is opened. Ignored 

532 if we append to existing bundle, as the backend cannot be 

533 changed. 

534 

535 The filename is already stored as an attribute. 

536 """ 

537 if not os.path.exists(self.filename): 

538 # OK, no old bundle. Open as for write instead. 

539 barrier() 

540 self._open_write(atoms, False, backend) 

541 return 

542 if not self.is_bundle(self.filename): 

543 raise OSError('Not a BundleTrajectory: ' + self.filename) 

544 self.state = 'read' 

545 metadata = self._read_metadata() 

546 self.metadata = metadata 

547 if metadata['version'] != self.version: 

548 raise NotImplementedError( 

549 'Cannot append to a BundleTrajectory version ' 

550 '%s (supported version is %s)' % (str(metadata['version']), 

551 str(self.version))) 

552 if metadata['subtype'] not in ('normal', 'split'): 

553 raise NotImplementedError( 

554 'This version of ASE cannot append to BundleTrajectory ' 

555 'subtype ' + metadata['subtype']) 

556 self.subtype = metadata['subtype'] 

557 if metadata['backend'] == 'ulm': 

558 self.singleprecision = metadata['ulm.singleprecision'] 

559 self._set_backend(metadata['backend']) 

560 self.nframes = self._read_nframes() 

561 self._open_log() 

562 self.log('Opening "%s" in append mode (nframes=%i)' % (self.filename, 

563 self.nframes)) 

564 self.state = 'write' 

565 self.atoms = atoms 

566 

567 @property 

568 def path(self): 

569 return Path(self.filename) 

570 

571 @property 

572 def metadata_path(self): 

573 return self.path / 'metadata.json' 

574 

575 def _write_nframes(self, n): 

576 "Write the number of frames in the bundle." 

577 assert self.state == 'write' or self.state == 'prewrite' 

578 with paropen(self.path / 'frames', 'w') as fd: 

579 fd.write(str(n) + '\n') 

580 

581 def _read_nframes(self): 

582 "Read the number of frames." 

583 return int((self.path / 'frames').read_text()) 

584 

585 def _write_metadata(self, metadata): 

586 """Write the metadata file of the bundle. 

587 

588 Modifies the metadata dictionary!

589 """ 

590 # Add standard fields that must always be present. 

591 assert self.state == 'write' or self.state == 'prewrite' 

592 metadata['format'] = 'BundleTrajectory' 

593 metadata['version'] = self.version 

594 metadata['subtype'] = self.subtype 

595 metadata['backend'] = self.backend_name 

596 if self.backend_name == 'ulm': 

597 metadata['ulm.singleprecision'] = self.singleprecision 

598 metadata['python_ver'] = tuple(sys.version_info) 

599 encode = jsonio.MyEncoder(indent=4).encode 

600 fido = encode(metadata) 

601 with paropen(self.metadata_path, 'w') as fd: 

602 fd.write(fido) 

603 

604 def _read_metadata(self): 

605 """Read the metadata.""" 

606 assert self.state == 'read' 

607 return jsonio.decode(self.metadata_path.read_text()) 

608 

609 @staticmethod 

610 def is_bundle(filename, allowempty=False): 

611 """Check if a filename exists and is a BundleTrajectory. 

612 

613 If allowempty=True, an empty folder is regarded as an 

614 empty BundleTrajectory.""" 

615 filename = Path(filename) 

616 if not filename.is_dir(): 

617 return False 

618 if allowempty and not os.listdir(filename): 

619 return True # An empty BundleTrajectory 

620 metaname = filename / 'metadata.json' 

621 if metaname.is_file(): 

622 mdata = jsonio.decode(metaname.read_text()) 

623 else: 

624 return False 

625 

626 try: 

627 return mdata['format'] == 'BundleTrajectory' 

628 except KeyError: 

629 return False 

630 

631 @staticmethod 

632 def is_empty_bundle(filename): 

633 """Check if a filename is an empty bundle. 

634 

635 Assumes that it is a bundle.""" 

636 if not os.listdir(filename): 

637 return True # Empty folders are empty bundles. 

638 with open(os.path.join(filename, 'frames'), 'rb') as fd: 

639 nframes = int(fd.read()) 

640 

641 # File may be removed by the master immediately after this. 

642 barrier() 

643 return nframes == 0 

644 

645 @classmethod 

646 def delete_bundle(cls, filename): 

647 "Deletes a bundle." 

648 if world.rank == 0: 

649 # Only the master deletes 

650 if not cls.is_bundle(filename, allowempty=True): 

651 raise OSError( 

652 f'Cannot remove "{filename}" as it is not a bundle ' 

653 'trajectory.' 

654 ) 

655 if os.path.islink(filename): 

656 # A symbolic link to a bundle. Only remove the link. 

657 os.remove(filename) 

658 else: 

659 # A real bundle 

660 shutil.rmtree(filename) 

661 else: 

662 # All other tasks wait for the directory to go away. 

663 while os.path.exists(filename): 

664 time.sleep(1) 

665 # The master may not proceed before all tasks have seen the 

666 # directory go away, as it might otherwise create a new bundle 

667 # with the same name, fooling the wait loop in _make_bundledir. 

668 barrier() 
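# Example use of the bundle helpers, assuming 'md.bundle' exists on disk:
#
#     if (BundleTrajectory.is_bundle('md.bundle')
#             and BundleTrajectory.is_empty_bundle('md.bundle')):
#         BundleTrajectory.delete_bundle('md.bundle')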

669 

670 def _rename_bundle(self, oldname, newname): 

671 "Rename a bundle. Used to create the .bak" 

672 if self.master: 

673 os.rename(oldname, newname) 

674 else: 

675 while os.path.exists(oldname): 

676 time.sleep(1) 

677 # The master may not proceed before all tasks have seen the 

678 # directory go away. 

679 barrier() 

680 

681 def _make_bundledir(self, filename): 

682 """Make the main bundle directory. 

683 

684 Since all MPI tasks might write to it, all tasks must wait for 

685 the directory to appear. 

686 """ 

687 self.log('Making directory ' + filename) 

688 assert not os.path.isdir(filename) 

689 barrier() 

690 if self.master: 

691 os.mkdir(filename) 

692 else: 

693 i = 0 

694 while not os.path.isdir(filename): 

695 time.sleep(1) 

696 i += 1 

697 if i > 10: 

698 self.log('Waiting %d seconds for %s to appear!' 

699 % (i, filename)) 

700 

701 def _make_framedir(self, frame): 

702 """Make subdirectory for the frame. 

703 

704 As only the master writes to it, no synchronization between 

705 MPI tasks is necessary. 

706 """ 

707 framedir = os.path.join(self.filename, 'F' + str(frame)) 

708 if self.master: 

709 self.log('Making directory ' + framedir) 

710 os.mkdir(framedir) 

711 return framedir 

712 

713 def pre_write_attach(self, function, interval=1, *args, **kwargs): 

714 """Attach a function to be called before writing begins. 

715 

716 function: The function or callable object to be called. 

717 

718 interval: How often the function is called. Default: every time (1). 

719 

720 All other arguments are stored, and passed to the function. 

721 """ 

722 if not callable(function): 

723 raise ValueError('Callback object must be callable.') 

724 self.pre_observers.append((function, interval, args, kwargs)) 

725 

726 def post_write_attach(self, function, interval=1, *args, **kwargs): 

727 """Attach a function to be called after writing ends. 

728 

729 function: The function or callable object to be called. 

730 

731 interval: How often the function is called. Default: every time (1). 

732 

733 All other arguments are stored, and passed to the function. 

734 """ 

735 if not callable(function): 

736 raise ValueError('Callback object must be callable.') 

737 self.post_observers.append((function, interval, args, kwargs)) 
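# Example observers, assuming 'traj' is open for writing: a pre-write callback
# invoked every 100 frames and a post-write callback invoked every frame.
#
#     def report(msg):
#         print(msg)
#     traj.pre_write_attach(report, 100, 'about to write a frame')
#     traj.post_write_attach(report, msg='frame written')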

738 

739 def _call_observers(self, obs): 

740 "Call pre/post write observers." 

741 for function, interval, args, kwargs in obs: 

742 if (self.nframes + 1) % interval == 0: 

743 function(*args, **kwargs) 

744 

745 

746class UlmBundleBackend: 

747 """Backend for BundleTrajectories stored as ASE Ulm files.""" 

748 

749 def __init__(self, master, singleprecision): 

750 # Store if this backend will actually write anything 

751 self.writesmall = master 

752 self.writelarge = master 

753 self.singleprecision = singleprecision 

754 

755 # Integer data may be downconverted to the following types 

756 self.integral_dtypes = ['uint8', 'int8', 'uint16', 'int16', 

757 'uint32', 'int32', 'uint64', 'int64'] 

758 # Map each allowed integer dtype name to its numpy type and value limits.

759 self.int_dtype = {k: getattr(np, k) 

760 for k in self.integral_dtypes} 

761 self.int_minval = {k: np.iinfo(self.int_dtype[k]).min 

762 for k in self.integral_dtypes} 

763 self.int_maxval = {k: np.iinfo(self.int_dtype[k]).max 

764 for k in self.integral_dtypes} 

765 self.int_itemsize = {k: np.dtype(self.int_dtype[k]).itemsize 

766 for k in self.integral_dtypes} 

767 

768 def write_small(self, framedir, smalldata): 

769 "Write small data to be written jointly." 

770 if self.writesmall: 

771 with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'w') as fd: 

772 fd.write(**smalldata) 

773 

774 def write(self, framedir, name, data): 

775 "Write data to separate file." 

776 if self.writelarge: 

777 shape = data.shape 

778 dtype = str(data.dtype) 

779 stored_as = dtype 

780 all_identical = False 

781 # Check if it is a type that can be stored with less space

782 if np.issubdtype(data.dtype, np.integer): 

783 # An integer type, we may want to convert 

784 minval = data.min() 

785 maxval = data.max() 

786 

787 # ulm cannot write np.bool_: 

788 all_identical = bool(minval == maxval) 

789 if all_identical: 

790 data = int(data.flat[0]) # Convert to standard integer 

791 else: 

792 for typ in self.integral_dtypes: 

793 if (minval >= self.int_minval[typ] and 

794 maxval <= self.int_maxval[typ] and 

795 data.itemsize > self.int_itemsize[typ]): 

796 

797 # Convert to smaller type 

798 stored_as = typ 

799 data = data.astype(self.int_dtype[typ]) 

800 elif data.dtype == np.float32 or data.dtype == np.float64: 

801 all_identical = bool(data.min() == data.max()) 

802 if all_identical: 

803 data = float(data.flat[0]) # Convert to standard float 

804 elif data.dtype == np.float64 and self.singleprecision: 

805 # Downconvert double to single precision 

806 stored_as = 'float32' 

807 data = data.astype(np.float32) 

808 fn = os.path.join(framedir, name + '.ulm') 

809 with ulmopen(fn, 'w') as fd: 

810 fd.write(shape=shape, 

811 dtype=dtype, 

812 stored_as=stored_as, 

813 all_identical=all_identical, 

814 data=data) 
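# A small round-trip sketch for this backend, assuming the directory 'F0'
# already exists: an int64 array whose values fit in uint8 is written
# downconverted ('stored_as') and cast back to int64 by read().
#
#     import numpy as np
#     backend = UlmBundleBackend(master=True, singleprecision=False)
#     backend.write('F0', 'numbers', np.array([1, 8, 8], dtype=np.int64))
#     numbers = backend.read('F0', 'numbers')   # dtype is int64 again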

815 

816 def read_small(self, framedir): 

817 "Read small data." 

818 with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'r') as fd: 

819 return fd.asdict() 

820 

821 def read(self, framedir, name): 

822 "Read data from separate file." 

823 fn = os.path.join(framedir, name + '.ulm') 

824 with ulmopen(fn, 'r') as fd: 

825 if fd.all_identical: 

826 # Only a single data value 

827 data = np.zeros(fd.shape, 

828 dtype=getattr(np, fd.dtype)) + fd.data 

829 elif fd.dtype == fd.stored_as: 

830 # Easy, the array can be returned as-is. 

831 data = fd.data 

832 else: 

833 # Cast the data back 

834 data = fd.data.astype(getattr(np, fd.dtype)) 

835 return data 

836 

837 def read_info(self, framedir, name, split=None): 

838 """Read information about file contents without reading the data. 

839 

840 Information is a dictionary containing at minimum the shape and

841 type. 

842 """ 

843 fn = os.path.join(framedir, name + '.ulm') 

844 if split is None or os.path.exists(fn): 

845 with ulmopen(fn, 'r') as fd: 

846 info = {} 

847 info['shape'] = fd.shape 

848 info['type'] = fd.dtype 

849 info['stored_as'] = fd.stored_as 

850 info['identical'] = fd.all_identical 

851 return info 

852 else: 

853 info = {} 

854 for i in range(split): 

855 fn = os.path.join(framedir, name + '_' + str(i) + '.ulm') 

856 with ulmopen(fn, 'r') as fd: 

857 if i == 0: 

858 info['shape'] = list(fd.shape) 

859 info['type'] = fd.dtype 

860 info['stored_as'] = fd.stored_as 

861 info['identical'] = fd.all_identical 

862 else: 

863 info['shape'][0] += fd.shape[0] 

864 assert info['type'] == fd.dtype 

865 info['identical'] = (info['identical'] 

866 and fd.all_identical) 

867 info['shape'] = tuple(info['shape']) 

868 return info 

869 

870 def set_fragments(self, nfrag): 

871 self.nfrag = nfrag 

872 

873 def read_split(self, framedir, name): 

874 """Read data from multiple files. 

875 

876 Falls back to reading from single file if that is how data is stored. 

877 

878 Returns the data and an object indicating if the data was really 

879 read from split files. The latter object is False if not 

880 read from split files, but is a list of the segment lengths if

881 split files were used. 

882 """ 

883 data = [] 

884 if os.path.exists(os.path.join(framedir, name + '.ulm')): 

885 # Not stored in split form! 

886 return (self.read(framedir, name), False) 

887 for i in range(self.nfrag): 

888 suf = '_%d' % (i,) 

889 data.append(self.read(framedir, name + suf)) 

890 seglengths = [len(d) for d in data] 

891 return (np.concatenate(data), seglengths) 

892 

893 def close(self, log=None): 

894 """Close anything that needs to be closed by the backend. 

895 

896 The default backend does nothing here. 

897 """ 

898 

899 

900def read_bundletrajectory(filename, index=-1): 

901 """Reads one or more atoms objects from a BundleTrajectory. 

902 

903 Arguments: 

904 

905 filename: str 

906 The name of the bundle (really a directory!) 

907 index: int 

908 An integer specifying which frame to read, or an index object 

909 for reading multiple frames. Default: -1 (reads the last 

910 frame). 

911 """ 

912 traj = BundleTrajectory(filename, mode='r') 

913 if isinstance(index, int):

914 yield traj[index]

915 return  # an int has no .indices(); stop after the single requested frame

916 for i in range(*index.indices(len(traj))):

917 yield traj[i]
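# Usage sketch, assuming an existing bundle 'md.bundle': the function is a
# generator, so iterate over it or materialize it with list().
#
#     images = list(read_bundletrajectory('md.bundle', index=slice(0, None)))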

918 

919 

920def write_bundletrajectory(filename, images, append=False): 

921 """Write image(s) to a BundleTrajectory. 

922 

923 Also writes energy, forces, and stress if they are already

924 calculated. 

925 """ 

926 

927 if append: 

928 mode = 'a' 

929 else: 

930 mode = 'w' 

931 traj = BundleTrajectory(filename, mode=mode) 

932 

933 if hasattr(images, 'get_positions'): 

934 images = [images] 

935 

936 for atoms in images: 

937 # Avoid potentially expensive calculations: 

938 calc = atoms.calc 

939 if hasattr(calc, 'calculation_required'): 

940 for quantity in ('energy', 'forces', 'stress', 'magmoms'): 

941 traj.select_data(quantity, 

942 not calc.calculation_required(atoms, 

943 [quantity])) 

944 traj.write(atoms) 

945 traj.close() 
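# Usage sketch, assuming 'images' is a list of Atoms objects ('more_images'
# likewise hypothetical): write them to a new bundle, then append further
# frames to the same bundle.
#
#     write_bundletrajectory('relax.bundle', images)
#     write_bundletrajectory('relax.bundle', more_images, append=True)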

946 

947 

948def print_bundletrajectory_info(filename): 

949 """Prints information about a BundleTrajectory. 

950 

951 Mainly intended to be called from a command line tool. 

952 """ 

953 if not BundleTrajectory.is_bundle(filename): 

954 raise ValueError('Not a BundleTrajectory!') 

955 if BundleTrajectory.is_empty_bundle(filename): 

956 print(filename, 'is an empty BundleTrajectory.') 

957 return 

958 # Read the metadata 

959 fn = os.path.join(filename, 'metadata.json') 

960 with open(fn) as fd: 

961 metadata = jsonio.decode(fd.read()) 

962 

963 print(f'Metadata information of BundleTrajectory "{filename}":') 

964 for k, v in metadata.items(): 

965 if k != 'datatypes': 

966 print(f" {k}: {v}") 

967 with open(os.path.join(filename, 'frames'), 'rb') as fd: 

968 nframes = int(fd.read()) 

969 print('Number of frames: %i' % (nframes,)) 

970 print('Data types:') 

971 for k, v in metadata['datatypes'].items(): 

972 if v == 'once': 

973 print(f' {k}: First frame only.') 

974 elif v: 

975 print(f' {k}: All frames.') 

976 # Look at first frame 

977 if metadata['backend'] == 'ulm': 

978 backend = UlmBundleBackend(True, False) 

979 else: 

980 raise NotImplementedError( 

981 f'Backend {metadata["backend"]} not supported.') 

982 frame = os.path.join(filename, 'F0') 

983 small = backend.read_small(frame) 

984 print('Contents of first frame:') 

985 for k, v in small.items(): 

986 if k == 'constraints': 

987 if v: 

988 print(f' {len(v)} constraints are present') 

989 else: 

990 print(' Constraints are absent.') 

991 elif k == 'pbc': 

992 print(f' Periodic boundary conditions: {v!s}') 

993 elif k == 'natoms': 

994 print(' Number of atoms: %i' % (v,)) 

995 elif hasattr(v, 'shape'): 

996 print(f' {k}: shape = {v.shape!s}, type = {v.dtype!s}') 

997 if k == 'cell': 

998 print(' [[%12.6f, %12.6f, %12.6f],' % tuple(v[0])) 

999 print(' [%12.6f, %12.6f, %12.6f],' % tuple(v[1])) 

1000 print(' [%12.6f, %12.6f, %12.6f]]' % tuple(v[2])) 

1001 else: 

1002 print(f' {k}: {v!s}') 

1003 # Read info from separate files. 

1004 if metadata['subtype'] == 'split': 

1005 nsplit = small['fragments'] 

1006 else: 

1007 nsplit = False 

1008 for k, v in metadata['datatypes'].items(): 

1009 if v and k not in small: 

1010 info = backend.read_info(frame, k, nsplit) 

1011 infoline = f' {k}: ' 

1012 for k, v in info.items(): 

1013 infoline += f'{k} = {v!s}, ' 

1014 infoline = infoline[:-2] + '.' # Fix punctuation. 

1015 print(infoline) 

1016 

1017 

1018def main(): 

1019 import optparse 

1020 parser = optparse.OptionParser( 

1021 usage='python -m ase.io.bundletrajectory ' 

1022 'a.bundle [b.bundle ...]', 

1023 description='Print information about ' 

1024 'the contents of one or more bundletrajectories.') 

1025 _opts, args = parser.parse_args() 

1026 for name in args: 

1027 print_bundletrajectory_info(name) 

1028 

1029 

1030if __name__ == '__main__': 

1031 main()