Coverage for ase / io / formats.py: 90.99%

533 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-30 08:22 +0000

1# fmt: off 

2 

3"""File formats. 

4 

5This module implements the read(), iread() and write() functions in ase.io. 

6For each file format there is an IOFormat object. 

7 

8There is a dict, ioformats, which stores the objects. 

9 

10Example 

11======= 

12 

13The xyz format is implemented in the ase/io/xyz.py file which has a 

14read_xyz() generator and a write_xyz() function. This and other 

15information can be obtained from ioformats['xyz']. 

16""" 

17 

18import functools 

19import inspect 

20import io 

21import os 

22import re 

23import sys 

24import warnings 

25from collections.abc import Iterator, Sequence 

26from importlib import import_module 

27from importlib.metadata import entry_points 

28from pathlib import PurePath 

29from typing import ( 

30 IO, 

31 Any, 

32) 

33 

34from ase.atoms import Atoms 

35from ase.parallel import parallel_function, parallel_generator 

36from ase.utils import string2index 

37from ase.utils.plugins import ExternalIOFormat 

38 

# Upper bound on the number of leading bytes that are read from a file and
# handed to the magic-signature matchers when guessing its format.
PEEK_BYTES = 50000

40 

41 

class UnknownFileTypeError(Exception):
    """Raised when a file format is unknown or cannot be determined."""

44 

45 

class IOFormat:
    """Description of a single file format and access to its reader/writer.

    The two-character ``code`` encodes the format's properties: the first
    character is ``'1'`` (one Atoms object per file) or ``'+'`` (several),
    the second is ``'F'`` (accepts a file object), ``'B'`` (like ``'F'`` but
    binary) or ``'S'`` (needs a filename string).

    The implementing module is imported lazily through the ``module``
    property; the reader and writer are looked up in it under the names
    ``read_<name>`` and ``write_<name>`` (dashes replaced by underscores).
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: str | None = None) -> None:
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: list[str] = []
        self.globs: list[str] = []
        self.magic: list[str] = []
        self.magic_regex: bytes | None = None

    def _buf_as_filelike(self, data: str | bytes) -> IO:
        """Wrap ``data`` in an in-memory file of the kind this format expects.

        Text is encoded for binary formats and bytes are decoded for text
        formats, using ``self.encoding`` or UTF-8 when none is set.
        """
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory file class matching the format: BytesIO or StringIO."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: str | bytes,
                     **kwargs) -> Sequence[Atoms]:
        """Parse ``data`` held in memory and return the images as a list.

        The reader returned by ``self.read`` always produces an iterator
        (plain read functions are wrapped into generators), so the result
        is materialised exactly once while the in-memory file is still
        open.  For single-image formats the list has one element.
        """
        with self._buf_as_filelike(data) as fd:
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: str | bytes, **kwargs) -> Atoms:
        """Parse ``data`` held in memory and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the module defines a read function for this format."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the module defines a write function for this format."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the write function has a variable named ``append``."""
        writefunc = self._writefunc()
        # Short-circuits before touching writefunc when it is None.
        return self.can_write and 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = [f'{name}={value!r}'
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with dashes replaced by underscores."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return the module's ``read_<format>`` function, or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return the module's ``write_<format>`` function, or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Callable that reads this format, or None (with a FutureWarning)."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the read function, always returning an iterator of results."""
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            # Plain functions are adapted so callers can always iterate.
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Emit the FutureWarning about ``read``/``write`` returning None."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None. '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Callable that writes this format, or None (with a FutureWarning)."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the write function; raise ValueError if there is none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """Supported modes as a string: '', 'r', 'w' or 'rw'."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name: {self.name}',
                 f'Description: {self.description}',
                 f'Modes: {self.modes}',
                 f'Encoding: {self.encoding}',
                 f'Module: {self.module_name}',
                 f'Code: {self.code}',
                 f'Extensions: {self.extensions}',
                 f'Globs: {self.globs}',
                 f'Magic: {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether reader/writer take a file object (code 'F' or 'B')."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether files of this format are opened in binary mode."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the implementing module.

        Raises UnknownFileTypeError if the module cannot be imported.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether ``basename`` matches any of the format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether ``data`` starts like a file of this format.

        Uses ``magic_regex`` when set, otherwise each ``magic`` glob with a
        trailing ``*`` appended.
        """
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(
            fnmatchcase(data, magic + b'*')  # type: ignore[operator, type-var]
            for magic in self.magic
        )

231 

232 

# Registry of IOFormat objects keyed by format name; define_io_format()
# adds an entry for every format it is called with.
ioformats: dict[str, IOFormat] = {}  # These will be filled at run-time.
# Maps a filename extension (stored without the leading dot) to the
# IOFormat that registered it.
extension2format = {}


all_formats = ioformats  # Aliased for compatibility only.  Please do not use.
# Maps a format name to its module name as passed to define_io_format(),
# i.e. before any 'ase.io.' prefix is added.
format2modulename = {}  # Left for compatibility only.

239 

240 

def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat and register it in the module-level registries.

    Parameters
    ----------
    name: format name; also the key in ``ioformats``.
    desc: human-readable description.
    code: two-character property code (see IOFormat).
    module: module implementing the format; defaults to ``name`` with
        dashes replaced by underscores.  Unless ``external`` is true it is
        taken relative to ``ase.io``.
    ext, glob, magic: a single pattern or an iterable of patterns
        (filename extensions, filename globs, content signatures).
    encoding: text encoding stored on the format.
    magic_regex: bytes regular expression used instead of ``magic``.
    external: if true, ``module`` is used as a fully qualified name.

    Returns the new IOFormat.

    Raises ValueError if one of the extensions is already registered.  All
    validation happens before any registry is modified, so a failed call
    leaves ``ioformats``, ``extension2format`` and ``format2modulename``
    untouched.
    """
    def normalize_patterns(strings):
        # Accept None, a single str/bytes pattern, or any iterable.
        if strings is None:
            return []
        if isinstance(strings, (str, bytes)):
            return [strings]
        return list(strings)

    if module is None:
        module = name.replace('-', '_')
    # format2modulename keeps the short name, IOFormat the importable one.
    module_name = module if external else 'ase.io.' + module

    fmt = IOFormat(name, desc, code, module_name=module_name,
                   encoding=encoding)
    fmt.extensions = normalize_patterns(ext)
    fmt.globs = normalize_patterns(glob)
    fmt.magic = normalize_patterns(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    # Validate every extension first (including repeats within this call)
    # so that a conflict cannot leave a half-registered format behind.
    seen = set()
    for extension in fmt.extensions:
        if extension in extension2format or extension in seen:
            raise ValueError(f'extension "{extension}" already registered')
        seen.add(extension)

    format2modulename[name] = module
    for extension in fmt.extensions:
        extension2format[extension] = fmt
    ioformats[name] = fmt
    return fmt

276 

277 

def get_ioformat(name: str) -> IOFormat:
    """Look up a registered format by name.

    Raises UnknownFileTypeError when the name is not registered or when the
    format's module cannot be imported.
    """
    fmt = ioformats.get(name)
    if fmt is None:
        raise UnknownFileTypeError(name)
    # Touch the lazy module property so that import problems surface here.
    fmt.module
    return fmt

286 

287 

def register_external_io_formats(group):
    """Register every IO format advertised under the entry-point ``group``.

    A plugin that fails to register only produces a warning; the remaining
    plugins are still processed.
    """
    discovered = entry_points()
    if hasattr(discovered, 'select'):
        plugins = discovered.select(group=group)
    else:
        # Older return type: a mapping from group name to entry points.
        plugins = discovered.get(group, ())

    for plugin in plugins:
        try:
            define_external_io_format(plugin)
        except Exception as exc:
            warnings.warn(
                f'Failed to register external IO format {plugin.name}: {exc}'
            )

302 

303 

def define_external_io_format(entry_point):
    """Load one plugin entry point and register the format it describes.

    Raises ValueError if a format with the entry point's name already
    exists, and TypeError if the loaded object is not an ExternalIOFormat.
    """
    fmt = entry_point.load()
    name = entry_point.name

    if name in ioformats:
        raise ValueError(f'Format {name} already defined')

    if isinstance(fmt, ExternalIOFormat):
        F(name, **fmt._asdict(), external=True)
        return

    raise TypeError('Wrong type for registering external IO formats '
                    f'in format {name}, expected '
                    'ExternalIOFormat')

314 

315 

# We define all the IO formats below.  Each IO format has a code,
# such as '1F', which defines some of the format's properties.
#
# First character:
#   1=single atoms object
#   +=multiple atoms objects
# Second character:
#   F=accepts a file-descriptor
#   S=needs a file-name str
#   B=like F, but opens in binary mode

# Short alias so that the table below stays compact.
F = define_io_format
F('abinit-gsr', 'ABINIT GSR file', '1S',
  module='abinit', glob='*o_GSR.nc')
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
# XXX: Define plugin in ase db backends package:
# F('aselmdb', 'ASE LMDB format', '+F')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cjson', 'Chemical json file', '1F', ext='cjson')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol', ext='arc')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['pwo', 'out'], magic=b'*Program PWSCF')
# NOTE(review): 'exciting' is registered twice.  define_io_format() stores
# formats by name, so the second call replaces the first entry in ioformats
# and only the 'INFO.out' glob remains reachable under this name.
F('exciting', 'exciting input', '1F', module='exciting', glob='input.xml')
F('exciting', 'exciting output', '1F', module='exciting', glob='INFO.out')
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('onetep-out', 'ONETEP output file', '+F',
  module='onetep',
  magic=b'*Linear-Scaling Ab Initio Total Energy Program*')
F('onetep-in', 'ONETEP input file', '1F',
  module='onetep',
  magic=[b'*lock species ',
         b'*LOCK SPECIES ',
         b'*--- INPUT FILE ---*'])
F('orca-output', 'ORCA output', '+F',
  module='orca', magic=b'* O R C A *')
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
#      The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point.
# This runs after the built-in table above has been registered.
register_external_io_formats('ase.ioformats')

499 

500 

501def get_compression(filename: str) -> tuple[str, str | None]: 

502 """ 

503 Parse any expected file compression from the extension of a filename. 

504 Return the filename without the extension, and the extension. Recognises 

505 ``.gz``, ``.bz2``, ``.xz``. 

506 

507 >>> get_compression('H2O.pdb.gz') 

508 ('H2O.pdb', 'gz') 

509 >>> get_compression('crystal.cif') 

510 ('crystal.cif', None) 

511 

512 Parameters 

513 ---------- 

514 filename: str 

515 Full filename including extension. 

516 

517 Returns 

518 ------- 

519 (root, extension): (str, str or None) 

520 Filename split into root without extension, and the extension 

521 indicating compression format. Will not split if compression 

522 is not recognised. 

523 """ 

524 # Update if anything is added 

525 valid_compression = ['gz', 'bz2', 'xz'] 

526 

527 # Use stdlib as it handles most edge cases 

528 root, compression = os.path.splitext(filename) 

529 

530 # extension keeps the '.' so remember to remove it 

531 if compression.strip('.') in valid_compression: 

532 return root, compression.strip('.') 

533 else: 

534 return filename, None 

535 

536 

def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a file like builtin `open`, transparently handling compression.

    The compression format is guessed from the filename suffix; ``gz``
    (gzip), ``bz2`` (bzip2) and ``xz`` (lzma) are supported.  Any other
    file is opened with builtin ``open``.

    Supported modes are:
       * 'r', 'rt', 'w', 'wt' for text mode read and write.
       * 'rb, 'wb' for binary read and write.

    Parameters
    ----------
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    -------
    fd: file
        File-like object open with the specified mode.
    """
    # The compressed openers treat a bare 'r'/'w'/'a' as binary, so spell
    # out text mode explicitly; every other mode is passed through as is.
    mode = {'r': 'rt', 'w': 'wt', 'a': 'at'}.get(mode, mode)

    _root, compression = get_compression(filename)

    # The compression modules are imported only when actually needed.
    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore[return-value]
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)

    # No (recognised) compression suffix: plain file.
    return open(filename, mode)

585 

586 

def is_compressed(fd: io.BufferedIOBase) -> bool:
    """Tell whether ``fd`` is a gzip, bz2 or lzma file object."""
    # Only modules that have already been imported are consulted: this
    # avoids triggering imports, and Python may have been built without
    # some of them (e.g. lzma).  If a module was never imported, ``fd``
    # cannot be an instance of its file class anyway.
    candidates = (
        ('gzip', 'GzipFile'),
        ('bz2', 'BZ2File'),
        ('lzma', 'LZMAFile'),
    )
    for module_name, class_name in candidates:
        if module_name not in sys.modules:
            continue
        file_class = getattr(sys.modules[module_name], class_name)
        if isinstance(fd, file_class):
            return True
    return False

604 

605 

def wrap_read_function(read, filename, index=None, **kwargs):
    """Adapt a plain read function so that it behaves like a generator.

    With an ``index`` the function is called as ``read(filename, index)``
    and every item of its result is yielded; without one, the single
    return value of ``read(filename)`` is yielded.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)

612 

613 

# Type accepted for the ``filename`` argument of read(), iread(), write()
# and filetype(): a path given as str or PurePath, or a file object.
NameOrFile = str | PurePath | IO

615 

616 

def write(
        filename: NameOrFile,
        images: Atoms | Sequence[Atoms],
        format: str | None = None,
        parallel: bool = True,
        append: bool = False,
        **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename.
        When no format can be determined, 'json' is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one file.
        WARNING: If the file format does not support multiple entries without
        additional keywords/headers, files created using 'append=True'
        might not be readable by any program! They will nevertheless be
        written without error message.

    The use of additional keywords is format specific. write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """

    if isinstance(filename, PurePath):
        filename = str(filename)

    if not isinstance(filename, str):
        # A file object was passed: it is the target, there is no filename.
        fd = filename  # type: ignore[assignment]
        if format is None:
            # Best effort only; an undetermined format falls back to json.
            try:
                format = filetype(filename, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
        filename = None  # type: ignore[assignment]
    elif filename == '-':
        fd = sys.stdout
        filename = None  # type: ignore[assignment]
    else:
        fd = None
        if format is None:
            format = filetype(filename, read=False)
            assert isinstance(format, str)

    format = format or 'json'  # default is json

    io = get_ioformat(format)

    return _write(filename, fd, format, io, images,
                  parallel=parallel, append=append, **kwargs)

681 

682 

@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write ``images`` with the format object ``io``.

    Exactly one of ``filename`` and ``fd`` is expected to be set by the
    caller.  Raises ValueError when the format cannot store the request
    (several images in a single-image format, no writer, a file object for
    a filename-only format, or ``append`` without writer support).
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError(f'{format}-format can only store 1 Atoms object.')
        # Single-image writers receive the Atoms object itself.
        images = images[0]

    if not io.can_write:
        raise ValueError(f"Can't write to {format}-format")

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError(
                "Can't write more than one image to file-descriptor "
                'using json-format.')
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The writer needs a filename and opens the file itself.
        if fd is not None:
            raise ValueError(f"Can't write {format}-format to file-descriptor")
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError(f'Cannot append to {format}-format, '
                             'write-function does not support the append '
                             'keyword.')
        return io.write(filename, images, **kwargs)

    if fd is not None:
        # The caller owns the file object; it is left open.
        return io.write(fd, images, **kwargs)

    mode = 'wb' if io.isbinary else 'w'
    if append:
        mode = mode.replace('w', 'a')
    fd = open_with_compression(filename, mode)
    # XXX remember to re-enable compressed open
    # fd = io.open(filename, mode)
    try:
        return io.write(fd, images, **kwargs)
    finally:
        fd.close()

731 

732 

def read(
        filename: NameOrFile,
        index: Any | None = None,
        format: str | None = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Atoms | list[Atoms]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default), ``filename`` is split at the last at sign
        ``@`` and the part after it is used as the index.

    Many formats allow an open file-like object to be passed instead
    of ``filename``.  In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin

    if isinstance(index, str):
        # A string that is not an index expression is passed on unchanged.
        try:
            index = string2index(index)
        except ValueError:
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1

    format = format or filetype(filename, read=isinstance(filename, str))
    io = get_ioformat(format)

    # Slices and strings select several images; anything else selects one.
    if isinstance(index, (slice, str)):
        images = _iread(filename, index, format, io, parallel=parallel,
                        **kwargs)
        return list(images)

    images = _iread(filename, slice(index, None), format, io,
                    parallel=parallel, **kwargs)
    return next(images)

789 

790 

def iread(
        filename: NameOrFile,
        index: Any | None = None,
        format: str | None = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterator[Atoms]:
    """Iterator for reading Atoms objects from file.

    Takes the same arguments as the `read` function, but yields one Atoms
    object at a time instead of returning them all at once.  Without an
    index, all images are yielded."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None, None, None)
    elif not isinstance(index, (slice, str)):
        # Single position -> one-element slice.  For index == -1 the stop
        # would be 0, which would select nothing, so None is used instead.
        index = slice(index, (index + 1) or None)

    format = format or filetype(filename, read=isinstance(filename, str))
    io = get_ioformat(format)

    yield from _iread(filename, index, format, io, parallel=parallel,
                      **kwargs)

823 

824 

@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield the images read from ``filename`` with the format object ``io``.

    ``filename`` is a path or a file object.  Each item produced by the
    reader is yielded as-is when ``full_output`` is true (non-dict items
    are wrapped as ``{'atoms': item}``); otherwise only its ``'atoms'``
    entry is yielded.
    """
    if not io.can_read:
        raise ValueError(f"Can't read from {format}-format")

    if io.single:
        # Single-image readers take no index; only the first/last make sense.
        first = index.start
        assert first is None or first == 0 or first == -1
        reader_args = ()
    else:
        reader_args = (index,)

    opened_here = False
    if not isinstance(filename, str):
        assert io.acceptsfd
        fd = filename
    elif io.acceptsfd:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
        opened_here = True
    else:
        # Filename-only formats get the path itself.
        fd = filename

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *reader_args, **kwargs):
            record = item if isinstance(item, dict) else {'atoms': item}
            if full_output:
                yield record
            else:
                yield record['atoms']
    finally:
        if opened_here:
            fd.close()

863 

864 

def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split a trailing ``@index`` off ``filename``.

    Returns ``(filename, index)``.  The input is returned untouched when
    ``filename`` is not a string, splitting is disabled, or the basename
    contains no ``@``.  Otherwise the text after the last ``@`` is removed
    from the name; an explicit slice ``index`` takes precedence over it.
    If that text is not a valid index expression a warning is issued and
    the raw text is returned as the index.
    """
    if not isinstance(filename, str):
        return filename, index

    # Only an at sign in the last path component counts.
    if do_not_split_by_at_sign or '@' not in os.path.basename(filename):
        return filename, index

    stem, _at, suffix = filename.rpartition('@')

    if isinstance(index, slice):
        return stem, index

    try:
        parsed = string2index(suffix)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
        parsed = suffix
    return stem, parsed

886 

887 

def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose signature matches ``data``.

    Only the first PEEK_BYTES bytes are examined.  Raises
    UnknownFileTypeError when no format matches.
    """
    head = data[:PEEK_BYTES]
    hit = next(
        (fmt for fmt in ioformats.values() if fmt.match_magic(head)),
        None,
    )
    if hit is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return hit

894 

895 

def filetype(
    filename: NameOrFile,
    read: bool = True,
    guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then up to ``PEEK_BYTES`` bytes from
    the start of the file will be read and analysed.  Turn off this second
    part by using read=False.

    filename: str or file
        Path, or an open file object.  A file object that was passed in is
        never closed here; it is rewound to the start after peeking.
    read: bool
        Whether the file contents may be inspected.
    guess: bool
        Whether an unregistered filename extension may be returned as the
        format name when nothing else matches.

    Raises UnknownFileTypeError if the type cannot be determined or the
    file is empty.

    Can be used from the command-line also::

      $ ase info filename ...
    """

    orig_filename = filename
    if hasattr(filename, 'name'):
        filename = filename.name

    ext = None
    # True only for a file that is opened below; such a file must be closed
    # again, whereas a caller-supplied file object is only rewound.
    opened_here = False
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        if filename.endswith('aselmdb'):
            return 'db'

        # strip any compression extensions that can be read
        root, _compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
            opened_here = True
        else:
            # A file object with a ``name`` attribute was passed in.
            fd = orig_filename  # type: ignore[assignment]
    else:
        fd = filename
        if fd is sys.stdin:
            return 'json'

    if opened_here:
        # Close our own handle even if reading fails.
        try:
            data = fd.read(PEEK_BYTES)
        finally:
            fd.close()
    else:
        data = fd.read(PEEK_BYTES)
        fd.seek(0)

    if len(data) == 0:
        # ``filename`` may be a file object here, so format rather than
        # concatenate.
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format

992 

993 

994def index2range(index: int | slice, length: int) -> range: 

995 """Convert slice or integer to range. 

996 

997 If index is an integer, range will contain only that integer.""" 

998 if isinstance(index, int): 

999 return range(index, index + 1) 

1000 return range(length)[index]