Coverage for /builds/ase/ase/ase/io/formats.py: 91.01%

534 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-08-02 00:12 +0000

1# fmt: off 

2 

3"""File formats. 

4 

5This module implements the read(), iread() and write() functions in ase.io. 

6For each file format there is an IOFormat object. 

7 

8There is a dict, ioformats, which stores the objects. 

9 

10Example 

11======= 

12 

13The xyz format is implemented in the ase/io/xyz.py file which has a 

14read_xyz() generator and a write_xyz() function. This and other 

15information can be obtained from ioformats['xyz']. 

16""" 

17 

18import functools 

19import inspect 

20import io 

21import numbers 

22import os 

23import re 

24import sys 

25import warnings 

26from importlib import import_module 

27from importlib.metadata import entry_points 

28from pathlib import PurePath 

29from typing import ( 

30 IO, 

31 Any, 

32 Dict, 

33 Iterator, 

34 List, 

35 Optional, 

36 Sequence, 

37 Tuple, 

38 Union, 

39) 

40 

41from ase.atoms import Atoms 

42from ase.parallel import parallel_function, parallel_generator 

43from ase.utils import string2index 

44from ase.utils.plugins import ExternalIOFormat 

45 

# Upper bound on the number of leading bytes examined when guessing a
# file's format from its contents (used by match_magic() and filetype()).
PEEK_BYTES = 50000

47 

48 

class UnknownFileTypeError(Exception):
    """Raised when a file format is unknown, cannot be guessed from a
    filename or file contents, or when its module cannot be imported."""

51 

52 

class IOFormat:
    """Metadata and read/write entry points for one file format.

    The two-character ``code`` describes the format:

    * first character: ``'1'`` (a file holds a single Atoms object) or
      ``'+'`` (a file may hold several);
    * second character: ``'F'`` (the functions accept a file object),
      ``'B'`` (like ``'F'`` but the file is opened in binary mode) or
      ``'S'`` (the functions need a filename string).

    The reader and writer are looked up lazily as ``read_<name>`` and
    ``write_<name>`` (dashes in the name replaced by underscores) in the
    module called ``module_name``.
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        self.magic: List[str] = []
        self.magic_regex: Optional[bytes] = None

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap in-memory *data* in a StringIO/BytesIO matching this format.

        str/bytes are converted to whatever the format expects, using
        ``self.encoding`` or UTF-8 when no encoding is set.
        """
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """io.BytesIO for binary formats, io.StringIO otherwise."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> Sequence[Atoms]:
        """Parse in-memory *data* and return all images as a list.

        Extra keyword arguments are forwarded to the format's reader.
        """
        with self._buf_as_filelike(data) as fd:
            # self.read(...) always produces a generator: _read_wrapper()
            # either calls a generator function or wraps a plain reader in
            # wrap_read_function().  Consume it exactly once, for
            # single-image and multi-image formats alike.
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> Atoms:
        """Parse in-memory *data* and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the format's module provides a read function."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the format's module provides a write function."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the write function has a variable named 'append'."""
        writefunc = self._writefunc()
        return self.can_write and 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = [f'{name}={value!r}'
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with dashes replaced so it is a valid identifier."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return the module's ``read_<formatname>`` or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return the module's ``write_<formatname>`` or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Callable that reads this format, or None (with a FutureWarning)
        when reading is not supported."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the reader and always hand back a generator of images."""
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            # Plain readers are adapted so callers can always iterate.
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Emit the FutureWarning about read/write properties being None."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None. '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Callable that writes this format, or None (with a FutureWarning)
        when writing is not supported."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the writer; raise ValueError if the format has none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """'r', 'w', 'rw' or '' depending on the supported operations."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name: {self.name}',
                 f'Description: {self.description}',
                 f'Modes: {self.modes}',
                 f'Encoding: {self.encoding}',
                 f'Module: {self.module_name}',
                 f'Code: {self.code}',
                 f'Extensions: {self.extensions}',
                 f'Globs: {self.globs}',
                 f'Magic: {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether the read/write functions accept a file object."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether files of this format are opened in binary mode."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the implementing module.

        Raises UnknownFileTypeError if the import fails.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            # Chain explicitly so the original import failure stays visible.
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether *basename* matches any of the format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether the leading file contents *data* identify this format.

        Uses ``magic_regex`` if set, otherwise each ``magic`` pattern is
        matched (case-sensitively) as a glob against the start of *data*.
        """
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(
            fnmatchcase(data, magic + b'*')  # type: ignore[operator, type-var]
            for magic in self.magic
        )

238 

239 

# Registry of every known format, keyed by format name.  It is populated
# by define_io_format().
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Maps a filename extension (no leading dot) to the IOFormat claiming it.
extension2format = {}


all_formats = ioformats  # Aliased for compatibility only. Please do not use.
# Maps a format name to its module name as given to define_io_format(),
# i.e. before any 'ase.io.' prefix is added.
format2modulename = {}  # Left for compatibility only.

246 

247 

def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat and record it in the module-level registries.

    Parameters
    ==========
    name: str
        Format name; also the key in ``ioformats``.
    desc: str
        Human-readable description.
    code: str
        Two-character format code (see IOFormat).
    module: str
        Name of the implementing module.  Defaults to *name* with dashes
        replaced by underscores.  Unless *external* is true, ``'ase.io.'``
        is prepended.
    ext, glob, magic: None, a single str/bytes, or an iterable of them
        Filename extensions, filename glob patterns and content
        signatures used to recognise the format.
    encoding: str
        Text encoding passed on to the IOFormat.
    magic_regex: bytes
        Regular expression used instead of *magic*.
    external: bool
        If true, *module* is used as given.

    Returns
    =======
    The newly created IOFormat.

    Raises
    ======
    ValueError
        If one of the extensions is already registered.  In that case none
        of the registries has been modified.
    """
    if module is None:
        module = name.replace('-', '_')
    short_module_name = module

    if not external:
        module = 'ase.io.' + module

    def normalize_patterns(strings):
        # Accept None, a single pattern or any iterable of patterns.
        if strings is None:
            strings = []
        elif isinstance(strings, (str, bytes)):
            strings = [strings]
        else:
            strings = list(strings)
        return strings

    fmt = IOFormat(name, desc, code, module_name=module,
                   encoding=encoding)
    fmt.extensions = normalize_patterns(ext)
    fmt.globs = normalize_patterns(glob)
    fmt.magic = normalize_patterns(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    # Validate everything before touching shared state, so that a rejected
    # definition does not leave a half-registered format behind.
    claimed = set()
    for extension in fmt.extensions:
        if extension in extension2format or extension in claimed:
            raise ValueError(f'extension "{extension}" already registered')
        claimed.add(extension)

    format2modulename[name] = short_module_name
    for extension in fmt.extensions:
        extension2format[extension] = fmt

    # NOTE: an existing entry with the same name is replaced silently.
    ioformats[name] = fmt
    return fmt

283 

284 

def get_ioformat(name: str) -> IOFormat:
    """Look up the IOFormat registered under *name*.

    Raises UnknownFileTypeError if no such format is registered, or if the
    module implementing it cannot be imported.
    """
    fmt = ioformats.get(name)
    if fmt is None:
        raise UnknownFileTypeError(name)

    # Accessing .module performs the import, so import problems surface
    # here rather than at first use.
    fmt.module
    return fmt

293 

294 

def register_external_io_formats(group):
    """Register every IO format advertised under entry-point *group*.

    A format that fails to register is reported with a warning and
    skipped, so that the remaining entry points are still processed.
    """
    # Query the installed entry points once; the result is reused both for
    # the API probe and for the actual lookup.
    all_entry_points = entry_points()
    if hasattr(all_entry_points, 'select'):
        fmt_entry_points = all_entry_points.select(group=group)
    else:
        # Older importlib.metadata: entry_points() returns a dict of groups.
        fmt_entry_points = all_entry_points.get(group, ())

    for entry_point in fmt_entry_points:
        try:
            define_external_io_format(entry_point)
        except Exception as exc:
            warnings.warn(
                'Failed to register external '
                f'IO format {entry_point.name}: {exc}'
            )

309 

310 

def define_external_io_format(entry_point):
    """Load *entry_point* and register the IO format it describes.

    The format is registered under the entry point's name.

    Raises ValueError if a format of that name already exists and
    TypeError if the loaded object is not an ExternalIOFormat.
    """
    plugin = entry_point.load()

    if entry_point.name in ioformats:
        raise ValueError(f'Format {entry_point.name} already defined')

    if not isinstance(plugin, ExternalIOFormat):
        message = ('Wrong type for registering external IO formats '
                   f'in format {entry_point.name}, expected '
                   'ExternalIOFormat')
        raise TypeError(message)

    # The plugin's fields become the keyword arguments of the definition.
    F(entry_point.name, **plugin._asdict(), external=True)

321 

322 

# We define all the IO formats below.  Each IO format has a code,
# such as '1F', which defines some of the format's properties:
#
# 1=single atoms object
# +=multiple atoms objects
# F=accepts a file-descriptor
# S=needs a file-name str
# B=like F, but opens in binary mode
#
# Keyword arguments: module (implementing module inside ase.io, defaults
# to the format name), ext (filename extensions), glob (filename
# patterns), magic / magic_regex (signatures looked for in the file
# contents) and encoding.
#
# NOTE(review): runs of whitespace inside the magic byte strings below
# may have been collapsed when this listing was rendered -- verify the
# patterns against the upstream file before relying on them.

F = define_io_format
F('abinit-gsr', 'ABINIT GSR file', '1S',
  module='abinit', glob='*o_GSR.nc')
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
# XXX: Define plugin in ase db backends package:
# F('aselmdb', 'ASE LMDB format', '+F')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cjson', 'Chemical json file', '1F', ext='cjson')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol', ext='arc')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['pwo', 'out'], magic=b'*Program PWSCF')
# NOTE(review): 'exciting' is registered twice.  define_io_format() stores
# formats by name, so the second call replaces the first in `ioformats`
# and the 'input.xml' glob is no longer reachable through that registry
# -- confirm whether two distinct format names were intended.
F('exciting', 'exciting input', '1F', module='exciting', glob='input.xml')
F('exciting', 'exciting output', '1F', module='exciting', glob='INFO.out')
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('onetep-out', 'ONETEP output file', '+F',
  module='onetep',
  magic=b'*Linear-Scaling Ab Initio Total Energy Program*')
F('onetep-in', 'ONETEP input file', '1F',
  module='onetep',
  magic=[b'*lock species ',
         b'*LOCK SPECIES ',
         b'*--- INPUT FILE ---*'])
F('orca-output', 'ORCA output', '+F',
  module='orca', magic=b'* O R C A *')
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
#      The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point
register_external_io_formats('ase.ioformats')

506 

507 

def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression suffix off a filename.

    The suffixes ``.gz``, ``.bz2`` and ``.xz`` are recognised.  Any other
    filename is returned unchanged, paired with ``None``.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without its compression suffix, and the suffix
        (without the dot).  If no known compression suffix is present,
        the original filename and ``None``.
    """
    # Extend this tuple when support for another compression is added.
    known_suffixes = ('gz', 'bz2', 'xz')

    # os.path.splitext keeps the leading '.' on the suffix; drop it.
    stem, dotted_suffix = os.path.splitext(filename)
    suffix = dotted_suffix.strip('.')

    if suffix not in known_suffixes:
        return filename, None
    return stem, suffix

542 

543 

def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a file, transparently handling compression.

    The compression is guessed from the filename suffix: ``gz`` (gzip),
    ``bz2`` (bzip2) and ``xz`` (lzma) are supported.  Any other file is
    opened with the builtin ``open``.

    Supported modes are:
       * 'r', 'rt', 'w', 'wt' for text mode read and write.
       * 'rb', 'wb' for binary read and write.

    The bare modes 'r', 'w' and 'a' are turned into 'rt', 'wt' and 'at'.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    # The compressed openers may default to binary, so spell out text mode.
    text_modes = {'r': 'rt', 'w': 'wt', 'a': 'at'}
    mode = text_modes.get(mode, mode)

    compression = get_compression(filename)[1]

    # The compression modules are imported on demand only.
    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore[return-value]
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)

    # No (recognised) compression suffix.
    return open(filename, mode)

592 

593 

def is_compressed(fd: io.BufferedIOBase) -> bool:
    """Return True if *fd* is a gzip, bz2 or lzma file object."""
    # Only modules that are already loaded are consulted: a file object of
    # one of these types cannot exist unless its module was imported, and
    # Python may have been built without e.g. lzma.
    if 'gzip' in sys.modules:
        import gzip
        if isinstance(fd, gzip.GzipFile):
            return True

    if 'bz2' in sys.modules:
        import bz2
        if isinstance(fd, bz2.BZ2File):
            return True

    if 'lzma' in sys.modules:
        import lzma
        if isinstance(fd, lzma.LZMAFile):
            return True

    return False

611 

612 

def wrap_read_function(read, filename, index=None, **kwargs):
    """Adapt a plain read function so that it behaves like a generator.

    With an index, everything ``read(filename, index, ...)`` produces is
    yielded item by item.  Without one, the single return value of
    ``read(filename, ...)`` is yielded.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return

    yield read(filename, **kwargs)

619 

620 

# Type of the ``filename`` argument of read(), iread(), write() and
# filetype(): a path given as str or PurePath, or a file object.
NameOrFile = Union[str, PurePath, IO]

622 

623 

def write(
    filename: NameOrFile,
    images: Union[Atoms, Sequence[Atoms]],
    format: str = None,
    parallel: bool = True,
    append: bool = False,
    **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the file-format
        is guessed from the filename; if that is not possible for a file
        object or for standard output, json is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one
        file.
        WARNING: If the file format does not support multiple entries
        without additional keywords/headers, files created using
        'append=True' might not be readable by any program!  They will
        nevertheless be written without error message.

    The use of additional keywords is format specific.  write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """

    if isinstance(filename, PurePath):
        filename = str(filename)

    if not isinstance(filename, str):
        # An open file object was passed in.
        fd = filename  # type: ignore[assignment]
        if format is None:
            try:
                format = filetype(filename, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
        filename = None  # type: ignore[assignment]
    elif filename == '-':
        # '-' selects standard output.
        fd = sys.stdout
        filename = None  # type: ignore[assignment]
    else:
        fd = None
        if format is None:
            format = filetype(filename, read=False)
            assert isinstance(format, str)

    if not format:
        format = 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(filename, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)

688 

689 

@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write *images* with the IOFormat *io*, to *filename* or to *fd*.

    Exactly how the writer is invoked depends on whether the format
    accepts file objects and whether it supports appending.  Raises
    ValueError for combinations the format cannot handle.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError('{}-format can only store 1 Atoms object.'
                             .format(format))
        images = images[0]

    if not io.can_write:
        raise ValueError(f"Can't write to {format}-format")

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError("Can't write more than one image to "
                             'file-descriptor using json-format.')
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The writer wants a filename, so a file object cannot be used.
        if fd is not None:
            raise ValueError("Can't write {}-format to file-descriptor"
                             .format(format))
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError("Cannot append to {}-format, write-function "
                             "does not support the append keyword."
                             .format(format))
        return io.write(filename, images, **kwargs)

    if fd is not None:
        # The caller owns this file object; leave it open.
        return io.write(fd, images, **kwargs)

    mode = 'wb' if io.isbinary else 'w'
    if append:
        mode = mode.replace('w', 'a')
    fd = open_with_compression(filename, mode)
    # XXX remember to re-enable compressed open
    # fd = io.open(filename, mode)
    try:
        return io.write(fd, images, **kwargs)
    finally:
        # We opened the file, so we close it, even if writing failed.
        fd.close()

738 

739 

def read(
        filename: NameOrFile,
        index: Any = None,
        format: Optional[str] = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default), ``filename`` is split at the at sign ``@``.

    Many formats allow an open file-like object to be passed instead
    of ``filename``.  In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin

    if isinstance(index, str):
        # A string that is not a valid index expression is passed on as is.
        try:
            index = string2index(index)
        except ValueError:
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1  # the last image

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    if not isinstance(index, (slice, str)):
        # A single image was requested: take the first one at or after it.
        single = _iread(filename, slice(index, None), format, ioformat,
                        parallel=parallel, **kwargs)
        return next(single)

    return list(_iread(filename, index, format, ioformat,
                       parallel=parallel, **kwargs))

796 

797 

def iread(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterator[Atoms]:
    """Iterate over the Atoms objects in a file.

    Takes the same arguments as `read`, but yields the selected images one
    by one instead of returning them all at once.  Without an index, all
    images are yielded."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None)
    elif not isinstance(index, (slice, str)):
        # Turn a single integer into a one-element slice.  For index -1
        # the stop would be 0, so an open-ended slice is used instead.
        stop = index + 1
        index = slice(index, stop or None)

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)

830 

831 

@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield images read from *filename* using the IOFormat *io*.

    *filename* may be a path or an open file object.  Files opened here
    are closed again, also when the generator is not run to the end.
    With ``full_output=True`` the dictionaries produced by the reader are
    yielded instead of just their ``'atoms'`` entry.
    """

    if not io.can_read:
        raise ValueError(f"Can't read from {format}-format")

    if io.single:
        # Single-image readers take no index; only first/last make sense.
        first = index.start
        assert first is None or first == 0 or first == -1
        reader_args = ()
    else:
        reader_args = (index,)

    opened_here = False
    if not isinstance(filename, str):
        assert io.acceptsfd
        fd = filename
    elif io.acceptsfd:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
        opened_here = True
    else:
        # The reader wants the filename itself.
        fd = filename

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *reader_args, **kwargs):
            record = item if isinstance(item, dict) else {'atoms': item}
            if full_output:
                yield record
            else:
                yield record['atoms']
    finally:
        if opened_here:
            fd.close()

870 

871 

def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split a trailing ``@index`` off a filename.

    Returns ``(filename, index)``.  Non-string filenames, names whose last
    path component contains no ``@``, and calls with
    ``do_not_split_by_at_sign=True`` are returned unchanged together with
    the given *index*.  Otherwise the part after the last ``@`` is
    removed from the name; a slice passed as *index* wins over it, else it
    is parsed as an index.  If it cannot be parsed, a warning is issued
    and it is returned as a string.
    """
    if not isinstance(filename, str):
        return filename, index

    if do_not_split_by_at_sign or '@' not in os.path.basename(filename):
        return filename, index

    path, _, index_text = filename.rpartition('@')

    if isinstance(index, slice):
        return path, index

    try:
        return path, string2index(index_text)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
    return path, index_text

893 

894 

def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose signature matches *data*.

    Only the first ``PEEK_BYTES`` bytes of *data* are considered.  Raises
    UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    found = next(
        (candidate for candidate in ioformats.values()
         if candidate.match_magic(head)),
        None,
    )
    if found is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return found

901 

902 

def filetype(
    filename: NameOrFile,
    read: bool = True,
    guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then up to ``PEEK_BYTES`` bytes from
    the start of the file will be read and analysed.  Turn off this second
    part by using read=False.

    Can be used from the command-line also::

        $ ase info filename ...

    Parameters
    ==========
    filename: str or file
        Path or file object.  For an object with a ``name`` attribute, the
        name is used for the filename-based checks.
    read: bool
        Whether the file contents may be inspected.
    guess: bool
        If True, fall back to returning the bare filename extension when
        neither the contents nor the registered extensions identify the
        file.

    Returns
    =======
    The name of the format.

    Raises
    ======
    UnknownFileTypeError
        If the file is empty or its type cannot be determined.
    """

    orig_filename = filename
    if hasattr(filename, 'name'):
        filename = filename.name

    ext = None
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        if filename.endswith('aselmdb'):
            return 'db'

        # strip any compression extensions that can be read
        root, _compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            # We were given a path: open it ourselves.
            fd = open_with_compression(filename, 'rb')
        else:
            # We were given a file object with a name.
            fd = orig_filename  # type: ignore[assignment]
    else:
        fd = filename
        if fd is sys.stdin:
            return 'json'

    data = fd.read(PEEK_BYTES)
    # NOTE(review): a caller-supplied file object that has a ``name`` is
    # closed here as well (fd is the object, filename its name), whereas
    # nameless ones are rewound -- confirm that this is intended.
    if fd is not filename:
        fd.close()
    else:
        fd.seek(0)

    if len(data) == 0:
        # filename may be a file object here, so it must not be
        # concatenated to a str directly.
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format

999 

1000 

def index2range(index, length):
    """Translate an index into a range over a sequence of *length* items.

    A slice gives the range of positions it selects; an integer gives a
    range containing just that (normalised) position."""
    selected = range(length)[index]
    if not isinstance(selected, numbers.Integral):
        return selected
    return range(selected, selected + 1)