Coverage for ase / io / formats.py: 90.98%

532 statements  

« prev     ^ index     » next       coverage.py v7.13.3, created at 2026-02-04 10:20 +0000

1# fmt: off 

2 

3"""File formats. 

4 

5This module implements the read(), iread() and write() functions in ase.io. 

6For each file format there is an IOFormat object. 

7 

8There is a dict, ioformats, which stores the objects. 

9 

10Example 

11======= 

12 

13The xyz format is implemented in the ase/io/xyz.py file which has a 

14read_xyz() generator and a write_xyz() function. This and other 

15information can be obtained from ioformats['xyz']. 

16""" 

17 

18import functools 

19import inspect 

20import io 

21import os 

22import re 

23import sys 

24import warnings 

25from importlib import import_module 

26from importlib.metadata import entry_points 

27from pathlib import PurePath 

28from typing import ( 

29 IO, 

30 Any, 

31 Dict, 

32 Iterator, 

33 List, 

34 Optional, 

35 Sequence, 

36 Tuple, 

37 Union, 

38) 

39 

40from ase.atoms import Atoms 

41from ase.parallel import parallel_function, parallel_generator 

42from ase.utils import string2index 

43from ase.utils.plugins import ExternalIOFormat 

44 

# Number of leading bytes inspected when guessing a format from file
# contents; used by match_magic() and filetype() in this module.
PEEK_BYTES = 50000

46 

47 

class UnknownFileTypeError(Exception):
    """Raised when a file format is unknown or cannot be determined."""

50 

51 

class IOFormat:
    """Description of one file format plus access to its read/write functions.

    ``code`` is a two-character string.  The first character is ``'1'``
    (one Atoms object per file) or ``'+'`` (several); the second is ``'F'``
    (accepts a text file object), ``'B'`` (accepts a binary file object)
    or ``'S'`` (needs a file name).

    The implementing module is imported lazily, on first access to
    :attr:`module`, and is expected to provide ``read_<name>`` and/or
    ``write_<name>`` functions (dashes in the name become underscores).
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        self.magic: List[str] = []
        self.magic_regex: Optional[bytes] = None

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap ``data`` in an in-memory file of the kind this format reads.

        The data is encoded/decoded as needed so that binary formats get
        a BytesIO and text formats get a StringIO.
        """
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory file class matching the format: BytesIO or StringIO."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> Sequence[Atoms]:
        """Parse ``data`` (file contents) and return all images as a list.

        Extra keyword arguments are forwarded to the read function.
        """
        with self._buf_as_filelike(data) as fd:
            # self.read() always hands back a generator: plain read
            # functions are wrapped by wrap_read_function().  Hence the
            # result is materialised the same way for single- and
            # multi-image formats (a single-image reader yields once).
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> Atoms:
        """Parse ``data`` and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the module defines a read function for this format."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the module defines a write function for this format."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the write function has a variable named ``append``."""
        writefunc = self._writefunc()
        return self.can_write and 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = [f'{name}={value!r}'
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with dashes replaced so it is a valid identifier."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return the module's ``read_<format>`` function, or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return the module's ``write_<format>`` function, or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Callable that reads this format, or None (with a FutureWarning)."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the read function, always returning a generator of images."""
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            # Plain functions are adapted so that callers can always iterate.
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Emit the FutureWarning about ``read``/``write`` returning None."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None.  '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Callable that writes this format, or None (with a FutureWarning)."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the write function; raise ValueError if there is none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """Supported modes as a string: '', 'r', 'w' or 'rw'."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name:        {self.name}',
                 f'Description: {self.description}',
                 f'Modes:       {self.modes}',
                 f'Encoding:    {self.encoding}',
                 f'Module:      {self.module_name}',
                 f'Code:        {self.code}',
                 f'Extensions:  {self.extensions}',
                 f'Globs:       {self.globs}',
                 f'Magic:       {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether the read/write functions take a file object (not 'S')."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether files of this format are opened in binary mode ('B')."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the implementing module.

        Raises UnknownFileTypeError if the module cannot be imported.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            # Chain the original error so the import failure stays visible.
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether ``basename`` matches any of this format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether ``data`` (start of a file) matches this format's magic.

        Uses ``magic_regex`` when set, otherwise the glob-style ``magic``
        patterns; only one of the two may be defined.
        """
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(
            fnmatchcase(data, magic + b'*')  # type: ignore[operator, type-var]
            for magic in self.magic
        )

237 

238 

# Registry of IOFormat objects keyed by format name; define_io_format()
# adds an entry for every format it creates.
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Maps a file extension to the IOFormat registered for it
# (filled by define_io_format(); an extension can be claimed only once).
extension2format = {}


all_formats = ioformats  # Aliased for compatibility only.  Please do not use.
format2modulename = {}  # Left for compatibility only.

245 

246 

def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat, register it globally and return it.

    ``ext``, ``glob`` and ``magic`` may each be None, a single str/bytes
    or an iterable of patterns.  Unless ``external`` is true, the module
    name is taken relative to ``ase.io``.  Raises ValueError if one of
    the extensions is already registered for another format.
    """
    def as_list(patterns):
        # None -> empty list, scalar -> one-element list, iterable -> list.
        if patterns is None:
            return []
        if isinstance(patterns, (str, bytes)):
            return [patterns]
        return list(patterns)

    if module is None:
        module = name.replace('-', '_')
        format2modulename[name] = module

    module_name = module if external else 'ase.io.' + module

    fmt = IOFormat(name, desc, code, module_name=module_name,
                   encoding=encoding)
    fmt.extensions = as_list(ext)
    fmt.globs = as_list(glob)
    fmt.magic = as_list(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    for extension in fmt.extensions:
        if extension in extension2format:
            raise ValueError(f'extension "{extension}" already registered')
        extension2format[extension] = fmt

    ioformats[name] = fmt
    return fmt

282 

283 

def get_ioformat(name: str) -> IOFormat:
    """Look up a registered format by name.

    Raises UnknownFileTypeError if the name is not registered, or if the
    module implementing the format cannot be imported.
    """
    fmt = ioformats.get(name)
    if fmt is None:
        raise UnknownFileTypeError(name)
    # Touch the lazy module property so import problems surface here.
    fmt.module
    return fmt

292 

293 

def register_external_io_formats(group):
    """Register every IO format advertised under the entry-point ``group``.

    A format that fails to register is reported with a warning and
    skipped, so one broken plugin does not prevent the others loading.
    """
    available = entry_points()
    if hasattr(available, 'select'):
        candidates = available.select(group=group)
    else:
        # Older importlib.metadata: mapping of group name -> entry points.
        candidates = available.get(group, ())

    for entry_point in candidates:
        try:
            define_external_io_format(entry_point)
        except Exception as exc:
            message = ('Failed to register external '
                       f'IO format {entry_point.name}: {exc}')
            warnings.warn(message)

308 

309 

def define_external_io_format(entry_point):
    """Load one plugin entry point and register the format it describes.

    Raises ValueError if a format with the entry point's name already
    exists, and TypeError if the loaded object is not an ExternalIOFormat.
    """
    fmt = entry_point.load()
    name = entry_point.name

    if name in ioformats:
        raise ValueError(f'Format {name} already defined')

    if isinstance(fmt, ExternalIOFormat):
        F(name, **fmt._asdict(), external=True)
        return

    raise TypeError('Wrong type for registering external IO formats '
                    f'in format {name}, expected '
                    'ExternalIOFormat')

320 

321 

# We define all the IO formats below.  Each IO format has a code,
# such as '1F', which defines some of the format's properties:
#
# 1=single atoms object
# +=multiple atoms objects
# F=accepts a file-descriptor
# S=needs a file-name str
# B=like F, but opens in binary mode
#
# Keyword arguments used below (see define_io_format()):
#   module: module name below ase.io (default: format name, '-' -> '_')
#   ext:    file extension(s) mapped to the format
#   glob:   filename pattern(s) matched against the basename
#   magic / magic_regex: byte pattern(s) matched against the file start

F = define_io_format
F('abinit-gsr', 'ABINIT GSR file', '1S',
  module='abinit', glob='*o_GSR.nc')
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
# XXX: Define plugin in ase db backends package:
# F('aselmdb', 'ASE LMDB format', '+F')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cjson', 'Chemical json file', '1F', ext='cjson')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol', ext='arc')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['pwo', 'out'], magic=b'*Program PWSCF')
# NOTE(review): 'exciting' is registered twice.  define_io_format() stores
# formats by name, so the second call replaces the first entry in
# ioformats and the 'input.xml' glob is no longer matched -- confirm
# whether that is intended.
F('exciting', 'exciting input', '1F', module='exciting', glob='input.xml')
F('exciting', 'exciting output', '1F', module='exciting', glob='INFO.out')
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('onetep-out', 'ONETEP output file', '+F',
  module='onetep',
  magic=b'*Linear-Scaling Ab Initio Total Energy Program*')
F('onetep-in', 'ONETEP input file', '1F',
  module='onetep',
  magic=[b'*lock species ',
         b'*LOCK SPECIES ',
         b'*--- INPUT FILE ---*'])
F('orca-output', 'ORCA output', '+F',
  module='orca', magic=b'* O R C A *')
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
#      The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point
register_external_io_formats('ase.ioformats')

505 

506 

def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression suffix off a filename.

    Only the final extension is inspected, and only ``gz``, ``bz2`` and
    ``xz`` count as compression.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without the compression suffix, and the suffix
        (without the dot).  If the suffix is not a recognised compression
        format, the filename is returned unchanged together with None.
    """
    # os.path.splitext returns the suffix with its leading '.'.
    root, suffix = os.path.splitext(filename)
    compression = suffix.strip('.')

    # Update if anything is added
    if compression not in ('gz', 'bz2', 'xz'):
        return filename, None
    return root, compression

541 

542 

def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a file, transparently handling compression chosen by its suffix.

    The compression is guessed from the filename via get_compression();
    ``gz`` (gzip), ``bz2`` (bzip2) and ``xz`` (lzma) are supported, and any
    other filename is opened with the builtin ``open``.

    Supported modes are:
       * 'r', 'rt', 'w', 'wt' for text mode read and write.
       * 'rb', 'wb' for binary read and write.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    # Compressed formats sometimes default to binary, so force text mode
    # for the bare 'r', 'w' and 'a' modes.
    explicit_text_mode = {'r': 'rt', 'w': 'wt', 'a': 'at'}
    mode = explicit_text_mode.get(mode, mode)

    compression = get_compression(filename)[1]

    # The compression modules are imported only when actually needed.
    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore[return-value]
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)

    # Either None or unknown string
    return open(filename, mode)

591 

592 

def is_compressed(fd: io.BufferedIOBase) -> bool:
    """Return True if ``fd`` is a gzip, bz2 or lzma file object."""
    # Only modules that have already been imported are consulted: an
    # instance of their file classes cannot exist otherwise, and Python
    # may be built without e.g. lzma, so no import is triggered here.
    if 'gzip' in sys.modules:
        import gzip
        if isinstance(fd, gzip.GzipFile):
            return True
    if 'bz2' in sys.modules:
        import bz2
        if isinstance(fd, bz2.BZ2File):
            return True
    if 'lzma' in sys.modules:
        import lzma
        if isinstance(fd, lzma.LZMAFile):
            return True
    return False

610 

611 

def wrap_read_function(read, filename, index=None, **kwargs):
    """Adapt a plain read function so that it behaves like a generator.

    With an index, everything produced by ``read(filename, index)`` is
    yielded; without one, the single return value of ``read(filename)``
    is yielded.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)

618 

619 

# Type accepted wherever a file can be given: a path (str or PurePath)
# or an already open file object.
NameOrFile = Union[str, PurePath, IO]

621 

622 

def write(
        filename: NameOrFile,
        images: Union[Atoms, Sequence[Atoms]],
        format: str = None,
        parallel: bool = True,
        append: bool = False,
        **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename.
        When no format can be determined for '-' or for a file object,
        json is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one file.
        WARNING: If the file format does not support multiple entries without
        additional keywords/headers, files created using 'append=True'
        might not be readable by any program! They will nevertheless be
        written without error message.

    The use of additional keywords is format specific. write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """
    target = str(filename) if isinstance(filename, PurePath) else filename

    if not isinstance(target, str):
        # An open file object: its name (if any) may still tell the format.
        fd, path = target, None
        if format is None:
            try:
                format = filetype(target, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
    elif target == '-':
        fd, path = sys.stdout, None
    else:
        fd, path = None, target
        if format is None:
            format = filetype(target, read=False)
            assert isinstance(format, str)

    format = format or 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(path, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)

687 

688 

@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write ``images`` with the resolved format ``io``.

    Exactly one of ``filename`` and ``fd`` is expected to be set; a new
    file is opened (and closed again) here only when ``fd`` is None and
    the format accepts file objects.  Raises ValueError for unsupported
    combinations of format, target and ``append``.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError(
                f'{format}-format can only store 1 Atoms object.')
        images = images[0]

    if not io.can_write:
        raise ValueError(f"Can't write to {format}-format")

    # Special case for json-format: several images, or appending, is
    # only possible when a file name is available.
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError(
                "Can't write more than one image to file-descriptor "
                'using json-format.')
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The writer wants a file name and handles opening on its own.
        if fd is not None:
            raise ValueError(
                f"Can't write {format}-format to file-descriptor")
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError(
                f"Cannot append to {format}-format, write-function "
                "does not support the append keyword.")
        return io.write(filename, images, **kwargs)

    if fd is not None:
        # Caller-owned file object: write to it and leave it open.
        return io.write(fd, images, **kwargs)

    mode = ('a' if append else 'w') + ('b' if io.isbinary else '')
    fd = open_with_compression(filename, mode)
    try:
        return io.write(fd, images, **kwargs)
    finally:
        fd.close()

737 

738 

def read(
        filename: NameOrFile,
        index: Any = None,
        format: Optional[str] = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default) ``filename`` is split at the last at sign
        ``@`` and the part after it is used as the index.

    Many formats allow an open file-like object to be passed instead
    of ``filename``. In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given.

    A list is returned when the index is a slice or a string, otherwise
    a single Atoms object."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin

    if isinstance(index, str):
        # Strings that are not index expressions are passed on unchanged.
        try:
            index = string2index(index)
        except ValueError:
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    want_list = isinstance(index, (slice, str))
    selection = index if want_list else slice(index, None)
    images = _iread(filename, selection, format, ioformat,
                    parallel=parallel, **kwargs)
    if want_list:
        return list(images)
    return next(images)

795 

796 

def iread(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterator[Atoms]:
    """Iterator for reading Atoms objects from file.

    Takes the same arguments as the `read` function, but yields one
    Atoms object at a time instead of returning them all at once.  With
    no index, all images are yielded."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None)
    elif not isinstance(index, (slice, str)):
        # Single integer -> one-element slice; `or None` makes -1 select
        # the last image rather than an empty range.
        index = slice(index, (index + 1) or None)

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)

829 

830 

@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield images read from ``filename`` with the resolved format ``io``.

    ``filename`` may be a path or an open file object.  A file opened
    here is closed again, even if the consumer stops iterating early.
    With ``full_output`` the reader's dict is yielded instead of only
    its ``'atoms'`` entry.
    """
    if not io.can_read:
        raise ValueError(f"Can't read from {format}-format")

    if io.single:
        # Single-image readers take no index; only "first"/"last" make sense.
        start = index.start
        assert start is None or start == 0 or start == -1
        args = ()
    else:
        args = (index,)

    must_close_fd = isinstance(filename, str) and io.acceptsfd
    if must_close_fd:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
    else:
        if not isinstance(filename, str):
            assert io.acceptsfd
        # Either a caller-owned file object, or a name for a reader that
        # wants to open the file itself.
        fd = filename

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *args, **kwargs):
            record = item if isinstance(item, dict) else {'atoms': item}
            if full_output:
                yield record
            else:
                yield record['atoms']
    finally:
        if must_close_fd:
            fd.close()

869 

870 

def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split an ``@index`` suffix off a filename.

    Returns ``(filename, index)``.  The input is returned unchanged if
    ``filename`` is not a str, if splitting is disabled, or if the
    basename contains no ``@``.  Otherwise the name is split at its last
    ``@``; an ``index`` argument that is a slice takes precedence over
    the suffix.  A suffix that cannot be parsed triggers a warning and
    is returned as a plain string.
    """
    if not isinstance(filename, str):
        return filename, index

    splittable = (not do_not_split_by_at_sign
                  and '@' in os.path.basename(filename))
    if not splittable:
        return filename, index

    stem, _, suffix = filename.rpartition('@')

    if isinstance(index, slice):
        return stem, index

    try:
        return stem, string2index(suffix)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
        return stem, suffix

892 

893 

def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose magic matches ``data``.

    Only the first PEEK_BYTES bytes are considered.  Raises
    UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    matches = (fmt for fmt in ioformats.values() if fmt.match_magic(head))
    found = next(matches, None)
    if found is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return found

900 

901 

def filetype(
    filename: NameOrFile,
    read: bool = True,
    guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then the first PEEK_BYTES bytes of the
    file will be read and analysed.  Turn off this second part by using
    read=False.

    filename: str or file
        Path, or an open file object.  If a file object has a ``name``
        attribute that is a str, that name is used for the filename checks.
    read: bool
        Whether the file contents may be inspected when the name alone
        is not enough.
    guess: bool
        If nothing else matches, return the bare (lower-case) file
        extension as the format name.

    A file opened here is always closed again.  A file object passed in
    by the caller is never closed; it is rewound to the start after
    peeking.

    Raises UnknownFileTypeError if the type cannot be determined or the
    file is empty.

    Can be used from the command-line also::

        $ ase info filename ...
    """

    orig_filename = filename
    if hasattr(filename, 'name'):
        filename = filename.name

    ext = None
    opened_here = False
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        if filename.endswith('aselmdb'):
            return 'db'

        # strip any compression extensions that can be read
        root, _compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
            opened_here = True
        else:
            # A file object with a usable name: peek into the object itself.
            fd = orig_filename  # type: ignore[assignment]
    else:
        # Use the original object here: `filename` may have been replaced
        # by a non-str ``name`` attribute (e.g. an integer descriptor).
        # NOTE(review): a sys.stdin whose ``name`` is a str takes the
        # branch above, so this shortcut only applies to a stdin
        # replacement without a str name -- confirm that is intended.
        fd = orig_filename  # type: ignore[assignment]
        if fd is sys.stdin:
            return 'json'

    if opened_here:
        # We own this file: close it even if reading fails.
        try:
            data = fd.read(PEEK_BYTES)
        finally:
            fd.close()
    else:
        data = fd.read(PEEK_BYTES)
        if fd is filename:
            fd.seek(0)
        else:
            # Caller-owned file object that was reached through its
            # ``name``: rewind instead of closing it behind the caller's
            # back.  Rewinding is best effort as the stream may not be
            # seekable.
            try:
                fd.seek(0)
            except (AttributeError, OSError, ValueError):
                pass

    if len(data) == 0:
        # f-string rather than '+': filename need not be a str here.
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format

998 

999 

1000def index2range(index: int | slice, length: int) -> range: 

1001 """Convert slice or integer to range. 

1002 

1003 If index is an integer, range will contain only that integer.""" 

1004 if isinstance(index, int): 

1005 return range(index, index + 1) 

1006 return range(length)[index]