Coverage for ase / io / formats.py: 90.99%
533 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-30 08:22 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-30 08:22 +0000
# fmt: off

"""File formats.

This module implements the read(), iread() and write() functions in ase.io.
For each file format there is an IOFormat object.

There is a dict, ioformats, which stores the objects.

Example
=======

The xyz format is implemented in the ase/io/xyz.py file which has a
read_xyz() generator and a write_xyz() function.  This and other
information can be obtained from ioformats['xyz'].
"""
18import functools
19import inspect
20import io
21import os
22import re
23import sys
24import warnings
25from collections.abc import Iterator, Sequence
26from importlib import import_module
27from importlib.metadata import entry_points
28from pathlib import PurePath
29from typing import (
30 IO,
31 Any,
32)
34from ase.atoms import Atoms
35from ase.parallel import parallel_function, parallel_generator
36from ase.utils import string2index
37from ase.utils.plugins import ExternalIOFormat
# Number of leading bytes of a file that are inspected when the format
# has to be guessed from the file contents.
PEEK_BYTES = 50_000
class UnknownFileTypeError(Exception):
    """Raised when a file format is unknown or cannot be determined."""
46class IOFormat:
47 def __init__(self, name: str, desc: str, code: str, module_name: str,
48 encoding: str | None = None) -> None:
49 self.name = name
50 self.description = desc
51 assert len(code) == 2
52 assert code[0] in list('+1')
53 assert code[1] in list('BFS')
54 self.code = code
55 self.module_name = module_name
56 self.encoding = encoding
58 # (To be set by define_io_format())
59 self.extensions: list[str] = []
60 self.globs: list[str] = []
61 self.magic: list[str] = []
62 self.magic_regex: bytes | None = None
64 def _buf_as_filelike(self, data: str | bytes) -> IO:
65 encoding = self.encoding
66 if encoding is None:
67 encoding = 'utf-8' # Best hacky guess.
69 if self.isbinary:
70 if isinstance(data, str):
71 data = data.encode(encoding)
72 else:
73 if isinstance(data, bytes):
74 data = data.decode(encoding)
76 return self._ioclass(data)
78 @property
79 def _ioclass(self):
80 if self.isbinary:
81 return io.BytesIO
82 else:
83 return io.StringIO
85 def parse_images(self, data: str | bytes,
86 **kwargs) -> Sequence[Atoms]:
87 with self._buf_as_filelike(data) as fd:
88 outputs = self.read(fd, **kwargs)
89 if self.single:
90 assert isinstance(outputs, Atoms)
91 return [outputs]
92 else:
93 return list(self.read(fd, **kwargs))
95 def parse_atoms(self, data: str | bytes, **kwargs) -> Atoms:
96 images = self.parse_images(data, **kwargs)
97 return images[-1]
99 @property
100 def can_read(self) -> bool:
101 return self._readfunc() is not None
103 @property
104 def can_write(self) -> bool:
105 return self._writefunc() is not None
107 @property
108 def can_append(self) -> bool:
109 writefunc = self._writefunc()
110 return self.can_write and 'append' in writefunc.__code__.co_varnames
112 def __repr__(self) -> str:
113 tokens = [f'{name}={value!r}'
114 for name, value in vars(self).items()]
115 return 'IOFormat({})'.format(', '.join(tokens))
117 def __getitem__(self, i):
118 # For compatibility.
119 #
120 # Historically, the ioformats were listed as tuples
121 # with (description, code). We look like such a tuple.
122 return (self.description, self.code)[i]
124 @property
125 def single(self) -> bool:
126 """Whether this format is for a single Atoms object."""
127 return self.code[0] == '1'
129 @property
130 def _formatname(self) -> str:
131 return self.name.replace('-', '_')
133 def _readfunc(self):
134 return getattr(self.module, 'read_' + self._formatname, None)
136 def _writefunc(self):
137 return getattr(self.module, 'write_' + self._formatname, None)
139 @property
140 def read(self):
141 if not self.can_read:
142 self._warn_none('read')
143 return None
145 return self._read_wrapper
147 def _read_wrapper(self, *args, **kwargs):
148 function = self._readfunc()
149 if function is None:
150 self._warn_none('read')
151 return None
152 if not inspect.isgeneratorfunction(function):
153 function = functools.partial(wrap_read_function, function)
154 return function(*args, **kwargs)
156 def _warn_none(self, action):
157 msg = ('Accessing the IOFormat.{action} property on a format '
158 'without {action} support will change behaviour in the '
159 'future and return a callable instead of None. '
160 'Use IOFormat.can_{action} to check whether {action} '
161 'is supported.')
162 warnings.warn(msg.format(action=action), FutureWarning)
164 @property
165 def write(self):
166 if not self.can_write:
167 self._warn_none('write')
168 return None
170 return self._write_wrapper
172 def _write_wrapper(self, *args, **kwargs):
173 function = self._writefunc()
174 if function is None:
175 raise ValueError(f'Cannot write to {self.name}-format')
176 return function(*args, **kwargs)
178 @property
179 def modes(self) -> str:
180 modes = ''
181 if self.can_read:
182 modes += 'r'
183 if self.can_write:
184 modes += 'w'
185 return modes
187 def full_description(self) -> str:
188 lines = [f'Name: {self.name}',
189 f'Description: {self.description}',
190 f'Modes: {self.modes}',
191 f'Encoding: {self.encoding}',
192 f'Module: {self.module_name}',
193 f'Code: {self.code}',
194 f'Extensions: {self.extensions}',
195 f'Globs: {self.globs}',
196 f'Magic: {self.magic}']
197 return '\n'.join(lines)
199 @property
200 def acceptsfd(self) -> bool:
201 return self.code[1] != 'S'
203 @property
204 def isbinary(self) -> bool:
205 return self.code[1] == 'B'
207 @property
208 def module(self):
209 try:
210 return import_module(self.module_name)
211 except ImportError as err:
212 raise UnknownFileTypeError(
213 f'File format not recognized: {self.name}. Error: {err}')
215 def match_name(self, basename: str) -> bool:
216 from fnmatch import fnmatch
217 return any(fnmatch(basename, pattern)
218 for pattern in self.globs)
220 def match_magic(self, data: bytes) -> bool:
221 if self.magic_regex:
222 assert not self.magic, 'Define only one of magic and magic_regex'
223 match = re.match(self.magic_regex, data, re.M | re.S)
224 return match is not None
226 from fnmatch import fnmatchcase
227 return any(
228 fnmatchcase(data, magic + b'*') # type: ignore[operator, type-var]
229 for magic in self.magic
230 )
# Registry of IOFormat objects keyed by format name.
# These will be filled at run-time.
ioformats: dict[str, IOFormat] = {}
# Maps a file extension to the IOFormat registered for it.
extension2format = {}


all_formats = ioformats  # Aliased for compatibility only.  Please do not use.
format2modulename = {}  # Left for compatibility only.
def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat and register it in the module-level tables.

    name: format name; also the key in ``ioformats``.
    desc: human-readable description.
    code: two-character property code (see IOFormat).
    module: module implementing the format; defaults to ``name`` with
        ``-`` replaced by ``_``.  Unless ``external`` is true the name is
        taken relative to ``ase.io``.
    ext, glob, magic: a single pattern or an iterable of patterns for
        file extensions, file-name globs and magic byte prefixes.
    encoding: passed on to IOFormat.
    magic_regex: bytes regular expression used instead of ``magic``.
    external: whether ``module`` is already a full importable name.

    Returns the new IOFormat.  Raises ValueError if one of the extensions
    is already registered; in that case no table is modified.
    """
    if module is None:
        module = name.replace('-', '_')
    # format2modulename stores the module name without the 'ase.io.' prefix.
    short_module = module

    if not external:
        module = 'ase.io.' + module

    def normalize_patterns(strings):
        """Return the pattern argument as a list (None means no patterns)."""
        if strings is None:
            strings = []
        elif isinstance(strings, (str, bytes)):
            strings = [strings]
        else:
            strings = list(strings)
        return strings

    fmt = IOFormat(name, desc, code, module_name=module,
                   encoding=encoding)
    fmt.extensions = normalize_patterns(ext)
    fmt.globs = normalize_patterns(glob)
    fmt.magic = normalize_patterns(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    # Check every extension before touching any table, so that a conflict
    # does not leave a half-registered format behind.
    claimed = set()
    for extension in fmt.extensions:
        if extension in extension2format or extension in claimed:
            raise ValueError(f'extension "{extension}" already registered')
        claimed.add(extension)

    format2modulename[name] = short_module
    for extension in fmt.extensions:
        extension2format[extension] = fmt

    ioformats[name] = fmt
    return fmt
def get_ioformat(name: str) -> IOFormat:
    """Return ioformat object or raise appropriate error.

    Raises UnknownFileTypeError if ``name`` is not a registered format or
    if the module implementing it cannot be imported.
    """
    fmt = ioformats.get(name)
    if fmt is None:
        raise UnknownFileTypeError(name)
    # Accessing the property imports the module, which may itself raise.
    fmt.module
    return fmt
def register_external_io_formats(group):
    """Register all IO formats published under the entry point ``group``.

    A format that fails to register only triggers a warning, so one
    faulty entry point does not prevent the others from being registered.
    """
    available = entry_points()
    if hasattr(available, 'select'):
        candidates = available.select(group=group)
    else:
        # Fallback for entry point objects without select().
        candidates = available.get(group, ())

    for entry_point in candidates:
        try:
            define_external_io_format(entry_point)
        except Exception as exc:
            warnings.warn(
                'Failed to register external '
                f'IO format {entry_point.name}: {exc}'
            )
def define_external_io_format(entry_point):
    """Load ``entry_point`` and register the format it describes.

    Raises ValueError if a format with the entry point's name already
    exists and TypeError if the loaded object is not an ExternalIOFormat.
    """
    fmt = entry_point.load()
    name = entry_point.name
    if name in ioformats:
        raise ValueError(f'Format {name} already defined')
    if not isinstance(fmt, ExternalIOFormat):
        raise TypeError('Wrong type for registering external IO formats '
                        f'in format {name}, expected '
                        'ExternalIOFormat')
    # The fields of the ExternalIOFormat are the keyword arguments.
    F(name, **fmt._asdict(), external=True)
# All built-in IO formats are declared below.  Each IO format has a code,
# such as '1F', which defines some of the format's properties:
#
#  1=single atoms object
#  +=multiple atoms objects
#  F=accepts a file-descriptor
#  S=needs a file-name str
#  B=like F, but opens in binary mode
#
# The order of the declarations is the order in which formats are tried
# when matching file names and magic bytes.
# NOTE(review): the magic byte strings are whitespace-sensitive -- confirm
# them against the upstream file when editing.

F = define_io_format
F('abinit-gsr', 'ABINIT GSR file', '1S',
  module='abinit',
  glob='*o_GSR.nc')
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit',
  magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit',
  magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims',
  magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
# XXX: Define plugin in ase db backends package:
# F('aselmdb', 'ASE LMDB format', '+F')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep',
  ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep',
  ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep',
  ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep',
  ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep',
  ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cjson', 'Chemical json file', '1F', ext='cjson')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k',
  ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k',
  ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'],
  glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo',
  magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4',
  ext='config',
  glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4',
  glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol',
  ext='arc')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol',
  ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S', module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F', ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso',
  ext='pwi',
  magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso',
  ext=['pwo', 'out'],
  magic=b'*Program PWSCF')
# NOTE(review): 'exciting' is registered twice.  The second call replaces
# the first entry in ioformats, so only the 'INFO.out' glob stays active.
F('exciting', 'exciting input', '1F',
  module='exciting',
  glob='input.xml')
F('exciting', 'exciting output', '1F',
  module='exciting',
  glob='INFO.out')
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us',
  magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F', module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us',
  magic=b' $DATA',
  ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian',
  ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian',
  ext='log',
  magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S', module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S', module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S', module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F', ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun',
  magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F',
  module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S', module='animation')
F('mustem', 'muSTEM xtl file', '1F', ext='xtl')
F('mysql', 'ASE MySQL database file', '+S', module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F', ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem',
  ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem',
  ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus',
  glob='inp')
F('onetep-out', 'ONETEP output file', '+F',
  module='onetep',
  magic=b'*Linear-Scaling Ab Initio Total Energy Program*')
F('onetep-in', 'ONETEP input file', '1F',
  module='onetep',
  magic=[
      b'*lock species ',
      b'*LOCK SPECIES ',
      b'*--- INPUT FILE ---*',
  ])
F('orca-output', 'ORCA output', '+F',
  module='orca',
  magic=b'* O R C A *')
F('proteindatabank', 'Protein Data Bank', '+F', ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F', magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV',
  module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B',
  module='trajectory',
  ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F',
  glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole',
  glob='gradient',
  magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar',
  glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp',
  glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp',
  glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp',
  glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[
      b'*\nANIMSTEPS',
      b'*\nCRYSTAL',
      b'*\nSLAB',
      b'*\nPOLYMER',
      b'*\nMOLECULE',
      b'*\nATOMS',
  ])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
#      The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point
register_external_io_formats('ase.ioformats')
501def get_compression(filename: str) -> tuple[str, str | None]:
502 """
503 Parse any expected file compression from the extension of a filename.
504 Return the filename without the extension, and the extension. Recognises
505 ``.gz``, ``.bz2``, ``.xz``.
507 >>> get_compression('H2O.pdb.gz')
508 ('H2O.pdb', 'gz')
509 >>> get_compression('crystal.cif')
510 ('crystal.cif', None)
512 Parameters
513 ----------
514 filename: str
515 Full filename including extension.
517 Returns
518 -------
519 (root, extension): (str, str or None)
520 Filename split into root without extension, and the extension
521 indicating compression format. Will not split if compression
522 is not recognised.
523 """
524 # Update if anything is added
525 valid_compression = ['gz', 'bz2', 'xz']
527 # Use stdlib as it handles most edge cases
528 root, compression = os.path.splitext(filename)
530 # extension keeps the '.' so remember to remove it
531 if compression.strip('.') in valid_compression:
532 return root, compression.strip('.')
533 else:
534 return filename, None
def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a file, transparently handling compression.

    The compression is guessed from the filename extension and the file
    is opened for reading or writing as if it were a standard file.

    Implemented for ``gz``(gzip), ``bz2``(bzip2) and ``xz``(lzma).

    Supported modes are:
       * 'r', 'rt', 'w', 'wt' for text mode read and write.
       * 'rb', 'wb' for binary read and write.

    Parameters
    ----------
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    -------
    fd: file
        File-like object open with the specified mode.
    """
    # Compressed formats sometimes default to binary, so the bare modes
    # are made explicitly textual.
    text_modes = {'r': 'rt', 'w': 'wt', 'a': 'at'}
    mode = text_modes.get(mode, mode)

    _root, compression = get_compression(filename)

    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore[return-value]
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)
    # Either None or unknown string
    return open(filename, mode)
def is_compressed(fd: io.BufferedIOBase) -> bool:
    """Check if the file object is in a compressed format."""
    # Only modules that were imported already are consulted: this avoids
    # triggering imports, and Python may have been compiled without
    # e.g. lzma.
    if 'gzip' in sys.modules:
        import gzip
        if isinstance(fd, gzip.GzipFile):
            return True
    if 'bz2' in sys.modules:
        import bz2
        if isinstance(fd, bz2.BZ2File):
            return True
    if 'lzma' in sys.modules:
        import lzma
        if isinstance(fd, lzma.LZMAFile):
            return True
    return False
def wrap_read_function(read, filename, index=None, **kwargs):
    """Convert read-function to generator.

    Without ``index`` the single return value of ``read`` is yielded;
    with ``index`` every item of the returned iterable is yielded.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)
614NameOrFile = str | PurePath | IO
def write(
        filename: NameOrFile,
        images: Atoms | Sequence[Atoms],
        format: str | None = None,
        parallel: bool = True,
        append: bool = False,
        **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename.
        If it cannot be determined for a file descriptor (or for
        standard output), json is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one file.
        WARNING: If the file format does not support multiple entries without
        additional keywords/headers, files created using 'append=True'
        might not be readable by any program! They will nevertheless be
        written without error message.

    The use of additional keywords is format specific.  write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """
    target = str(filename) if isinstance(filename, PurePath) else filename

    if isinstance(target, str):
        fd = None
        if target == '-':
            fd, target = sys.stdout, None
        elif format is None:
            format = filetype(target, read=False)
            assert isinstance(format, str)
    else:
        # An open file object was passed: there is no file name.
        fd, target = target, None
        if format is None:
            try:
                format = filetype(fd, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None

    if not format:
        format = 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(target, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)
@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write ``images`` with the IOFormat ``io``.

    Exactly one of ``filename`` and ``fd`` is expected to be set by
    write().  Raises ValueError if the format cannot store the images,
    cannot be written, cannot write to a file descriptor or cannot append.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError('{}-format can only store 1 Atoms object.'
                             .format(format))
        images = images[0]

    if not io.can_write:
        raise ValueError(f"Can't write to {format}-format")

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError("Can't write more than one image to "
                             "file-descriptor "
                             'using json-format.')
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The writer needs a file name.
        if fd is not None:
            raise ValueError("Can't write {}-format to file-descriptor"
                             .format(format))
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError("Cannot append to {}-format, write-function "
                             "does not support the append keyword."
                             .format(format))
        return io.write(filename, images, **kwargs)

    # The writer takes a file object; open one unless the caller did.
    open_new = fd is None
    try:
        if open_new:
            mode = ('a' if append else 'w') + ('b' if io.isbinary else '')
            fd = open_with_compression(filename, mode)
        return io.write(fd, images, **kwargs)
    finally:
        # Only close what was opened here.
        if open_new and fd is not None:
            fd.close()
def read(
        filename: NameOrFile,
        index: Any | None = None,
        format: str | None = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Atoms | list[Atoms]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.  The name
        '-' means standard input.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default) ``filename`` is split at the last at sign
        ``@`` of its final path component and the part after it is
        interpreted as the index.

    Many formats allow an open file-like object to be passed instead
    of ``filename``.  In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin
    if isinstance(index, str):
        # A string that is not a valid index is passed on unchanged.
        try:
            index = string2index(index)
        except ValueError:
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1
    if not format:
        format = filetype(filename, read=isinstance(filename, str))

    ioformat = get_ioformat(format)
    if not isinstance(index, (slice, str)):
        # A single image was requested: it is the first one of the
        # slice that starts at ``index``.
        images = _iread(filename, slice(index, None), format, ioformat,
                        parallel=parallel, **kwargs)
        return next(images)
    return list(_iread(filename, index, format, ioformat,
                       parallel=parallel, **kwargs))
def iread(
        filename: NameOrFile,
        index: Any | None = None,
        format: str | None = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterator[Atoms]:
    """Iterator for reading Atoms objects from file.

    Works as the `read` function, but yields one Atoms object at a time
    instead of all at once.  Without ``index`` all images are yielded."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None)
    elif not isinstance(index, (slice, str)):
        # Single integer: a slice holding just that image.  For -1 the
        # stop value would be 0, so None is used to reach the end.
        index = slice(index, (index + 1) or None)

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)
@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield the images read from ``filename`` with the IOFormat ``io``.

    With ``full_output`` the dictionaries produced by the reader are
    yielded (plain results are wrapped as ``{'atoms': result}``);
    otherwise only their ``'atoms'`` entries.
    """
    if not io.can_read:
        raise ValueError(f"Can't read from {format}-format")

    if io.single:
        start = index.start
        assert start is None or start == 0 or start == -1
    # Readers of single-image formats do not take an index argument.
    args = () if io.single else (index,)

    opened_here = isinstance(filename, str) and io.acceptsfd
    if opened_here:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
    else:
        if not isinstance(filename, str):
            assert io.acceptsfd
        fd = filename

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *args, **kwargs):
            record = item if isinstance(item, dict) else {'atoms': item}
            yield record if full_output else record['atoms']
    finally:
        if opened_here:
            fd.close()
def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split an ``@index`` suffix off ``filename``.

    Returns ``(filename, index)``.  Nothing is split if ``filename`` is
    not a string, if ``do_not_split_by_at_sign`` is true or if the last
    path component contains no ``@``.  If ``index`` is a slice it takes
    precedence over the index found in the filename.  An index that
    cannot be parsed triggers a warning and is returned as a string.
    """
    if not isinstance(filename, str):
        return filename, index
    if do_not_split_by_at_sign or '@' not in os.path.basename(filename):
        return filename, index

    # Split at the last '@', which is known to exist here.
    path, _sep, index_text = filename.rpartition('@')

    if isinstance(index, slice):
        return path, index

    try:
        parsed = string2index(index_text)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
        parsed = index_text
    return path, parsed
def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose magic matches ``data``.

    Only the first ``PEEK_BYTES`` bytes of ``data`` are considered.
    Raises UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    hit = next(
        (fmt for fmt in ioformats.values() if fmt.match_magic(head)),
        None,
    )
    if hit is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return hit
def filetype(
        filename: NameOrFile,
        read: bool = True,
        guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then the first ``PEEK_BYTES`` bytes of
    the file will be read and analysed.  Turn off this second part by using
    read=False.

    filename: str, path object or file
        File name, or an open file object.  The ``name`` attribute of a
        file object is used as its file name if it is a string.  A file
        object passed in is never closed; after its contents have been
        inspected it is rewound to the beginning.
    read: bool
        Whether the file contents may be inspected.
    guess: bool
        Whether an unregistered file extension may be returned as the
        format name when the contents do not identify the format.

    Raises UnknownFileTypeError if the format cannot be determined.

    Can be used from the command-line also::

        $ ase info filename ...
    """

    # Path objects are handled like strings, as in read() and write().
    # (They have a ``name`` attribute holding only the last path component,
    # so they must not be mistaken for named file objects below.)
    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(filename, str):
        fileobj = None
        name = filename
    else:
        fileobj = filename
        name = getattr(fileobj, 'name', None)

    ext = None
    must_close_fd = False
    if isinstance(name, str):
        if os.path.isdir(name):
            if os.path.basename(os.path.normpath(name)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if name.startswith('postgres'):
            return 'postgresql'

        if name.startswith('mysql') or name.startswith('mariadb'):
            return 'mysql'

        if name.endswith('aselmdb'):
            return 'db'

        # strip any compression extensions that can be read
        root, _compression = get_compression(name)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if fileobj is None:
            fd = open_with_compression(name, 'rb')
            must_close_fd = True
        else:
            fd = fileobj
    else:
        # File object without a usable name.
        fd = fileobj
        if fd is sys.stdin:
            return 'json'

    if must_close_fd:
        # The file was opened here, so it is closed here as well.
        try:
            data = fd.read(PEEK_BYTES)
        finally:
            fd.close()
    else:
        # The file object belongs to the caller: leave it open and rewind
        # it so that it can still be read from the start.
        data = fd.read(PEEK_BYTES)
        fd.seek(0)

    if len(data) == 0:
        label = name if isinstance(name, str) else repr(fileobj)
        raise UnknownFileTypeError('Empty file: ' + label)

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format
994def index2range(index: int | slice, length: int) -> range:
995 """Convert slice or integer to range.
997 If index is an integer, range will contain only that integer."""
998 if isinstance(index, int):
999 return range(index, index + 1)
1000 return range(length)[index]