Coverage for ase / io / formats.py: 90.98%
532 statements
« prev ^ index » next coverage.py v7.13.3, created at 2026-02-04 10:20 +0000
« prev ^ index » next coverage.py v7.13.3, created at 2026-02-04 10:20 +0000
# fmt: off

"""File formats.

This module implements the read(), iread() and write() functions in ase.io.
For each file format there is an IOFormat object.

There is a dict, ioformats, which stores the objects.

Example
=======

The xyz format is implemented in the ase/io/xyz.py file which has a
read_xyz() generator and a write_xyz() function.  This and other
information can be obtained from ioformats['xyz'].
"""
18import functools
19import inspect
20import io
21import os
22import re
23import sys
24import warnings
25from importlib import import_module
26from importlib.metadata import entry_points
27from pathlib import PurePath
28from typing import (
29 IO,
30 Any,
31 Dict,
32 Iterator,
33 List,
34 Optional,
35 Sequence,
36 Tuple,
37 Union,
38)
40from ase.atoms import Atoms
41from ase.parallel import parallel_function, parallel_generator
42from ase.utils import string2index
43from ase.utils.plugins import ExternalIOFormat
# Maximum number of leading bytes that are read from a file and handed
# to the magic-byte matchers when guessing its format.
PEEK_BYTES = 50_000
class UnknownFileTypeError(Exception):
    """Raised when a file format is unknown or cannot be guessed."""
class IOFormat:
    """Description of a single file format known to ase.io.

    An IOFormat holds the metadata used to recognise a format
    (extensions, glob patterns, magic bytes) and looks up the reader and
    writer functions lazily in the module named ``module_name``.

    The two-character ``code`` encodes the format's properties:

    * first character: ``'1'`` (single Atoms object) or ``'+'``
      (multiple Atoms objects);
    * second character: ``'F'`` (accepts a file object), ``'B'`` (like
      ``'F'`` but binary) or ``'S'`` (needs a file name string).
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        self.magic: List[str] = []
        self.magic_regex: Optional[bytes] = None

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap *data* in an in-memory file object matching this format.

        Binary formats get a BytesIO, text formats a StringIO; *data* is
        encoded or decoded as needed using the format's encoding.
        """
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory file class for this format (BytesIO or StringIO)."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> Sequence[Atoms]:
        """Parse *data* (file contents) and return the images as a list.

        Extra keyword arguments are forwarded to the read function.
        """
        with self._buf_as_filelike(data) as fd:
            # self.read(...) goes through _read_wrapper(), which always
            # hands back an iterator -- plain read functions are wrapped
            # by wrap_read_function().  Consume it exactly once, while
            # the buffer is still open; this covers single-image formats
            # (one item) and multi-image formats alike.
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> Atoms:
        """Parse *data* and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the format's module defines a read function."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the format's module defines a write function."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the write function has a variable named ``append``."""
        writefunc = self._writefunc()
        # Short-circuit protects against writefunc being None.
        return self.can_write and 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = [f'{name}={value!r}'
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with dashes replaced by underscores."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return ``read_<formatname>`` from the module, or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return ``write_<formatname>`` from the module, or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Callable that reads images, or None (with a FutureWarning)."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the read function, always returning an iterator.

        Read functions that are not generator functions are wrapped with
        wrap_read_function() so that callers can iterate uniformly.
        """
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Emit a FutureWarning about accessing an unsupported action."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None.  '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Callable that writes images, or None (with a FutureWarning)."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the write function; raise ValueError if there is none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """String with 'r' if readable and 'w' if writable."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name:        {self.name}',
                 f'Description: {self.description}',
                 f'Modes:       {self.modes}',
                 f'Encoding:    {self.encoding}',
                 f'Module:      {self.module_name}',
                 f'Code:        {self.code}',
                 f'Extensions:  {self.extensions}',
                 f'Globs:       {self.globs}',
                 f'Magic:       {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether read/write functions take a file object (code F or B)."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether files of this format are opened in binary mode."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the module implementing this format.

        Raises UnknownFileTypeError if the module cannot be imported.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            # Chain the original error so the import failure stays
            # visible in the traceback.
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether *basename* matches any of the format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether *data* (leading file bytes) matches this format.

        Uses ``magic_regex`` if set, otherwise tests each ``magic``
        pattern as a glob anchored at the start of *data*.
        """
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(
            fnmatchcase(data, magic + b'*')  # type: ignore[operator, type-var]
            for magic in self.magic
        )
# Registry of IOFormat objects keyed by format name; filled at run-time
# by define_io_format().
ioformats: Dict[str, IOFormat] = {}

# Maps a file extension to the IOFormat registered for it.
extension2format = {}


# Aliased for compatibility only.  Please do not use.
all_formats = ioformats

# Left for compatibility only.
format2modulename = {}
def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat and add it to the module-level registries.

    Parameters
    ==========
    name: str
        Format name; key in ``ioformats``.
    desc: str
        Human-readable description.
    code: str
        Two-character property code, e.g. ``'1F'``.
    module: str, optional
        Module implementing the format.  Defaults to *name* with dashes
        replaced by underscores.  Unless *external* is true, the name is
        taken relative to ``ase.io``.
    ext, glob, magic: None, a single pattern, or an iterable of patterns
        File extensions, filename globs and magic-byte patterns.
    encoding: str, optional
        Text encoding passed on to the IOFormat.
    magic_regex: bytes, optional
        Regular expression used instead of *magic*.
    external: bool
        If true, *module* is used as given (no ``ase.io.`` prefix).

    Returns
    =======
    The newly registered IOFormat.

    Raises
    ======
    ValueError
        If one of the extensions is already registered.  In that case
        none of the registries is modified.
    """
    if module is None:
        module = name.replace('-', '_')
    # format2modulename records the name without the 'ase.io.' prefix.
    short_module_name = module

    if not external:
        module = 'ase.io.' + module

    def normalize_patterns(strings):
        if strings is None:
            strings = []
        elif isinstance(strings, (str, bytes)):
            strings = [strings]
        else:
            strings = list(strings)
        return strings

    fmt = IOFormat(name, desc, code, module_name=module,
                   encoding=encoding)
    fmt.extensions = normalize_patterns(ext)
    fmt.globs = normalize_patterns(glob)
    fmt.magic = normalize_patterns(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    # Validate all extensions before touching any registry, so that a
    # rejected format does not leave partial entries behind.
    checked = set()
    for extension in fmt.extensions:
        if extension in extension2format or extension in checked:
            raise ValueError(f'extension "{extension}" already registered')
        checked.add(extension)

    format2modulename[name] = short_module_name
    for extension in fmt.extensions:
        extension2format[extension] = fmt

    ioformats[name] = fmt
    return fmt
def get_ioformat(name: str) -> IOFormat:
    """Look up the IOFormat called *name*.

    Raises UnknownFileTypeError if no such format is registered.  The
    format's module is imported as a check, which may raise as well.
    """
    fmt = ioformats.get(name)
    if fmt is None:
        raise UnknownFileTypeError(name)
    # Accessed only for its side effect: importing the module fails
    # early if the format is not usable.
    fmt.module
    return fmt
def register_external_io_formats(group):
    """Register every IO format advertised in entry point *group*.

    A format that fails to register only triggers a warning; the
    remaining entry points are still processed.
    """
    all_entry_points = entry_points()
    if hasattr(all_entry_points, 'select'):
        candidates = all_entry_points.select(group=group)
    else:
        # Older interface: a mapping from group name to entry points.
        candidates = all_entry_points.get(group, ())

    for entry_point in candidates:
        try:
            define_external_io_format(entry_point)
        except Exception as exc:
            message = ('Failed to register external '
                       f'IO format {entry_point.name}: {exc}')
            warnings.warn(message)
def define_external_io_format(entry_point):
    """Load *entry_point* and register the IO format it provides.

    Raises ValueError if a format with the entry point's name already
    exists, and TypeError if the loaded object is not an
    ExternalIOFormat.
    """
    fmt = entry_point.load()
    name = entry_point.name

    if name in ioformats:
        raise ValueError(f'Format {name} already defined')

    if not isinstance(fmt, ExternalIOFormat):
        raise TypeError('Wrong type for registering external IO formats '
                        f'in format {name}, expected '
                        'ExternalIOFormat')

    F(name, **fmt._asdict(), external=True)
# We define all the IO formats below.  Each IO format has a code,
# such as '1F', which defines some of the format's properties:
#
# 1=single atoms object
# +=multiple atoms objects
# F=accepts a file-descriptor
# S=needs a file-name str
# B=like F, but opens in binary mode

F = define_io_format

F('abinit-gsr', 'ABINIT GSR file', '1S',
  module='abinit',
  glob='*o_GSR.nc')
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit',
  magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit',
  magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S',
  ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims',
  magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
# XXX: Define plugin in ase db backends package:
# F('aselmdb', 'ASE LMDB format', '+F')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep',
  ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep',
  ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep',
  ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep',
  ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep',
  ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B',
  ext='cif')
F('cmdft', 'CMDFT-file', '1F',
  glob='*I_info')
F('cjson', 'Chemical json file', '1F',
  ext='cjson')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k',
  ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k',
  ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'],
  glob=['f34', '34'])
F('cube', 'CUBE file', '1F',
  ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo',
  magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S',
  magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4',
  ext='config',
  glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4',
  glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol',
  ext='arc')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol',
  ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F',
  module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso',
  ext='pwi',
  magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso',
  ext=['pwo', 'out'],
  magic=b'*Program PWSCF')
# NOTE(review): 'exciting' is registered twice under the same name.
# define_io_format() stores formats by name, so the second call
# replaces the first entry in ``ioformats`` -- confirm this is intended.
F('exciting', 'exciting input', '1F',
  module='exciting',
  glob='input.xml')
F('exciting', 'exciting output', '1F',
  module='exciting',
  glob='INFO.out')
F('extxyz', 'Extended XYZ file', '+F',
  ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us',
  magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us',
  magic=b' $DATA',
  ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian',
  ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian',
  ext='log',
  magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F',
  glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F',
  ext='g96')
F('html', 'X3DOM HTML', '1F',
  module='x3d')
F('json', 'ASE JSON database file', '+F',
  ext='json',
  module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun',
  magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F',
  module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem',
  ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem',
  ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus',
  glob='inp')
F('onetep-out', 'ONETEP output file', '+F',
  module='onetep',
  magic=b'*Linear-Scaling Ab Initio Total Energy Program*')
F('onetep-in', 'ONETEP input file', '1F',
  module='onetep',
  magic=[b'*lock species ',
         b'*LOCK SPECIES ',
         b'*--- INPUT FILE ---*'])
F('orca-output', 'ORCA output', '+F',
  module='orca',
  magic=b'* O R C A *')
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S',
  module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S',
  ext='shelx')
F('rmc6f', 'RMCProfile', '1S',
  ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV',
  module='siesta')
F('struct', 'WIEN2k structure file', '1S',
  module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F',
  module='siesta')
F('traj', 'ASE trajectory', '+B',
  module='trajectory',
  ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F',
  glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole',
  glob='gradient',
  magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F',
  ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar',
  glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp',
  glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp',
  glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp',
  glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F',
  module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F',
  module='vtkxml',
  ext='vtu')
F('wout', 'Wannier90 output', '1F',
  module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
#      The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point
register_external_io_formats('ase.ioformats')
def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression suffix off a filename.

    The suffixes ``.gz``, ``.bz2`` and ``.xz`` are recognised.  Any other
    filename is returned unchanged together with ``None``.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without the compression suffix, and the suffix
        (without the dot).  If no known compression suffix is present,
        the full filename and None.
    """
    # Update if anything is added
    known_suffixes = ('gz', 'bz2', 'xz')

    # os.path.splitext keeps the leading '.' on the extension.
    stem, suffix = os.path.splitext(filename)
    suffix = suffix.strip('.')

    if suffix not in known_suffixes:
        return filename, None
    return stem, suffix
def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a file, transparently handling compression.

    The compression is guessed from the filename; the returned object
    can be used for reading or writing like a standard file.

    Implemented for ``gz`` (gzip), ``bz2`` (bzip2) and ``xz`` (lzma).

    Supported modes are:
       * 'r', 'rt', 'w', 'wt' for text mode read and write.
       * 'rb', 'wb' for binary read and write.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    # Compressed formats sometimes default to binary, so force text mode.
    explicit_text_mode = {'r': 'rt', 'w': 'wt', 'a': 'at'}
    mode = explicit_text_mode.get(mode, mode)

    compression = get_compression(filename)[1]

    # The compression modules are imported only when actually needed.
    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore[return-value]
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)

    # Either None or unknown string
    return open(filename, mode)
def is_compressed(fd: io.BufferedIOBase) -> bool:
    """Tell whether *fd* is a gzip, bz2 or lzma file object."""
    # A compression module is only consulted if it has already been
    # imported: this avoids triggering imports, and Python may have been
    # compiled without e.g. lzma.
    if 'gzip' in sys.modules:
        import gzip
        if isinstance(fd, gzip.GzipFile):
            return True

    if 'bz2' in sys.modules:
        import bz2
        if isinstance(fd, bz2.BZ2File):
            return True

    if 'lzma' in sys.modules:
        import lzma
        if isinstance(fd, lzma.LZMAFile):
            return True

    return False
def wrap_read_function(read, filename, index=None, **kwargs):
    """Turn a plain read function into a generator.

    With an index, the items returned by ``read(filename, index)`` are
    yielded one by one; without an index, the single return value of
    ``read(filename)`` is yielded.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)
# What read(), iread(), write() and filetype() accept as ``filename``:
# a path string, a pathlib path, or an open file object.
NameOrFile = Union[str, PurePath, IO]
def write(
        filename: NameOrFile,
        images: Union[Atoms, Sequence[Atoms]],
        format: str = None,
        parallel: bool = True,
        append: bool = False,
        **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one file.
        WARNING: If the file format does not support multiple entries without
        additional keywords/headers, files created using 'append=True'
        might not be readable by any program! They will nevertheless be
        written without error message.

    The use of additional keywords is format specific. write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """
    if isinstance(filename, PurePath):
        filename = str(filename)

    if not isinstance(filename, str):
        # An open file object was passed: write to it directly.
        fd = filename  # type: ignore[assignment]
        if format is None:
            try:
                format = filetype(filename, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
        filename = None  # type: ignore[assignment]
    elif filename == '-':
        fd = sys.stdout
        filename = None  # type: ignore[assignment]
    else:
        fd = None
        if format is None:
            format = filetype(filename, read=False)
            assert isinstance(format, str)

    format = format or 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(filename, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)
@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write *images* with the IOFormat *io* to *filename* or *fd*.

    Exactly one of *filename* and *fd* is expected to be set by write().
    Raises ValueError when the format cannot do what is requested.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError(
                f'{format}-format can only store 1 Atoms object.')
        images = images[0]

    if not io.can_write:
        raise ValueError(f"Can't write to {format}-format")

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError(
                "Can't write more than one image to file-descriptor "
                'using json-format.')
        return io.write(filename, images, append=append, **kwargs)

    if io.acceptsfd:
        if fd is not None:
            # The caller owns this file object; do not close it.
            return io.write(fd, images, **kwargs)

        mode = 'a' if append else 'w'
        if io.isbinary:
            mode += 'b'
        fd = open_with_compression(filename, mode)
        # XXX remember to re-enable compressed open
        # fd = io.open(filename, mode)
        try:
            return io.write(fd, images, **kwargs)
        finally:
            fd.close()

    # The format needs a file name rather than a file object.
    if fd is not None:
        raise ValueError(f"Can't write {format}-format to file-descriptor")

    if io.can_append:
        return io.write(filename, images, append=append, **kwargs)

    if append:
        raise ValueError(f"Cannot append to {format}-format, write-function "
                         "does not support the append keyword.")

    return io.write(filename, images, **kwargs)
def read(
        filename: NameOrFile,
        index: Any = None,
        format: Optional[str] = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default) ``filename`` is split at the at sign ``@``.

    Many formats allow an open file-like object to be passed instead
    of ``filename``.  In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin

    if isinstance(index, str):
        try:
            index = string2index(index)
        except ValueError:
            # Leave the string as it is; it is passed on unparsed.
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    if not isinstance(index, (slice, str)):
        # Single image requested: take the first one from index onwards.
        images = _iread(filename, slice(index, None), format, ioformat,
                        parallel=parallel, **kwargs)
        return next(images)

    return list(_iread(filename, index, format, ioformat,
                       parallel=parallel, **kwargs))
def iread(
        filename: NameOrFile,
        index: Any = None,
        format: Optional[str] = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterator[Atoms]:
    """Iterator for reading Atoms objects from file.

    Works as the `read` function, but yields one Atoms object at a time
    instead of all at once.

    filename: str or file
        Name of the file to read from or a file descriptor.
    index: int, slice or str
        Which images to yield.  By default (None or ``':'``) all
        images; an integer selects that single image.
    format: str
        File format; guessed by the *filetype* function if not given.
    parallel: bool
        Passed on to the parallel read machinery.
    do_not_split_by_at_sign: bool
        If False (default) ``filename`` is split at the at sign ``@``.
    """

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        # Same tolerance as read(): an index string that cannot be
        # parsed is passed on unchanged instead of raising here.
        try:
            index = string2index(index)
        except ValueError:
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None, None, None)

    if not isinstance(index, (slice, str)):
        # For index == -1 the stop value would be 0, i.e. an empty
        # slice; ``or None`` turns that into an open end.
        index = slice(index, (index + 1) or None)

    format = format or filetype(filename, read=isinstance(filename, str))
    io = get_ioformat(format)

    yield from _iread(filename, index, format, io, parallel=parallel,
                      **kwargs)
@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield images read from *filename* with the IOFormat *io*.

    With ``full_output=True`` the dictionaries produced by the reader
    are yielded; otherwise only their ``'atoms'`` entry.
    """
    if not io.can_read:
        raise ValueError(f"Can't read from {format}-format")

    if io.single:
        # Single-image readers take no index argument.
        start = index.start
        assert start is None or start == 0 or start == -1
        args = ()
    else:
        args = (index,)

    given_name = isinstance(filename, str)
    if not given_name:
        assert io.acceptsfd

    # Only a file opened here is closed here.
    must_close_fd = given_name and io.acceptsfd
    if must_close_fd:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
    else:
        fd = filename

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *args, **kwargs):
            record = item if isinstance(item, dict) else {'atoms': item}
            yield record if full_output else record['atoms']
    finally:
        if must_close_fd:
            fd.close()
def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split an ``@index`` suffix off *filename*.

    Returns ``(filename, index)``.  Nothing is split if *filename* is
    not a string, if *do_not_split_by_at_sign* is true, or if the
    basename contains no ``@``.  A slice passed as *index* takes
    precedence over the suffix.  A suffix that cannot be parsed is
    returned as a string, with a warning.
    """
    has_suffix = (
        isinstance(filename, str)
        and not do_not_split_by_at_sign
        and '@' in os.path.basename(filename)
    )
    if not has_suffix:
        return filename, index

    path, _, suffix = filename.rpartition('@')

    if isinstance(index, slice):
        return path, index

    try:
        parsed = string2index(suffix)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
        parsed = suffix
    return path, parsed
def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose magic matches *data*.

    Only the first PEEK_BYTES bytes of *data* are considered.  Raises
    UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    candidates = (fmt for fmt in ioformats.values() if fmt.match_magic(head))
    found = next(candidates, None)
    if found is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return found
def filetype(
        filename: NameOrFile,
        read: bool = True,
        guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then the first PEEK_BYTES bytes of the
    file will be read and analysed.  Turn off this second part by using
    read=False.

    filename: str or file
        File name, or an open file object.  For a file object with a
        ``name`` attribute, that name is used for the filename checks.
    read: bool
        Whether the file contents may be inspected.
    guess: bool
        If nothing else matches, fall back to returning the filename
        extension as the format name.

    Raises UnknownFileTypeError if the type cannot be determined.

    Can be used from the command-line also::

        $ ase info filename ...
    """

    orig_filename = filename
    if hasattr(filename, 'name'):
        filename = filename.name

    ext = None
    # True only if the file is opened by this function, in which case
    # it must be closed here.  Files owned by the caller are rewound.
    opened_here = False
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        if filename.endswith('aselmdb'):
            return 'db'

        # strip any compression extensions that can be read
        root, _compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
            opened_here = True
        else:
            # A file object with a name was passed in; read from it.
            fd = orig_filename  # type: ignore[assignment]
    else:
        fd = filename
        if fd is sys.stdin:
            return 'json'

    try:
        data = fd.read(PEEK_BYTES)
    finally:
        # Never leak a file opened here, even if reading fails.
        if opened_here:
            fd.close()
    if not opened_here:
        # Leave the caller's file object open and positioned at the
        # start, so that it can still be read afterwards.
        fd.seek(0)

    if len(data) == 0:
        # filename may be a file object here, so format it instead of
        # concatenating strings.
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format
1000def index2range(index: int | slice, length: int) -> range:
1001 """Convert slice or integer to range.
1003 If index is an integer, range will contain only that integer."""
1004 if isinstance(index, int):
1005 return range(index, index + 1)
1006 return range(length)[index]