Coverage for /builds/ase/ase/ase/io/formats.py: 91.01%
534 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-08-02 00:12 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2025-08-02 00:12 +0000
# fmt: off

"""File formats.

This module implements the read(), iread() and write() functions in ase.io.
For each file format there is an IOFormat object.

There is a dict, ioformats, which stores the objects.

Example
=======

The xyz format is implemented in the ase/io/xyz.py file which has a
read_xyz() generator and a write_xyz() function.  This and other
information can be obtained from ioformats['xyz'].
"""
18import functools
19import inspect
20import io
21import numbers
22import os
23import re
24import sys
25import warnings
26from importlib import import_module
27from importlib.metadata import entry_points
28from pathlib import PurePath
29from typing import (
30 IO,
31 Any,
32 Dict,
33 Iterator,
34 List,
35 Optional,
36 Sequence,
37 Tuple,
38 Union,
39)
41from ase.atoms import Atoms
42from ase.parallel import parallel_function, parallel_generator
43from ase.utils import string2index
44from ase.utils.plugins import ExternalIOFormat
# Upper bound on how many leading bytes of a file are inspected when the
# format is guessed from the file contents: filetype() reads this many
# bytes and match_magic() truncates its input to this length.
PEEK_BYTES = 50000
class UnknownFileTypeError(Exception):
    """Raised when a file format is unknown or cannot be determined."""
class IOFormat:
    """Metadata and lazy accessors for a single file format.

    An instance stores the format name, a description, a two-character
    code and the name of the module implementing the format.  The module
    is imported on demand; the reader and writer are looked up in it as
    ``read_<name>`` and ``write_<name>``, with dashes in the format name
    replaced by underscores.

    The code has two characters:

    * first: ``'1'`` (single Atoms object) or ``'+'`` (multiple);
    * second: ``'F'`` (accepts a file object), ``'B'`` (like ``'F'`` but
      binary) or ``'S'`` (needs a file name).
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        self.magic: List[str] = []
        self.magic_regex: Optional[bytes] = None

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap ``data`` in an in-memory file matching this format's mode.

        The data is encoded (binary formats) or decoded (text formats)
        with the format's encoding when its type does not already match.
        """
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory file class: BytesIO for binary formats, else StringIO."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> Sequence[Atoms]:
        """Parse ``data`` (file contents) and return the images as a list.

        The reader returned by :attr:`read` always produces an iterator,
        also for single-image formats, so the iterator is consumed
        uniformly.  It is consumed inside the ``with`` block because
        readers may pull from the buffer lazily.
        """
        with self._buf_as_filelike(data) as fd:
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> Atoms:
        """Parse ``data`` and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the implementing module provides a read function."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the implementing module provides a write function."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the write function's code object has an 'append' name."""
        writefunc = self._writefunc()
        return (writefunc is not None
                and 'append' in writefunc.__code__.co_varnames)

    def __repr__(self) -> str:
        tokens = [f'{name}={value!r}'
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with dashes replaced by underscores."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return the module's ``read_<name>`` attribute, or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return the module's ``write_<name>`` attribute, or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Reader callable, or None (with a FutureWarning) if unsupported."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the read function so that it always yields its results.

        Read functions that are not generator functions are routed
        through wrap_read_function().
        """
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Emit the FutureWarning about None-returning read/write."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None.  '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Writer callable, or None (with a FutureWarning) if unsupported."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the write function; raise ValueError if there is none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """String made of 'r' and/or 'w' for the supported operations."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name:        {self.name}',
                 f'Description: {self.description}',
                 f'Modes:       {self.modes}',
                 f'Encoding:    {self.encoding}',
                 f'Module:      {self.module_name}',
                 f'Code:        {self.code}',
                 f'Extensions:  {self.extensions}',
                 f'Globs:       {self.globs}',
                 f'Magic:       {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether the format works on file objects (code is not 'S')."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether the format is binary (code 'B')."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the implementing module.

        Raises UnknownFileTypeError (chained to the ImportError) if the
        module cannot be imported.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether ``basename`` matches any of the format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether ``data`` matches this format's magic signature.

        ``magic_regex`` takes precedence when set; otherwise each entry
        of ``magic`` is used as a case-sensitive glob with a trailing
        wildcard.
        """
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(
            fnmatchcase(data, magic + b'*')  # type: ignore[operator, type-var]
            for magic in self.magic
        )
# Registry of all known formats, keyed by format name.
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Maps a registered file extension to its format; each extension can be
# claimed by one format only (enforced in define_io_format()).
extension2format: Dict[str, IOFormat] = {}


all_formats = ioformats  # Aliased for compatibility only.  Please do not use.
format2modulename: Dict[str, str] = {}  # Left for compatibility only.
def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat and add it to the module-level registries.

    Parameters
    ==========
    name: str
        Format name; also the key in ``ioformats``.
    desc: str
        Human-readable description.
    code: str
        Two-character format code (see IOFormat).
    module: str, optional
        Name of the implementing module.  Defaults to ``name`` with
        dashes replaced by underscores.  Unless ``external`` is true the
        name is taken relative to ``ase.io``.
    ext, glob, magic: None, a single str/bytes or an iterable
        File extensions, file-name globs and magic signatures.
    encoding: str, optional
        Text encoding of the format.
    magic_regex: bytes, optional
        Regular expression used instead of ``magic``.
    external: bool
        If true, ``module`` is used as given, without ``ase.io.`` prefix.

    Returns
    =======
    The new IOFormat.

    Raises
    ======
    ValueError
        If one of the extensions is already registered.  Nothing is
        registered in that case.
    """
    if module is None:
        module = name.replace('-', '_')
    short_module_name = module

    if not external:
        module = 'ase.io.' + module

    def normalize_patterns(strings):
        if strings is None:
            strings = []
        elif isinstance(strings, (str, bytes)):
            strings = [strings]
        else:
            strings = list(strings)
        return strings

    fmt = IOFormat(name, desc, code, module_name=module,
                   encoding=encoding)
    fmt.extensions = normalize_patterns(ext)
    fmt.globs = normalize_patterns(glob)
    fmt.magic = normalize_patterns(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    # Validate all extensions before touching any registry, so that a
    # rejected format does not leave a partial registration behind.
    claimed = set()
    for extension in fmt.extensions:
        if extension in extension2format or extension in claimed:
            raise ValueError(f'extension "{extension}" already registered')
        claimed.add(extension)

    format2modulename[name] = short_module_name
    for extension in fmt.extensions:
        extension2format[extension] = fmt

    ioformats[name] = fmt
    return fmt
def get_ioformat(name: str) -> IOFormat:
    """Look up a registered format by name.

    Raises UnknownFileTypeError if the name is not registered or if the
    implementing module cannot be imported.
    """
    fmt = ioformats.get(name)
    if fmt is None:
        raise UnknownFileTypeError(name)
    # Accessing the property imports the module; an import failure
    # surfaces here as an error rather than later on.
    fmt.module
    return fmt
def register_external_io_formats(group):
    """Register every IO format advertised under entry point ``group``.

    A failure to register one format is reported as a warning and does
    not prevent the remaining formats from being registered.
    """
    available = entry_points()
    # Use the selectable interface when the object provides it, else
    # fall back to the dict-style lookup.
    if hasattr(available, 'select'):
        candidates = available.select(group=group)
    else:
        candidates = available.get(group, ())

    for candidate in candidates:
        try:
            define_external_io_format(candidate)
        except Exception as exc:
            warnings.warn(
                'Failed to register external '
                f'IO format {candidate.name}: {exc}'
            )
def define_external_io_format(entry_point):
    """Load one entry point and register the format it describes.

    Raises ValueError if a format of that name already exists and
    TypeError if the loaded object is not an ExternalIOFormat.
    """
    # Load first: the entry point is imported even if it is rejected below.
    spec = entry_point.load()

    already_known = entry_point.name in ioformats
    if already_known:
        raise ValueError(f'Format {entry_point.name} already defined')

    if isinstance(spec, ExternalIOFormat):
        F(entry_point.name, **spec._asdict(), external=True)
        return

    raise TypeError('Wrong type for registering external IO formats '
                    f'in format {entry_point.name}, expected '
                    'ExternalIOFormat')
# We define all the IO formats below.  Each IO format has a code,
# such as '1F', which defines some of the format's properties:
#
# 1=single atoms object
# +=multiple atoms objects
# F=accepts a file-descriptor
# S=needs a file-name str
# B=like F, but opens in binary mode
#
# Keyword arguments used below (see define_io_format()):
#
# module: implementing module inside ase.io (default: the format name
#         with '-' replaced by '_')
# ext: file extension(s); an extension can be registered only once
# glob: file-name pattern(s) tested by IOFormat.match_name()
# magic: byte pattern(s) tested by IOFormat.match_magic()
# magic_regex: regular expression used instead of magic
# encoding: text encoding of the format

F = define_io_format
F('abinit-gsr', 'ABINIT GSR file', '1S',
  module='abinit', glob='*o_GSR.nc')
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
# XXX: Define plugin in ase db backends package:
# F('aselmdb', 'ASE LMDB format', '+F')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cjson', 'Chemical json file', '1F', ext='cjson')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol', ext='arc')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['pwo', 'out'], magic=b'*Program PWSCF')
# A format name can be registered only once: a second F('exciting', ...)
# call would replace the first entry in ``ioformats`` and silently drop
# its glob.  Both file names are therefore declared in a single call.
F('exciting', 'exciting input/output', '1F', module='exciting',
  glob=['input.xml', 'INFO.out'])
# Each call below registers one more format via define_io_format().
# Note that the 'xyz' extension is claimed by 'extxyz' here.
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('onetep-out', 'ONETEP output file', '+F',
  module='onetep',
  magic=b'*Linear-Scaling Ab Initio Total Energy Program*')
F('onetep-in', 'ONETEP input file', '1F',
  module='onetep',
  magic=[b'*lock species ',
         b'*LOCK SPECIES ',
         b'*--- INPUT FILE ---*'])
F('orca-output', 'ORCA output', '+F',
  module='orca', magic=b'* O R C A *')
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
#      The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point
register_external_io_formats('ase.ioformats')
def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression suffix off a filename.

    Only ``.gz``, ``.bz2`` and ``.xz`` are recognised.  Any other
    filename is returned unchanged together with None.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without the compression suffix and the name of the
        compression format, or the untouched filename and None.
    """
    # The stdlib splitter deals with most edge cases; the suffix it
    # returns still carries the dot.
    stem, suffix = os.path.splitext(filename)
    kind = suffix.strip('.')

    # Update if anything is added
    if kind not in ['gz', 'bz2', 'xz']:
        return filename, None
    return stem, kind
def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a file, transparently handling compression.

    The compression is guessed from the filename and the file is opened
    for reading or writing as if it were a standard file.

    Implemented for ``gz``(gzip), ``bz2``(bzip2) and ``xz``(lzma).

    Supported modes are:
       * 'r', 'rt', 'w', 'wt' for text mode read and write.
       * 'rb', 'wb' for binary read and write.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    # Compressed formats sometimes default to binary, so force text mode.
    text_modes = {'r': 'rt', 'w': 'wt', 'a': 'at'}
    mode = text_modes.get(mode, mode)

    compression = get_compression(filename)[1]

    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore[return-value]
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)
    # Either None or unknown string
    return open(filename, mode)
def is_compressed(fd: io.BufferedIOBase) -> bool:
    """Tell whether ``fd`` is a gzip, bz2 or lzma file object."""
    # Only modules that are already imported are consulted: this avoids
    # triggering imports, and Python may have been built without e.g.
    # lzma.  An object can only be an instance of a class whose module
    # has been imported, so nothing is missed.
    if 'gzip' in sys.modules:
        import gzip
        if isinstance(fd, gzip.GzipFile):
            return True
    if 'bz2' in sys.modules:
        import bz2
        if isinstance(fd, bz2.BZ2File):
            return True
    if 'lzma' in sys.modules:
        import lzma
        if isinstance(fd, lzma.LZMAFile):
            return True
    return False
def wrap_read_function(read, filename, index=None, **kwargs):
    """Turn a plain read function into a generator.

    With an index, everything ``read(filename, index, ...)`` returns is
    yielded item by item.  Without one, the single return value of
    ``read(filename, ...)`` is yielded.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)
# Type of the ``filename`` argument of write(), read(), iread() and
# filetype(): a path given as str or PurePath, or a file object.
NameOrFile = Union[str, PurePath, IO]
def write(
    filename: NameOrFile,
    images: Union[Atoms, Sequence[Atoms]],
    format: str = None,
    parallel: bool = True,
    append: bool = False,
    **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format is guessed from the filename; if that is not
        possible for a file object, or for standard output, json is
        used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one file.
        WARNING: If the file format does not support multiple entries without
        additional keywords/headers, files created using 'append=True'
        might not be readable by any program! They will nevertheless be
        written without error message.

    The use of additional keywords is format specific. write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """

    if isinstance(filename, PurePath):
        filename = str(filename)

    fd = None
    if not isinstance(filename, str):
        # An open file object was passed instead of a name.
        fd, filename = filename, None  # type: ignore[assignment]
        if format is None:
            try:
                format = filetype(fd, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
    elif filename == '-':
        fd, filename = sys.stdout, None  # type: ignore[assignment]
    elif format is None:
        format = filetype(filename, read=False)
        assert isinstance(format, str)

    if not format:
        format = 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(filename, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)
@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write ``images`` with the IOFormat ``io``.

    Exactly one of ``filename`` and ``fd`` is expected to be set.  Files
    opened here are closed again before returning.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError('{}-format can only store 1 Atoms object.'
                             .format(format))
        images = images[0]

    if not io.can_write:
        raise ValueError(f"Can't write to {format}-format")

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError("Can't write more than one image to "
                             "file-descriptor "
                             'using json-format.')
        return io.write(filename, images, append=append, **kwargs)

    if io.acceptsfd:
        if fd is not None:
            # Caller-owned file object: use it and leave it open.
            return io.write(fd, images, **kwargs)

        letter = 'a' if append else 'w'
        mode = letter + 'b' if io.isbinary else letter
        fd = open_with_compression(filename, mode)
        # XXX remember to re-enable compressed open
        # fd = io.open(filename, mode)
        try:
            return io.write(fd, images, **kwargs)
        finally:
            fd.close()

    # From here on the format needs a file name.
    if fd is not None:
        raise ValueError("Can't write {}-format to file-descriptor"
                         .format(format))
    if io.can_append:
        return io.write(filename, images, append=append, **kwargs)
    if append:
        raise ValueError("Cannot append to {}-format, write-function "
                         "does not support the append keyword."
                         .format(format))
    return io.write(filename, images, **kwargs)
def read(
    filename: NameOrFile,
    index: Any = None,
    format: Optional[str] = None,
    parallel: bool = True,
    do_not_split_by_at_sign: bool = False,
    **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default) ``filename`` is split at the at sign ``@``.

    Many formats allow an open file-like object to be passed instead
    of ``filename``.  In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin
    if isinstance(index, str):
        # A string that is not a valid index is passed on unchanged.
        try:
            index = string2index(index)
        except ValueError:
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1
    if not format:
        format = filetype(filename, read=isinstance(filename, str))

    ioformat = get_ioformat(format)

    # Slices and strings select several images; anything else selects one.
    wants_many = isinstance(index, (slice, str))
    selection = index if wants_many else slice(index, None)
    images = _iread(filename, selection, format, ioformat,
                    parallel=parallel, **kwargs)
    return list(images) if wants_many else next(images)
def iread(
    filename: NameOrFile,
    index: Any = None,
    format: str = None,
    parallel: bool = True,
    do_not_split_by_at_sign: bool = False,
    **kwargs
) -> Iterator[Atoms]:
    """Iterate over the Atoms objects stored in a file.

    Takes the same arguments as the `read` function, but yields the
    Atoms objects one at a time.  Without an index all images are
    yielded."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None)
    elif not isinstance(index, (slice, str)):
        # A single integer becomes a one-element slice; a stop of zero
        # is replaced by None.
        stop = index + 1
        index = slice(index, stop or None)

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)
@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield what the reader of the IOFormat ``io`` produces.

    Each item is the 'atoms' entry of the reader's output, or the full
    output dict if ``full_output`` is true.
    """

    if not io.can_read:
        raise ValueError(f"Can't read from {format}-format")

    if io.single:
        start = index.start
        assert start is None or start == 0 or start == -1
    # Single-image readers are called without an index argument.
    args = () if io.single else (index,)

    fd = filename
    must_close_fd = False
    if not isinstance(filename, str):
        assert io.acceptsfd
    elif io.acceptsfd:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
        must_close_fd = True

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *args, **kwargs):
            if not isinstance(item, dict):
                item = {'atoms': item}
            yield item if full_output else item['atoms']
    finally:
        if must_close_fd:
            fd.close()
def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split a trailing ``@index`` off a filename.

    Returns ``(filename, index)``.  The input is returned unchanged if
    ``filename`` is not a string, if splitting is disabled, or if its
    basename contains no ``@``.  An explicitly given slice ``index``
    wins over the index from the filename.  If the text after ``@``
    cannot be parsed, a warning is issued and that text is returned as
    the index.
    """
    splittable = (isinstance(filename, str)
                  and not do_not_split_by_at_sign
                  and '@' in os.path.basename(filename))
    if not splittable:
        return filename, index

    newfilename, suffix = filename.rsplit('@', 1)
    if isinstance(index, slice):
        return newfilename, index

    newindex = suffix
    try:
        newindex = string2index(suffix)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
    return newfilename, newindex
def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose magic matches ``data``.

    Only the first PEEK_BYTES bytes are considered.  Raises
    UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    hit = next((candidate for candidate in ioformats.values()
                if candidate.match_magic(head)), None)
    if hit is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return hit
def filetype(
    filename: NameOrFile,
    read: bool = True,
    guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then the first PEEK_BYTES bytes of the
    file will be read and analysed.  Turn off this second part by using
    read=False.

    ``filename`` may be a str, a pathlib path or a file object.  A file
    object passed in by the caller is never closed here; after peeking
    it is rewound to the start.  Standard input is reported as 'json'
    without being read.

    If ``guess`` is true (default), an unregistered file extension is
    returned as the format name when nothing else matches.

    Raises UnknownFileTypeError if the type cannot be determined or the
    file is empty.

    Can be used from the command-line also::

        $ ase info filename ...
    """

    # A pathlib path has a ``.name`` attribute too (the basename only),
    # so convert it before the file-object handling below.
    if isinstance(filename, PurePath):
        filename = str(filename)

    orig_filename = filename

    # sys.stdin has a ``.name`` as well, so it must be recognised before
    # that attribute is mistaken for a path.
    if orig_filename is sys.stdin:
        return 'json'

    if hasattr(filename, 'name'):
        filename = filename.name

    ext = None
    opened_here = False
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        if filename.endswith('aselmdb'):
            return 'db'

        # strip any compression extensions that can be read
        root, _compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
            opened_here = True
        else:
            fd = orig_filename  # type: ignore[assignment]
    else:
        # File object without a usable (string) name.
        fd = orig_filename

    if opened_here:
        try:
            data = fd.read(PEEK_BYTES)
        finally:
            fd.close()
    else:
        # The file object belongs to the caller: rewind, do not close.
        data = fd.read(PEEK_BYTES)
        fd.seek(0)

    if len(data) == 0:
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format
def index2range(index, length):
    """Map an index onto the range of positions it selects.

    ``index`` is applied to ``range(length)``.  A slice gives the
    corresponding sub-range; an integer gives a range holding just that
    one (normalised) position."""
    selected = range(length)[index]
    if not isinstance(selected, numbers.Integral):
        return selected
    return range(selected, selected + 1)