Coverage for ase/io/formats.py: 90.98%

1# fmt: off

3"""File formats.

5This module implements the read(), iread() and write() functions in ase.io.

6For each file format there is an IOFormat object.

8There is a dict, ioformats, which stores the objects.

10Example

11=======

13The xyz format is implemented in the ase/io/xyz.py file which has a

14read_xyz() generator and a write_xyz() function. This and other

15information can be obtained from ioformats['xyz'].

16"""

18import functools

19import inspect

20import io

21import os

22import re

23import sys

24import warnings

25from importlib import import_module

26from importlib.metadata import entry_points

27from pathlib import PurePath

28from typing import (

29 IO,

30 Any,

31 Dict,

32 Iterator,

33 List,

34 Optional,

35 Sequence,

36 Tuple,

37 Union,

38)

40from ase.atoms import Atoms

41from ase.parallel import parallel_function, parallel_generator

42from ase.utils import string2index

43from ase.utils.plugins import ExternalIOFormat

# Maximum number of leading bytes inspected when guessing a file format
# from its contents (used by filetype() and match_magic()).
PEEK_BYTES = 50000

class UnknownFileTypeError(Exception):
    """Raised when a file format is not registered or cannot be guessed."""

class IOFormat:
    """Description of one file format and lazy access to its I/O functions.

    The reader and writer are looked up on demand as ``read_<name>`` and
    ``write_<name>`` (with ``-`` in the format name replaced by ``_``) in
    the module called ``module_name``.

    The two-character ``code`` describes the format:

    * first character: ``'1'`` for a single Atoms object, ``'+'`` for
      multiple Atoms objects;
    * second character: ``'F'`` accepts a file object, ``'B'`` accepts a
      file object opened in binary mode, ``'S'`` needs a file name string.
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        """Store the format metadata.

        Parameters
        ==========
        name: str
            Name of the format, e.g. ``'xyz'``.
        desc: str
            Human-readable description.
        code: str
            Two-character code, see the class docstring.
        module_name: str
            Importable name of the module holding the I/O functions.
        encoding: str or None
            Text encoding used by :meth:`parse_images`; ``None`` means
            fall back to UTF-8.
        """
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        self.magic: List[str] = []
        self.magic_regex: Optional[bytes] = None

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap ``data`` in an in-memory file matching the format's mode."""
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        # Convert to the type the in-memory file class expects.
        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory file class: BytesIO for binary formats, else StringIO."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> Sequence[Atoms]:
        """Parse ``data`` (file contents) and return the images as a list.

        Extra keyword arguments are forwarded to the format's reader.
        """
        with self._buf_as_filelike(data) as fd:
            outputs = self.read(fd, **kwargs)
            if isinstance(outputs, Atoms):
                return [outputs]
            # The read wrapper hands back a lazy generator, also for
            # single-image formats.  Consume it exactly once, and do so
            # while the in-memory file is still open.
            return list(outputs)

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> Atoms:
        """Parse ``data`` and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the module defines a read function for this format."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the module defines a write function for this format."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the write function's code object mentions ``append``."""
        writefunc = self._writefunc()
        return self.can_write and 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = [f'{name}={value!r}'
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with ``-`` replaced by ``_`` (for function lookup)."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return ``read_<formatname>`` from the module, or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return ``write_<formatname>`` from the module, or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Callable reading this format, or None (with a FutureWarning)."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the reader, wrapping plain functions so a generator results."""
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Emit the FutureWarning about ``read``/``write`` returning None."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None. '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Callable writing this format, or None (with a FutureWarning)."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the writer; raise ValueError if the format has none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """String with ``'r'`` and/or ``'w'`` for the supported operations."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name: {self.name}',
                 f'Description: {self.description}',
                 f'Modes: {self.modes}',
                 f'Encoding: {self.encoding}',
                 f'Module: {self.module_name}',
                 f'Code: {self.code}',
                 f'Extensions: {self.extensions}',
                 f'Globs: {self.globs}',
                 f'Magic: {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether the I/O functions take a file object (code is not 'S')."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether files of this format are opened in binary mode."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the I/O module.

        Raises UnknownFileTypeError if the module cannot be imported; the
        original ImportError is kept as the cause.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether ``basename`` matches any of the format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether the start of ``data`` matches the format's signature.

        ``magic_regex`` takes precedence when set; otherwise each entry of
        ``magic`` is used as a glob pattern anchored at the start of data.
        """
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(
            fnmatchcase(data, magic + b'*')  # type: ignore[operator, type-var]
            for magic in self.magic
        )

237

238

# Registry of all defined formats, keyed by format name.
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Maps a registered file extension to the IOFormat that claims it.
extension2format: Dict[str, IOFormat] = {}


all_formats = ioformats  # Aliased for compatibility only. Please do not use.
# Written by define_io_format() with the short (unprefixed) module name.
format2modulename: Dict[str, str] = {}  # Left for compatibility only.

245

246

def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat and add it to the module-level registries.

    Parameters
    ==========
    name, desc, code:
        Passed on to :class:`IOFormat`.
    module: str or None
        Module holding the I/O functions.  Defaults to ``name`` with
        ``-`` replaced by ``_``.  Unless ``external`` is true the name is
        prefixed with ``'ase.io.'``.
    ext, glob, magic:
        A single pattern or an iterable of patterns (or None) giving the
        file extensions, file name globs and content signatures.
    encoding: str or None
        Text encoding stored on the format.
    magic_regex: bytes or None
        Regular expression matched against the file contents.
    external: bool
        If true, ``module`` is used as given, without the ASE prefix.

    Returns the new IOFormat.  Raises ValueError if one of the extensions
    is already registered; in that case no registry is modified.
    """

    def normalize_patterns(strings):
        # Accept None, a single str/bytes pattern, or any iterable.
        if strings is None:
            strings = []
        elif isinstance(strings, (str, bytes)):
            strings = [strings]
        else:
            strings = list(strings)
        return strings

    uses_default_module = module is None
    if uses_default_module:
        module = name.replace('-', '_')
    short_module = module

    if not external:
        module = 'ase.io.' + module

    fmt = IOFormat(name, desc, code, module_name=module,
                   encoding=encoding)
    fmt.extensions = normalize_patterns(ext)
    fmt.globs = normalize_patterns(glob)
    fmt.magic = normalize_patterns(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    # Validate everything before touching the global registries so that a
    # rejected definition does not leave partial registrations behind.
    claimed = set()
    for extension in fmt.extensions:
        if extension in extension2format or extension in claimed:
            raise ValueError(f'extension "{extension}" already registered')
        claimed.add(extension)

    # NOTE(review): only formats relying on the default module name are
    # recorded here -- the nesting was inferred from source whose
    # indentation was lost; confirm against upstream.
    if uses_default_module:
        format2modulename[name] = short_module

    for extension in fmt.extensions:
        extension2format[extension] = fmt

    ioformats[name] = fmt
    return fmt

282

283

def get_ioformat(name: str) -> IOFormat:
    """Look up a registered format by name.

    Raises UnknownFileTypeError if the name is not registered or if the
    format's module cannot be imported.
    """
    if name not in ioformats:
        raise UnknownFileTypeError(name)
    # Touch the property so that an unimportable module fails here.
    _ = ioformats[name].module
    return ioformats[name]

292

293

def register_external_io_formats(group):
    """Register every IO format advertised under the entry point ``group``.

    A format that fails to register only triggers a warning, so one
    broken plugin does not prevent the others from loading.
    """
    available = entry_points()
    # Newer importlib.metadata offers select(); older versions return a
    # dict-like object keyed by group name.
    if hasattr(available, 'select'):
        candidates = available.select(group=group)
    else:
        candidates = available.get(group, ())

    for entry_point in candidates:
        try:
            define_external_io_format(entry_point)
        except Exception as exc:
            message = ('Failed to register external '
                       f'IO format {entry_point.name}: {exc}')
            warnings.warn(message)

308

309

def define_external_io_format(entry_point):
    """Load one entry point and register the format it describes.

    Raises ValueError if a format with the entry point's name already
    exists and TypeError if the loaded object is not an ExternalIOFormat.
    """
    plugin = entry_point.load()
    format_name = entry_point.name

    if format_name in ioformats:
        raise ValueError(f'Format {entry_point.name} already defined')

    if not isinstance(plugin, ExternalIOFormat):
        message = ('Wrong type for registering external IO formats '
                   f'in format {entry_point.name}, expected '
                   'ExternalIOFormat')
        raise TypeError(message)

    # The fields of the loaded object become keyword arguments.
    F(format_name, **plugin._asdict(), external=True)

320

321

# We define all the IO formats below.  Each IO format has a code,
# such as '1F', which defines some of the format's properties:
#
# 1=single atoms object
# +=multiple atoms objects
# F=accepts a file-descriptor
# S=needs a file-name str
# B=like F, but opens in binary mode
#
# The order of the definitions matters: filetype() and match_magic()
# iterate over the registry and return the first format that matches.
#
# NOTE(review): runs of spaces inside the string literals below may have
# been collapsed when this source was extracted -- confirm the magic
# patterns (e.g. gpaw-out, orca-output) against upstream.

F = define_io_format  # Short alias used for the registrations below.
F('abinit-gsr', 'ABINIT GSR file', '1S',
  module='abinit', glob='*o_GSR.nc')
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
# XXX: Define plugin in ase db backends package:
# F('aselmdb', 'ASE LMDB format', '+F')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cjson', 'Chemical json file', '1F', ext='cjson')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol', ext='arc')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['pwo', 'out'], magic=b'*Program PWSCF')
# NOTE(review): 'exciting' is defined twice; define_io_format() stores
# formats by name, so the second definition replaces the first in the
# registry -- confirm this is intended.
F('exciting', 'exciting input', '1F', module='exciting', glob='input.xml')
F('exciting', 'exciting output', '1F', module='exciting', glob='INFO.out')
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('onetep-out', 'ONETEP output file', '+F',
  module='onetep',
  magic=b'*Linear-Scaling Ab Initio Total Energy Program*')
F('onetep-in', 'ONETEP input file', '1F',
  module='onetep',
  magic=[b'*lock species ',
         b'*LOCK SPECIES ',
         b'*--- INPUT FILE ---*'])
F('orca-output', 'ORCA output', '+F',
  module='orca', magic=b'* O R C A *')
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
# The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point
register_external_io_formats('ase.ioformats')

505

506

def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression suffix off a filename.

    Only ``.gz``, ``.bz2`` and ``.xz`` are recognised.  Any other name is
    returned unchanged together with ``None``.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without the compression suffix and the suffix
        itself (without the dot), or the untouched filename and ``None``
        when the suffix is not a recognised compression format.
    """
    # Update if anything is added
    recognised = ('gz', 'bz2', 'xz')

    # splitext keeps the leading '.' on the suffix, so drop it.
    root, suffix = os.path.splitext(filename)
    kind = suffix.strip('.')

    if kind not in recognised:
        return filename, None
    return root, kind

541

542

def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a file, transparently handling compression.

    The compression is guessed from the filename suffix: ``gz`` (gzip),
    ``bz2`` (bzip2) and ``xz`` (lzma) are supported.  Any other file is
    opened with the builtin ``open``.

    The bare modes ``'r'``, ``'w'`` and ``'a'`` are turned into the text
    modes ``'rt'``, ``'wt'`` and ``'at'``; other modes, such as ``'rb'``
    and ``'wb'``, are passed on unchanged.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    # Compressed formats sometimes default to binary, so force text mode.
    text_modes = {'r': 'rt', 'w': 'wt', 'a': 'at'}
    mode = text_modes.get(mode, mode)

    _root, compression = get_compression(filename)

    # The compression modules are imported only when actually needed.
    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore[return-value]
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)

    # Either None or unknown string
    return open(filename, mode)

591

592

def is_compressed(fd: io.BufferedIOBase) -> bool:
    """Tell whether ``fd`` is a gzip, bz2 or lzma file object."""
    # Only modules that are already imported are consulted: this avoids
    # triggering imports, and Python may have been built without some of
    # them (e.g. lzma).  An object of one of these classes cannot exist
    # unless its module has been imported.
    if 'gzip' in sys.modules:
        import gzip
        if isinstance(fd, gzip.GzipFile):
            return True
    if 'bz2' in sys.modules:
        import bz2
        if isinstance(fd, bz2.BZ2File):
            return True
    if 'lzma' in sys.modules:
        import lzma
        if isinstance(fd, lzma.LZMAFile):
            return True
    return False

610

611

def wrap_read_function(read, filename, index=None, **kwargs):
    """Turn a plain read function into a generator.

    Without ``index`` the single return value of ``read`` is yielded.
    With ``index`` the return value of ``read(filename, index)`` is
    iterated and its items are yielded one by one.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)

618

619

# Type accepted wherever a file is expected: a file name (str or path
# object) or an open file object.
NameOrFile = Union[str, PurePath, IO]

621

622

def write(
    filename: NameOrFile,
    images: Union[Atoms, Sequence[Atoms]],
    format: str = None,
    parallel: bool = True,
    append: bool = False,
    **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename.
        When no format can be determined, json is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all processes.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one
        file.
        WARNING: If the file format does not support multiple entries
        without additional keywords/headers, files created using
        'append=True' might not be readable by any program!  They will
        nevertheless be written without error message.

    The use of additional keywords is format specific.  write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """
    if isinstance(filename, PurePath):
        filename = str(filename)

    if not isinstance(filename, str):
        # A file object was given; its name (if any) may still reveal
        # the format.
        fd = filename  # type: ignore[assignment]
        if format is None:
            try:
                format = filetype(filename, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
        filename = None  # type: ignore[assignment]
    elif filename == '-':
        fd = sys.stdout
        filename = None  # type: ignore[assignment]
    else:
        fd = None
        if format is None:
            format = filetype(filename, read=False)
            assert isinstance(format, str)

    if not format:
        format = 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(filename, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)

687

688

@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Dispatch the images to the write function of the format ``io``.

    Exactly one of ``filename`` and ``fd`` is expected to be set.  A file
    opened here is closed again before returning.  Raises ValueError when
    the format cannot do what is asked (write at all, store several
    images, write to a file object, append).
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError(
                f'{format}-format can only store 1 Atoms object.')
        images = images[0]

    if not io.can_write:
        raise ValueError(f"Can't write to {format}-format")

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError("Can't write more than one image to "
                             'file-descriptor using json-format.')
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The writer wants a file name.
        if fd is not None:
            raise ValueError(
                f"Can't write {format}-format to file-descriptor")
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError(f"Cannot append to {format}-format, "
                             "write-function does not support the append "
                             "keyword.")
        return io.write(filename, images, **kwargs)

    if fd is not None:
        # The caller owns the file object: write and leave it open.
        return io.write(fd, images, **kwargs)

    mode = 'a' if append else 'w'
    if io.isbinary:
        mode += 'b'
    # XXX remember to re-enable compressed open
    # fd = io.open(filename, mode)
    fd = open_with_compression(filename, mode)
    try:
        return io.write(fd, images, **kwargs)
    finally:
        fd.close()

737

738

def read(
        filename: NameOrFile,
        index: Any = None,
        format: Optional[str] = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.  The name
        '-' means standard input.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to the other
        processes.  Use parallel=False to read on all processes.
    do_not_split_by_at_sign: bool
        If False (default) ``filename`` is split at the last at sign
        ``@`` and the part after it is taken as the index.

    Many formats allow an open file-like object to be passed instead
    of ``filename``.  In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin

    if isinstance(index, str):
        # A string that is not a valid index is passed on unchanged.
        try:
            index = string2index(index)
        except ValueError:
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1

    if not format:
        # File contents are only inspected when a file name was given.
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    if not isinstance(index, (slice, str)):
        # Single image requested: take the first one from index onwards.
        images = _iread(filename, slice(index, None), format, ioformat,
                        parallel=parallel, **kwargs)
        return next(images)

    return list(_iread(filename, index, format, ioformat,
                       parallel=parallel, **kwargs))

795

796

def iread(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterator[Atoms]:
    """Iterator for reading Atoms objects from file.

    Takes the same arguments as `read`, but yields the Atoms objects one
    at a time instead of returning them all at once.  Without an index,
    all images are yielded."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None, None, None)

    if not isinstance(index, (slice, str)):
        # One-element slice; a stop of 0 (for index -1) must become None.
        stop = (index + 1) or None
        index = slice(index, stop)

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)

829

830

@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield images read from ``filename`` with the reader of ``io``.

    ``filename`` may be a file name or a file object.  With
    ``full_output`` the dictionaries produced by the reader are yielded
    (bare results are wrapped as ``{'atoms': result}``); otherwise only
    their ``'atoms'`` entries are.
    """
    if not io.can_read:
        raise ValueError(f"Can't read from {format}-format")

    if io.single:
        # Single-image readers take no index argument.
        start = index.start
        assert start is None or start == 0 or start == -1
        args = ()
    else:
        args = (index,)

    fd = filename
    opened_here = False
    if not isinstance(filename, str):
        assert io.acceptsfd
    elif io.acceptsfd:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
        opened_here = True

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for result in io.read(fd, *args, **kwargs):
            record = result if isinstance(result, dict) else {'atoms': result}
            yield record if full_output else record['atoms']
    finally:
        if opened_here:
            fd.close()

869

870

def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split a trailing ``@index`` off a filename.

    Returns ``(filename, index)``.  Nothing is split when ``filename`` is
    not a string, when splitting is disabled, or when the base name
    contains no ``@``.  An ``index`` given as a slice takes precedence
    over the one embedded in the name.  If the embedded index cannot be
    parsed, a warning is issued and it is returned as a string.
    """
    if not isinstance(filename, str):
        return filename, index

    if do_not_split_by_at_sign or '@' not in os.path.basename(filename):
        return filename, index

    # Split at the last '@', which lies in the base name.
    stem, _, embedded = filename.rpartition('@')

    if isinstance(index, slice):
        return stem, index

    try:
        embedded = string2index(embedded)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
    return stem, embedded

892

893

def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose signature matches ``data``.

    Only the first PEEK_BYTES bytes of ``data`` are considered.  Raises
    UnknownFileTypeError when no format matches.
    """
    head = data[:PEEK_BYTES]
    matching = (fmt for fmt in ioformats.values() if fmt.match_magic(head))
    found = next(matching, None)
    if found is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return found

900

901

def filetype(
    filename: NameOrFile,
    read: bool = True,
    guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then up to PEEK_BYTES bytes from the
    start of the file will be read and analysed.  Turn off this second part
    by using read=False.

    filename: str or file
        File name or open file object.  For a file object, its ``name``
        attribute (if present) is used for the name-based checks.  A file
        object passed in is rewound with ``seek(0)`` after peeking and is
        left open; only a file opened by this function is closed.
    read: bool
        Whether the file contents may be inspected.
    guess: bool
        Whether an unregistered file extension may be returned as the
        format name when nothing else matches.

    Returns the name of the format.  Raises UnknownFileTypeError if the
    file is empty or the type cannot be determined.

    Can be used from the command-line also::

        $ ase info filename ...
    """

    orig_filename = filename
    if hasattr(filename, 'name'):
        filename = filename.name

    ext = None
    opened_here = False
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        if filename.endswith('aselmdb'):
            return 'db'

        # strip any compression extensions that can be read
        root, _compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
            opened_here = True
        else:
            # A file object with a string name was passed in.
            fd = orig_filename  # type: ignore[assignment]
    else:
        # Use the object that was passed in: ``filename`` may by now hold
        # a non-string ``name`` attribute rather than the file itself.
        fd = orig_filename
        if fd is sys.stdin:
            return 'json'

    if opened_here:
        # We own this file: always close it, even if reading fails.
        try:
            data = fd.read(PEEK_BYTES)
        finally:
            fd.close()
    else:
        # The caller owns this file: rewind it so it can still be read.
        data = fd.read(PEEK_BYTES)
        fd.seek(0)

    if len(data) == 0:
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format

998

999

1000def index2range(index: int | slice, length: int) -> range:

1001 """Convert slice or integer to range.

1002

1003 If index is an integer, range will contain only that integer."""

1004 if isinstance(index, int):

1005 return range(index, index + 1)

1006 return range(length)[index]

Coverage for ase / io / formats.py: 90.98%

532 statements