Coverage for /builds/ase/ase/ase/io/formats.py: 91.01%

1# fmt: off

3"""File formats.

5This module implements the read(), iread() and write() functions in ase.io.

6For each file format there is an IOFormat object.

8There is a dict, ioformats, which stores the objects.

10Example

11=======

13The xyz format is implemented in the ase/io/xyz.py file which has a

14read_xyz() generator and a write_xyz() function. This and other

15information can be obtained from ioformats['xyz'].

16"""

18import functools

19import inspect

20import io

21import numbers

22import os

23import re

24import sys

25import warnings

26from importlib import import_module

27from importlib.metadata import entry_points

28from pathlib import PurePath

29from typing import (

30 IO,

31 Any,

32 Dict,

33 Iterator,

34 List,

35 Optional,

36 Sequence,

37 Tuple,

38 Union,

39)

41from ase.atoms import Atoms

42from ase.parallel import parallel_function, parallel_generator

43from ase.utils import string2index

44from ase.utils.plugins import ExternalIOFormat

# Maximum number of bytes taken from the start of a file when its format
# has to be guessed from the contents (see filetype() and match_magic()).
PEEK_BYTES = 50000

class UnknownFileTypeError(Exception):
    """Raised when a file format is unknown or cannot be determined."""

class IOFormat:
    """One file format and lazy access to its read/write functions.

    ``code`` is a two-character string.  The first character is ``'1'``
    (one Atoms object per file) or ``'+'`` (several); the second one is
    ``'F'`` (the I/O functions take an open file), ``'B'`` (like ``'F'``
    but the file is binary) or ``'S'`` (the functions need a filename).

    The implementing module is imported on demand through :attr:`module`.
    Its ``read_<name>`` and ``write_<name>`` functions (dashes of the
    format name replaced by underscores) do the actual work.
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        """Store the format description; ``code`` must be like ``'+F'``."""
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        # NOTE(review): match_magic() concatenates these with b'*', so the
        # entries are expected to be bytes despite the annotation.
        self.magic: List[str] = []
        self.magic_regex: Optional[bytes] = None

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap ``data`` in a BytesIO/StringIO matching this format.

        ``data`` is encoded or decoded as needed, using ``self.encoding``
        or UTF-8 when no encoding was specified.
        """
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory file class to use: BytesIO if binary, else StringIO."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> Sequence[Atoms]:
        """Parse file contents given as str/bytes and return all images.

        The result is always a list, also for single-image formats.
        """
        with self._buf_as_filelike(data) as fd:
            # self.read() hands back a generator for every format (plain
            # read functions are wrapped by _read_wrapper), so it must be
            # consumed rather than type-checked, and created only once.
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> Atoms:
        """Parse file contents and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the module defines a read function for this format."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the module defines a write function for this format."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the write function knows the name ``append``.

        NOTE(review): ``co_varnames`` lists local variables as well as
        parameters, so a local called ``append`` would also count.
        """
        writefunc = self._writefunc()
        if writefunc is None:
            return False
        return 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = [f'{name}={value!r}'
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with dashes replaced by underscores."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return the module's ``read_<format>`` function or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return the module's ``write_<format>`` function or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Callable that reads this format, yielding its results.

        Returns None (after a FutureWarning) if reading is unsupported.
        """
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the read function so that the result is always a generator."""
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            # Plain functions are adapted by wrap_read_function().
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Emit the FutureWarning about ``read``/``write`` returning None."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None. '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Callable that writes this format.

        Returns None (after a FutureWarning) if writing is unsupported.
        """
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Forward to the write function; ValueError if there is none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """Supported modes: ``''``, ``'r'``, ``'w'`` or ``'rw'``."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name:        {self.name}',
                 f'Description: {self.description}',
                 f'Modes:       {self.modes}',
                 f'Encoding:    {self.encoding}',
                 f'Module:      {self.module_name}',
                 f'Code:        {self.code}',
                 f'Extensions:  {self.extensions}',
                 f'Globs:       {self.globs}',
                 f'Magic:       {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether the I/O functions take an open file (code F or B)."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether files of this format are opened in binary mode."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the implementing module.

        Raises UnknownFileTypeError if the module cannot be imported.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            # Chain the cause so the failing import stays visible.
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether ``basename`` matches one of this format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether ``data`` (start of a file) matches this format's magic.

        ``magic_regex``, if set, is matched at the start of ``data``;
        otherwise each ``magic`` glob pattern is tried as a prefix.
        """
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(
            fnmatchcase(data, magic + b'*')  # type: ignore[operator, type-var]
            for magic in self.magic
        )

238

239

# Registry of all defined formats, keyed by format name.
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Maps a filename extension (as passed via ``ext=``, without the dot) to
# the IOFormat that registered it.
extension2format = {}


all_formats = ioformats  # Aliased for compatibility only.  Please do not use.
# Maps a format name to its module name without the 'ase.io.' prefix.
format2modulename = {}  # Left for compatibility only.

246

247

def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat and add it to the module-level registries.

    name: format name, used as the key in ``ioformats``.
    desc: human-readable description.
    code: two-character code such as ``'1F'`` (see IOFormat).
    module: name of the implementing module; defaults to ``name`` with
        dashes replaced by underscores.  Unless ``external`` is true it
        is taken relative to ``ase.io``.
    ext, glob, magic: a single pattern or an iterable of patterns giving
        filename extensions, filename globs and content signatures.
    encoding: text encoding stored on the format.
    magic_regex: bytes regex matched against the start of the contents.
    external: if true, ``module`` is used as a full module path.

    Returns the new IOFormat.  Raises ValueError if an extension is
    already taken; in that case no registry is modified.
    """
    if module is None:
        module = name.replace('-', '_')
    # format2modulename stores the name without the 'ase.io.' prefix.
    short_module = module

    if not external:
        module = 'ase.io.' + module

    def normalize_patterns(strings):
        """Return ``strings`` as a list (None -> [], scalar -> [scalar])."""
        if strings is None:
            return []
        if isinstance(strings, (str, bytes)):
            return [strings]
        return list(strings)

    fmt = IOFormat(name, desc, code, module_name=module,
                   encoding=encoding)
    fmt.extensions = normalize_patterns(ext)
    fmt.globs = normalize_patterns(glob)
    fmt.magic = normalize_patterns(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    # Check every extension before touching any registry, so that a
    # conflict cannot leave a half-registered format behind.
    claimed = set()
    for extension in fmt.extensions:
        if extension in extension2format or extension in claimed:
            raise ValueError(f'extension "{extension}" already registered')
        claimed.add(extension)

    format2modulename[name] = short_module
    for extension in fmt.extensions:
        extension2format[extension] = fmt
    ioformats[name] = fmt
    return fmt

283

284

def get_ioformat(name: str) -> IOFormat:
    """Look up the IOFormat registered as ``name``.

    Raises UnknownFileTypeError if the name is not registered or if the
    module implementing the format cannot be imported.
    """
    fmt = ioformats.get(name)
    if fmt is None:
        raise UnknownFileTypeError(name)
    # Accessing the property imports the module, which surfaces import
    # problems here instead of at first use.
    fmt.module
    return fmt

293

294

def register_external_io_formats(group):
    """Register every format advertised under entry-point group ``group``.

    A format that fails to register is reported with a warning and
    skipped; the remaining entry points are still processed.
    """
    discovered = entry_points()
    if hasattr(discovered, 'select'):
        candidates = discovered.select(group=group)
    else:
        # Older importlib.metadata returns a plain mapping of groups.
        candidates = discovered.get(group, ())

    for candidate in candidates:
        try:
            define_external_io_format(candidate)
        except Exception as exc:
            warnings.warn(
                'Failed to register external '
                f'IO format {candidate.name}: {exc}'
            )

309

310

def define_external_io_format(entry_point):
    """Load ``entry_point`` and register the format it describes.

    The format is registered under the entry point's name with
    ``external=True``.  Raises ValueError if that name is already
    defined and TypeError if the loaded object is not an
    ExternalIOFormat.
    """
    loaded = entry_point.load()
    format_name = entry_point.name

    if format_name in ioformats:
        raise ValueError(f'Format {format_name} already defined')

    if not isinstance(loaded, ExternalIOFormat):
        raise TypeError('Wrong type for registering external IO formats '
                        f'in format {format_name}, expected '
                        'ExternalIOFormat')

    F(format_name, **loaded._asdict(), external=True)

321

322

# We define all the IO formats below.  Each IO format has a code,
# such as '1F', which defines some of the format's properties:
#
# 1=single atoms object
# +=multiple atoms objects
# F=accepts a file-descriptor
# S=needs a file-name str
# B=like F, but opens in binary mode
#
# Keyword arguments (see define_io_format): ``module`` names the
# implementing module, ``ext`` registers filename extensions, ``glob``
# gives filename patterns and ``magic``/``magic_regex`` give byte
# patterns matched against the start of the file contents.

# Short alias that keeps the table below compact.
F = define_io_format
F('abinit-gsr', 'ABINIT GSR file', '1S',
  module='abinit', glob='*o_GSR.nc')
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
# XXX: Define plugin in ase db backends package:
# F('aselmdb', 'ASE LMDB format', '+F')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cjson', 'Chemical json file', '1F', ext='cjson')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol', ext='arc')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['pwo', 'out'], magic=b'*Program PWSCF')
# NOTE(review): 'exciting' is defined twice.  define_io_format() stores
# formats in a dict keyed by name, so the second definition replaces the
# first one in ``ioformats`` -- confirm that this is intended.
F('exciting', 'exciting input', '1F', module='exciting', glob='input.xml')
F('exciting', 'exciting output', '1F', module='exciting', glob='INFO.out')
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
# NOTE(review): spaces inside magic patterns are matched literally;
# verify the exact spacing of this pattern against the upstream file.
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('onetep-out', 'ONETEP output file', '+F',
  module='onetep',
  magic=b'*Linear-Scaling Ab Initio Total Energy Program*')
F('onetep-in', 'ONETEP input file', '1F',
  module='onetep',
  magic=[b'*lock species ',
         b'*LOCK SPECIES ',
         b'*--- INPUT FILE ---*'])
F('orca-output', 'ORCA output', '+F',
  module='orca', magic=b'* O R C A *')
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
#      The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point
register_external_io_formats('ase.ioformats')

506

507

def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression extension off a filename.

    Only the last extension is inspected, and only ``gz``, ``bz2`` and
    ``xz`` count as compression.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without the compression extension, and that
        extension without its dot.  If the last extension is not a
        recognised compression, the filename is returned unchanged
        together with None.
    """
    # Update if anything is added
    known = ('gz', 'bz2', 'xz')

    # The stdlib handles the edge cases; its extension includes the '.'.
    root, suffix = os.path.splitext(filename)
    compression = suffix.strip('.')

    if compression not in known:
        return filename, None
    return root, compression

542

543

def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a file, transparently handling compression.

    The compression is guessed from the filename extension and the
    file is opened with the matching module so that it behaves like a
    regular file.  Files without a recognised compression extension
    are opened with the builtin ``open``.

    Implemented for ``gz``(gzip), ``bz2``(bzip2) and ``xz``(lzma).

    Supported modes are:
       * 'r', 'rt', 'w', 'wt' for text mode read and write.
       * 'rb', 'wb' for binary read and write.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    # Compressed formats sometimes default to binary, so force text mode.
    mode = {'r': 'rt', 'w': 'wt', 'a': 'at'}.get(mode, mode)

    _root, compression = get_compression(filename)

    # The compression modules are imported only when actually needed.
    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore[return-value]
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)

    # Either None or unknown string
    return open(filename, mode)

592

593

def is_compressed(fd: io.BufferedIOBase) -> bool:
    """Check if the file object is in a compressed format."""
    # Only modules that are already imported are consulted: this avoids
    # triggering imports, and Python can be compiled without e.g. lzma.
    candidates = (
        ('gzip', 'GzipFile'),
        ('bz2', 'BZ2File'),
        ('lzma', 'LZMAFile'),
    )
    for module_name, class_name in candidates:
        module = sys.modules.get(module_name)
        if module is None:
            continue
        if isinstance(fd, getattr(module, class_name)):
            return True
    return False

611

612

def wrap_read_function(read, filename, index=None, **kwargs):
    """Turn a plain read function into a generator.

    With an index, the items of ``read(filename, index, **kwargs)`` are
    yielded one by one; without one, the single result of
    ``read(filename, **kwargs)`` is yielded.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)

619

620

# Type of the ``filename`` argument of read(), iread(), write() and
# filetype(): a path given as str or PurePath, or a file object.
NameOrFile = Union[str, PurePath, IO]

622

623

def write(
    filename: NameOrFile,
    images: Union[Atoms, Sequence[Atoms]],
    format: str = None,
    parallel: bool = True,
    append: bool = False,
    **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the file-format
        is guessed from the filename; when nothing can be determined for
        standard output or a file object, 'json' is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one file.
        WARNING: If the file format does not support multiple entries without
        additional keywords/headers, files created using 'append=True'
        might not be readable by any program! They will nevertheless be
        written without error message.

    The use of additional keywords is format specific. write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """
    if isinstance(filename, PurePath):
        filename = str(filename)

    fd = None
    if not isinstance(filename, str):
        # A file object was given: write to it, there is no filename.
        fd, filename = filename, None  # type: ignore[assignment]
        if format is None:
            try:
                format = filetype(fd, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
    elif filename == '-':
        fd, filename = sys.stdout, None  # type: ignore[assignment]
    elif format is None:
        format = filetype(filename, read=False)
        assert isinstance(format, str)

    if not format:
        format = 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(filename, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)

688

689

@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write ``images`` with the IOFormat ``io``.

    Exactly one of ``filename`` and ``fd`` is expected to be set.  A
    file opened here is closed again; a file passed in by the caller is
    left open.  Raises ValueError when the format cannot be written in
    the requested way.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError('{}-format can only store 1 Atoms object.'
                             .format(format))
        images = images[0]

    if not io.can_write:
        raise ValueError(f"Can't write to {format}-format")

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError("Can't write more than one image to "
                             "file-descriptor "
                             'using json-format.')
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The write function opens the file itself and needs a name.
        if fd is not None:
            raise ValueError("Can't write {}-format to file-descriptor"
                             .format(format))
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError("Cannot append to {}-format, write-function "
                             "does not support the append keyword."
                             .format(format))
        return io.write(filename, images, **kwargs)

    if fd is not None:
        # Caller-owned file object: write to it and leave it open.
        return io.write(fd, images, **kwargs)

    mode = ('a' if append else 'w') + ('b' if io.isbinary else '')
    try:
        fd = open_with_compression(filename, mode)
        return io.write(fd, images, **kwargs)
    finally:
        if fd is not None:
            fd.close()

738

739

def read(
    filename: NameOrFile,
    index: Any = None,
    format: Optional[str] = None,
    parallel: bool = True,
    do_not_split_by_at_sign: bool = False,
    **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.  The name
        '-' means standard input.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd

        A slice (or a string that is not a valid index expression)
        gives a list of results; an integer gives a single result.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default), ``filename`` is split at the last at sign
        ``@`` and the part after it is used as the index.

    Many formats allow an open file-like object to be passed instead
    of ``filename``.  In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given."""
    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin
    if isinstance(index, str):
        try:
            index = string2index(index)
        except ValueError:
            # Not an index expression: hand the string on unchanged.
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1
    if not format:
        # File contents are only inspected when a filename was given.
        format = filetype(filename, read=isinstance(filename, str))

    ioformat = get_ioformat(format)
    if not isinstance(index, (slice, str)):
        single = _iread(filename, slice(index, None), format, ioformat,
                        parallel=parallel, **kwargs)
        return next(single)

    return list(_iread(filename, index, format, ioformat,
                       parallel=parallel, **kwargs))

796

797

def iread(
    filename: NameOrFile,
    index: Any = None,
    format: Optional[str] = None,
    parallel: bool = True,
    do_not_split_by_at_sign: bool = False,
    **kwargs
) -> Iterator[Atoms]:
    """Iterator for reading Atoms objects from file.

    Works as the `read` function, but yields one Atoms object at a time
    instead of all at once.  Unlike `read`, all configurations are
    yielded when no index is given, and an integer index yields just
    that one configuration.

    A string index that is not a valid index expression is passed on to
    the reader unchanged, as `read` does."""
    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        try:
            index = string2index(index)
        except ValueError:
            # Same tolerance as read(): keep the string and let the
            # format's reader interpret it.
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None, None, None)

    if not isinstance(index, (slice, str)):
        # Single integer -> one-element slice; -1 needs an open end
        # because slice(-1, 0) would be empty.
        index = slice(index, (index + 1) or None)

    format = format or filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)

830

831

@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield what the IOFormat ``io`` reads from ``filename``.

    ``filename`` is a path or a file object.  A path is opened here
    (and closed again) when the format works on file objects.  Items
    produced by the reader that are not dicts are wrapped as
    ``{'atoms': item}``; with ``full_output`` the dict is yielded,
    otherwise only its ``'atoms'`` entry.
    """
    if not io.can_read:
        raise ValueError(f"Can't read from {format}-format")

    if io.single:
        # Single-image readers take no index; only "first"/"last" fit.
        start = index.start
        assert start is None or start in (0, -1)
        args = ()
    else:
        args = (index,)

    fd = filename
    must_close_fd = False
    if not isinstance(filename, str):
        assert io.acceptsfd
    elif io.acceptsfd:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
        must_close_fd = True

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *args, **kwargs):
            record = item if isinstance(item, dict) else {'atoms': item}
            yield record if full_output else record['atoms']
    finally:
        if must_close_fd:
            fd.close()

870

871

def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split a trailing ``@index`` off ``filename``.

    Returns ``(filename, index)``.  Nothing is split when ``filename`` is
    not a str, when ``do_not_split_by_at_sign`` is true or when the
    basename contains no ``@``.  Otherwise the name is cut at its last
    ``@``.  A slice passed as ``index`` takes precedence over the
    embedded index.  If the embedded index cannot be parsed, a warning
    is issued and it is returned as a plain string.
    """
    if not isinstance(filename, str):
        return filename, index
    if do_not_split_by_at_sign or '@' not in os.path.basename(filename):
        return filename, index

    stem, _sep, embedded = filename.rpartition('@')
    if isinstance(index, slice):
        return stem, index

    try:
        embedded = string2index(embedded)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
    return stem, embedded

893

894

def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose magic matches ``data``.

    Only the first PEEK_BYTES bytes of ``data`` are considered.  Raises
    UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    found = next(
        (fmt for fmt in ioformats.values() if fmt.match_magic(head)),
        None,
    )
    if found is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return found

901

902

def filetype(
    filename: NameOrFile,
    read: bool = True,
    guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then the first ``PEEK_BYTES`` bytes of
    the file will be read and analysed.  Turn off this second part by using
    read=False.

    ``filename`` may be a path (str or PurePath) or an open file object.
    Standard input is always reported as 'json'.  A file object passed
    in by the caller is never closed; after peeking at its contents it
    is rewound with ``seek(0)``.

    If neither the name nor the contents identify the format, the
    filename extension itself is returned when ``guess`` is true.
    Otherwise, or if there is no extension, UnknownFileTypeError is
    raised; it is also raised for empty files.

    Can be used from the command-line also::

        $ ase info filename ...
    """
    if isinstance(filename, PurePath):
        # PurePath has a ``.name`` attribute (the basename); convert first
        # so that the full path is used below.
        filename = str(filename)

    orig_filename = filename
    if orig_filename is sys.stdin:
        # Checked up front: the real stdin has a ``.name`` and would
        # otherwise be treated like a named file.
        return 'json'
    if hasattr(filename, 'name'):
        filename = filename.name

    ext = None
    opened_here = False
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        if filename.endswith('aselmdb'):
            return 'db'

        # strip any compression extensions that can be read
        root, _compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if isinstance(orig_filename, str):
            fd = open_with_compression(filename, 'rb')
            opened_here = True
        else:
            # Named file object supplied by the caller.
            fd = orig_filename  # type: ignore[assignment]
    else:
        # File object without a usable (str) name.  Use the object
        # itself, not its ``.name`` attribute.
        fd = orig_filename

    try:
        data = fd.read(PEEK_BYTES)
    finally:
        # Only close what was opened here.
        if opened_here:
            fd.close()
    if not opened_here:
        fd.seek(0)

    if len(data) == 0:
        # f-string: ``filename`` need not be a str at this point.
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format

999

1000

def index2range(index, length):
    """Translate an integer or slice into a range over ``length`` items.

    A slice gives the corresponding sub-range of ``range(length)``; an
    integer gives a range containing just that (normalised) position.
    """
    selected = range(length)[index]
    if not isinstance(selected, numbers.Integral):
        return selected
    return range(selected, selected + 1)

1008 return obj