# Source code for ase.io.runner.reader
"""RuNNer input.data support
Read files in RuNNer's input.data file format.
Contains
--------
* `read_runnerdata`: Read structures from a RuNNer input.data file.
Reference
---------
* [The online documentation of RuNNer 2.0](https://runner-suite.gitlab.io/runner2)
Contributors
------------
* Maintainer and Author: [Alexander Knoll](mailto:alexander.knoll@rub.de)
* Author: [Redouan El Haouari](mailto:redouan.elhaouari@rub.de)
"""
import re
from collections.abc import Iterator
from typing import TextIO
from ase.atoms import Atoms
from ase.utils import reader
from .runneratoms import (
DEFAULT_ATOM_LAYOUT,
PROP_NAME_DICT,
RuNNerAtoms,
Units,
)
# Regex for vector-valued column labels such as "forces(3)": a name made of
# word characters and/or hyphens, immediately followed by a digit count in
# parentheses. Group 1 captures the name, group 2 the count.
_vector_pattern = re.compile(r'([\w\-]+)\((\d+)\)')
def _parse_begin(begin_line: str) -> list[tuple[str, int]]:
    """Derive the per-atom property layout from a "begin" line.

    The first whitespace-separated token of `begin_line` (the keyword itself)
    is dropped. If nothing follows it, `DEFAULT_ATOM_LAYOUT` is returned
    unchanged. Otherwise the next two tokens (position and element, which are
    fixed) are skipped and every remaining token becomes one
    ``(name, length)`` entry:

    * a token of the form ``name(n)`` yields ``(name, n)``,
    * any other token is treated as a scalar and yields ``(token, 1)``.

    Names are normalised through `PROP_NAME_DICT`; names without an entry
    there are kept as they are.
    """
    # str.split() without arguments already ignores surrounding whitespace.
    columns = begin_line.split()[1:]
    if not columns:
        return DEFAULT_ATOM_LAYOUT

    def _describe(column: str) -> tuple[str, int]:
        """Translate one column label into its (name, length) pair."""
        found = _vector_pattern.fullmatch(column)
        if found is None:
            label, width = column, 1
        else:
            label, width = found.group(1), int(found.group(2))
        # Map aliases onto the default property names where one is defined.
        return PROP_NAME_DICT.get(label, label), width

    # The first two labels (position, element) are fixed and carry no
    # layout information of their own.
    return [_describe(column) for column in columns[2:]]
def _parse_chunk(
    chunk: str, begin_line: str, input_units: Units
) -> RuNNerAtoms:
    """Turn the text of one structure into a RuNNerAtoms object.

    Parameters
    ----------
    chunk:
        The text following a "begin" line, up to the next one.
    begin_line:
        The "begin" line belonging to `chunk`; it determines the per-atom
        property layout.
    input_units:
        Unit system handed to the RuNNerAtoms constructor.

    Returns
    -------
    The RuNNerAtoms object after every line of `chunk` has been passed to
    its `parse_line` method.
    """
    layout = _parse_begin(begin_line)
    # Width of all layout properties plus four further columns
    # (presumably x, y, z and the element symbol -- TODO confirm).
    column_count = sum(width for _, width in layout) + 4
    structure = RuNNerAtoms(
        atom_layout=layout,
        input_units=input_units,
        num_atom_columns=column_count,
    )
    for row in chunk.splitlines():
        structure.parse_line(row)
    return structure
# [docs]
@reader
def read_runnerdata(
    infile: TextIO,
    index: int | slice = -1,
    input_units: Units = Units.ATOMIC,
    output_units: Units = Units.ASE,
) -> Iterator[Atoms]:
    """Parse all structures within a RuNNer input.data file.

    input.data files contain all structural information needed to train a
    Behler-Parrinello-type neural network potential, e.g. Cart. coordinates,
    atomic forces, and energies. This function reads the file object `infile`
    and yields the structures selected by `index`. All structures will
    be converted to ASE units by default.

    Parameters
    ----------
    infile:
        Python fileobj with the target input.data file.
    index:
        The structures which should be returned, either as a slice or as a
        single integer position (negative values count from the end). Only
        the last structure is returned by default. An integer that is out
        of range selects nothing.
    input_units:
        The given input units. Can be 'Units.ASE' or 'Units.ATOMIC'.
    output_units:
        The desired output units. Can be 'Units.ASE' or 'Units.ATOMIC'.

    Yields
    ------
    images:
        All information about the structures within `index` of `infile`,
        including symbols, positions, atomic charges, and cell lattice. Every
        `Atoms` object has a `RunnerSinglePointCalculator` attached with
        additional information on the total energy, atomic forces, and total
        charge.
    """
    # Indexing the lists below with a plain integer would select a single
    # *string* from each, and zipping two strings pairs up characters instead
    # of structures. Normalise integers to a one-element slice; `or None`
    # prevents index == -1 from collapsing into the empty slice(-1, 0).
    if isinstance(index, int):
        index = slice(index, index + 1 or None)

    # First, split input.data into separate structure "chunks".
    file_content = infile.read()

    # NOTE(review): the pattern is not anchored to the start of a line, so
    # "begin" inside any other line would also start a new chunk -- confirm
    # whether that can occur in practice.
    begin_pattern = re.compile(r'begin.*\n')

    # The first piece is discarded because it is the data before the first
    # "begin"; afterwards `chunks` and `begin_lines` have the same length.
    chunks = begin_pattern.split(file_content)[1:]
    begin_lines = begin_pattern.findall(file_content)

    # Second, only parse the chunks which the user asked for.
    for begin_line, chunk in zip(begin_lines[index], chunks[index]):
        runneratoms = _parse_chunk(chunk, begin_line, input_units)
        runneratoms.convert(output_units)
        yield runneratoms.to_ase_atoms()