Source code for ase.io.castep.geom_md_ts

"""Parsers for CASTEP .geom, .md, .ts files"""

from math import sqrt
from typing import Callable, Dict, List, Optional, Sequence, TextIO, Union

import numpy as np

from ase import Atoms
from ase.io.formats import string2index
from ase.stress import full_3x3_to_voigt_6_stress, voigt_6_to_full_3x3_stress
from ase.utils import reader, writer


class Parser:
    """Parser for <-- `key` in .geom, .md, .ts files"""

    def __init__(self, units: Optional[Dict[str, float]] = None):
        if units is None:
            from ase.io.castep import units_CODATA2002

            self.units = units_CODATA2002
        else:
            self.units = units

    def parse(self, lines: List[str], key: str, method: Callable):
        """Parse <-- `key` in `lines` using `method`"""
        relevant_lines = [line for line in lines if line.strip().endswith(key)]
        if relevant_lines:
            return method(relevant_lines, self.units)
        return None


@reader
def _read_images(
    fd: TextIO,
    index: Union[int, slice, str] = -1,
    units: Optional[Dict[str, float]] = None,
):
    """Read a .geom or a .md file written by CASTEP.

    - .geom: written by the CASTEP GeometryOptimization task
    - .md: written by the CATSTEP MolecularDynamics task

    Original contribution by Wei-Bing Zhang. Thanks!

    Parameters
    ----------
    fd : str | TextIO
        File name or object (possibly compressed with .gz and .bz2) to be read.
    index : int | slice | str, default: -1
        Index of image to be read.
    units : dict[str, float], default: None
        Dictionary with conversion factors from atomic units to ASE units.

        - ``Eh``: Hartree energy in eV
        - ``a0``: Bohr radius in Å
        - ``me``: electron mass in Da
        - ``kB``: Boltzmann constant in eV/K

        If None, values based on CODATA2002 are used.

    Returns
    -------
    Atoms | list[Atoms]
        ASE Atoms object or list of them.

    Notes
    -----
    The force-consistent energy, forces, stress are stored in ``atoms.calc``.

    Everything in the .geom or the .md file is in atomic units.
    They are converted in ASE units, i.e., Å (length), eV (energy), Da (mass).

    Stress in the .geom or the .md file includes kinetic contribution, which is
    subtracted in ``atoms.calc``.

    """
    if isinstance(index, str):
        index = string2index(index)
    if isinstance(index, str):
        raise ValueError(index)
    return list(_iread_images(fd, units))[index]


read_castep_geom = _read_images
read_castep_md = _read_images


def _iread_images(fd: TextIO, units: Optional[Dict[str, float]] = None):
    """Read a .geom or .md file of CASTEP MolecularDynamics as a generator."""
    parser = Parser(units)
    _read_header(fd)
    lines = []
    for line in fd:
        if line.strip():
            lines.append(line)
        else:
            yield _read_atoms(lines, parser)
            lines = []


iread_castep_geom = _iread_images
iread_castep_md = _iread_images


def _read_atoms(lines: List[str], parser: Parser) -> Atoms:
    from ase.calculators.singlepoint import SinglePointCalculator

    energy = parser.parse(lines, '<-- E', _read_energies)
    cell = parser.parse(lines, '<-- h', _read_cell)
    stress = parser.parse(lines, '<-- S', _read_stress)
    symbols, positions = parser.parse(lines, '<-- R', _read_positions)
    velocities = parser.parse(lines, '<-- V', _read_velocities)
    forces = parser.parse(lines, '<-- F', _read_forces)

    # Currently unused tags:
    #
    # temperature = parser.parse(lines, '<-- T', _read_temperature)
    # pressure = parser.parse(lines, '<-- P', _read_pressure)
    # cell_velocities = parser.extract(lines, '<-- hv', _read_cell_velocities)

    atoms = Atoms(symbols, positions, cell=cell, pbc=True)

    if velocities is not None:  # MolecularDynamics
        units = parser.units
        factor = units['a0'] * sqrt(units['me'] / units['Eh'])
        atoms.info['time'] = float(lines[0].split()[0]) * factor  # au -> ASE
        atoms.set_velocities(velocities)

    if stress is not None:
        stress -= atoms.get_kinetic_stress(voigt=True)

    # The energy in .geom or .md file is the force-consistent one
    # (possibly with the the finite-basis-set correction when, e.g.,
    # finite_basis_corr!=0 in GeometryOptimisation).
    # It should therefore be reasonable to assign it to `free_energy`.
    # Be also aware that the energy in .geom file not 0K extrapolated.
    atoms.calc = SinglePointCalculator(
        atoms=atoms,
        free_energy=energy,
        forces=forces,
        stress=stress,
    )

    return atoms


def _read_header(fd: TextIO):
    for line in fd:
        if 'END header' in line:
            next(fd)  # read blank line below 'END header'
            break


def _read_energies(lines: List[str], units: Dict[str, float]) -> float:
    """Read force-consistent energy

    Notes
    -----
    Enthalpy and kinetic energy (in .md) are also written in the same line.
    They are however not parsed because they can be computed using stress and
    atomic velocties, respsectively.
    """
    return float(lines[0].split()[0]) * units['Eh']


def _read_temperature(lines: List[str], units: Dict[str, float]) -> float:
    """Read temperature

    Notes
    -----
    Temperature can be computed from kinetic energy and hence not necessary.
    """
    factor = units['Eh'] / units['kB']  # hartree -> K
    return float(lines[0].split()[0]) * factor


def _read_pressure(lines: List[str], units: Dict[str, float]) -> float:
    """Read pressure

    Notes
    -----
    Pressure can be computed from stress and hence not necessary.
    """
    factor = units['Eh'] / units['a0'] ** 3  # au -> eV/A3
    return float(lines[0].split()[0]) * factor


def _read_cell(lines: List[str], units: Dict[str, float]) -> np.ndarray:
    bohr = units['a0']
    cell = np.array([line.split()[0:3] for line in lines], dtype=float)
    return cell * bohr


# def _read_cell_velocities(lines: List[str], units: Dict[str, float]):
#     hartree = units['Eh']
#     me = units['me']
#     cell_velocities = np.array([_.split()[0:3] for _ in lines], dtype=float)
#     return cell_velocities * np.sqrt(hartree / me)


def _read_stress(lines: List[str], units: Dict[str, float]) -> np.ndarray:
    hartree = units['Eh']
    bohr = units['a0']
    stress = np.array([line.split()[0:3] for line in lines], dtype=float)
    return full_3x3_to_voigt_6_stress(stress) * (hartree / bohr**3)


def _read_positions(
    lines: List[str], units: Dict[str, float]
) -> tuple[list[str], np.ndarray]:
    bohr = units['a0']
    symbols = [line.split()[0] for line in lines]
    positions = np.array([line.split()[2:5] for line in lines], dtype=float)
    return symbols, positions * bohr


def _read_velocities(lines: List[str], units: Dict[str, float]) -> np.ndarray:
    hartree = units['Eh']
    me = units['me']
    velocities = np.array([line.split()[2:5] for line in lines], dtype=float)
    return velocities * np.sqrt(hartree / me)


def _read_forces(lines: List[str], units: Dict[str, float]) -> np.ndarray:
    hartree = units['Eh']
    bohr = units['a0']
    forces = np.array([line.split()[2:5] for line in lines], dtype=float)
    return forces * (hartree / bohr)


[docs] @writer def write_castep_geom( fd: TextIO, images: Union[Atoms, Sequence[Atoms]], units: Optional[Dict[str, float]] = None, *, pressure: float = 0.0, sort: bool = False, ): """Write a CASTEP .geom file. .. versionadded:: 3.25.0 Parameters ---------- fd : str | TextIO File name or object (possibly compressed with .gz and .bz2) to be read. images : Atoms | Sequenece[Atoms] ASE Atoms object(s) to be written. units : dict[str, float], default: None Dictionary with conversion factors from atomic units to ASE units. - ``Eh``: Hartree energy in eV - ``a0``: Bohr radius in Å - ``me``: electron mass in Da - ``kB``: Boltzmann constant in eV/K If None, values based on CODATA2002 are used. pressure : float, default: 0.0 External pressure in eV/Å\\ :sup:`3`. sort : bool, default: False If True, atoms are sorted in ascending order of atomic number. Notes ----- - Values in the .geom file are in atomic units. - Stress is printed including kinetic contribution. """ if isinstance(images, Atoms): images = [images] if units is None: from ase.io.castep import units_CODATA2002 units = units_CODATA2002 _write_header(fd) for index, atoms in enumerate(images): if sort: atoms = atoms[atoms.numbers.argsort()] _write_convergence_status(fd, index) _write_energies_geom(fd, atoms, units, pressure) _write_cell(fd, atoms, units) _write_stress(fd, atoms, units) _write_positions(fd, atoms, units) _write_forces(fd, atoms, units) fd.write(' \n')
[docs] @writer def write_castep_md( fd: TextIO, images: Union[Atoms, Sequence[Atoms]], units: Optional[Dict[str, float]] = None, *, pressure: float = 0.0, sort: bool = False, ): """Write a CASTEP .md file. .. versionadded:: 3.25.0 Parameters ---------- fd : str | TextIO File name or object (possibly compressed with .gz and .bz2) to be read. images : Atoms | Sequenece[Atoms] ASE Atoms object(s) to be written. units : dict[str, float], default: None Dictionary with conversion factors from atomic units to ASE units. - ``Eh``: Hartree energy in eV - ``a0``: Bohr radius in Å - ``me``: electron mass in Da - ``kB``: Boltzmann constant in eV/K If None, values based on CODATA2002 are used. pressure : float, default: 0.0 External pressure in eV/Å\\ :sup:`3`. sort : bool, default: False If True, atoms are sorted in ascending order of atomic number. Notes ----- - Values in the .md file are in atomic units. - Stress is printed including kinetic contribution. """ if isinstance(images, Atoms): images = [images] if units is None: from ase.io.castep import units_CODATA2002 units = units_CODATA2002 _write_header(fd) for index, atoms in enumerate(images): if sort: atoms = atoms[atoms.numbers.argsort()] _write_time(fd, index) _write_energies_md(fd, atoms, units, pressure) _write_temperature(fd, atoms, units) _write_cell(fd, atoms, units) _write_cell_velocities(fd, atoms, units) _write_stress(fd, atoms, units) _write_positions(fd, atoms, units) _write_velocities(fd, atoms, units) _write_forces(fd, atoms, units) fd.write(' \n')
def _format_float(x: float) -> str: """Format a floating number for .geom and .md files""" return np.format_float_scientific( x, precision=16, unique=False, pad_left=2, exp_digits=3, ).replace('e', 'E') def _write_header(fd: TextIO): fd.write(' BEGIN header\n') fd.write(' \n') fd.write(' END header\n') fd.write(' \n') def _write_convergence_status(fd: TextIO, index: int): fd.write(21 * ' ') fd.write(f'{index:18d}') fd.write(34 * ' ') for _ in range(4): fd.write(' F') # Convergence status. So far F for all. fd.write(10 * ' ') fd.write(' <-- c\n') def _write_time(fd: TextIO, index: int): fd.write(18 * ' ' + f' {_format_float(index)}\n') # So far index. def _write_energies_geom( fd: TextIO, atoms: Atoms, units: Dict[str, float], pressure: float = 0.0, ): """Write energies (in hartree) in a CASTEP .geom file. The energy and the enthalpy are printed. """ hartree = units['Eh'] if atoms.calc is None: return if atoms.calc.results.get('free_energy') is None: return energy = atoms.calc.results.get('free_energy') / hartree pv = pressure * atoms.get_volume() / hartree fd.write(18 * ' ') fd.write(f' {_format_float(energy)}') fd.write(f' {_format_float(energy + pv)}') fd.write(27 * ' ') fd.write(' <-- E\n') def _write_energies_md( fd: TextIO, atoms: Atoms, units: Dict[str, float], pressure: float = 0.0, ): """Write energies (in hartree) in a CASTEP .md file. The potential energy, the total energy or enthalpy, and the kinetic energy are printed. Notes ----- For the second item, CASTEP prints the total energy for the NVE and the NVT ensembles and the total enthalpy for the NPH and NPT ensembles. For the Nosé–Hoover (chain) thermostat, furthermore, the energies of the thermostats are also added. """ hartree = units['Eh'] if atoms.calc is None: return if atoms.calc.results.get('free_energy') is None: return potential = atoms.calc.results.get('free_energy') / hartree kinetic = atoms.get_kinetic_energy() / hartree pv = pressure * atoms.get_volume() / hartree fd.write(18 * ' ') fd.write(f' {_format_float(potential)}') fd.write(f' {_format_float(potential + kinetic + pv)}') fd.write(f' {_format_float(kinetic)}') fd.write(' <-- E\n') def _write_temperature(fd: TextIO, atoms: Atoms, units: Dict[str, float]): """Write temperature (in hartree) in a CASTEP .md file. CASTEP writes the temperature in a .md file with 3`N` degrees of freedom regardless of the given constraints including the fixed center of mass (``FIX_COM`` which is by default ``TRUE`` in many cases). To get the consistent result with the above behavior of CASTEP using this method, we must set the corresponding constraints (e.g. ``FixCom``) to the ``Atoms`` object beforehand. """ hartree = units['Eh'] # eV boltzmann = units['kB'] # eV/K temperature = atoms.get_temperature() * boltzmann / hartree fd.write(18 * ' ') fd.write(f' {_format_float(temperature)}') fd.write(54 * ' ') fd.write(' <-- T\n') def _write_cell(fd: TextIO, atoms: Atoms, units: Dict[str, float]): bohr = units['a0'] cell = atoms.cell / bohr # in bohr for i in range(3): fd.write(18 * ' ') for j in range(3): fd.write(f' {_format_float(cell[i, j])}') fd.write(' <-- h\n') def _write_cell_velocities(fd: TextIO, atoms: Atoms, units: Dict[str, float]): pass # TODO: to be implemented def _write_stress(fd: TextIO, atoms: Atoms, units: Dict[str, float]): if atoms.calc is None: return stress = atoms.calc.results.get('stress') if stress is None: return if stress.shape != (3, 3): stress = voigt_6_to_full_3x3_stress(stress) stress += atoms.get_kinetic_stress(voigt=False) hartree = units['Eh'] bohr = units['a0'] stress = stress / (hartree / bohr**3) for i in range(3): fd.write(18 * ' ') for j in range(3): fd.write(f' {_format_float(stress[i, j])}') fd.write(' <-- S\n') def _write_positions(fd: TextIO, atoms: Atoms, units: Dict[str, float]): bohr = units['a0'] positions = atoms.positions / bohr symbols = atoms.symbols indices = symbols.species_indices() for i, symbol, position in zip(indices, symbols, positions): fd.write(f' {symbol:8s}') fd.write(f' {i + 1:8d}') for j in range(3): fd.write(f' {_format_float(position[j])}') fd.write(' <-- R\n') def _write_forces(fd: TextIO, atoms: Atoms, units: Dict[str, float]): if atoms.calc is None: return forces = atoms.calc.results.get('forces') if forces is None: return hartree = units['Eh'] bohr = units['a0'] forces = forces / (hartree / bohr) symbols = atoms.symbols indices = symbols.species_indices() for i, symbol, force in zip(indices, symbols, forces): fd.write(f' {symbol:8s}') fd.write(f' {i + 1:8d}') for j in range(3): fd.write(f' {_format_float(force[j])}') fd.write(' <-- F\n') def _write_velocities(fd: TextIO, atoms: Atoms, units: Dict[str, float]): hartree = units['Eh'] me = units['me'] velocities = atoms.get_velocities() / np.sqrt(hartree / me) symbols = atoms.symbols indices = symbols.species_indices() for i, symbol, velocity in zip(indices, symbols, velocities): fd.write(f' {symbol:8s}') fd.write(f' {i + 1:8d}') for j in range(3): fd.write(f' {_format_float(velocity[j])}') fd.write(' <-- V\n')