Coverage for /builds/ase/ase/ase/io/castep/castep_reader.py: 91.15%
339 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-08-02 00:12 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2025-08-02 00:12 +0000
1# fmt: off
3import io
4import re
5import warnings
6from collections import defaultdict
7from typing import Any, Dict, List, Optional
9import numpy as np
11from ase import Atoms, units
12from ase.calculators.singlepoint import SinglePointCalculator
13from ase.constraints import FixAtoms, FixCartesian, FixConstraint
14from ase.io import ParseError
15from ase.utils import reader, string2index
18@reader
19def read_castep_castep(fd, index=-1):
20 """Read a .castep file and returns an Atoms object.
22 The calculator information will be stored in the calc attribute.
24 Notes
25 -----
26 This routine will return an atom ordering as found within the castep file.
27 This means that the species will be ordered by ascending atomic numbers.
28 The atoms witin a species are ordered as given in the original cell file.
30 """
31 # look for last result, if several CASTEP run are appended
32 line_start, line_end, end_found = _find_last_record(fd)
33 if not end_found:
34 raise ParseError(f'No regular end found in {fd.name} file.')
36 # These variables are finally assigned to `SinglePointCalculator`
37 # for backward compatibility with the `Castep` calculator.
38 cut_off_energy = None
39 kpoints = None
40 total_time = None
41 peak_memory = None
43 # jump back to the beginning to the last record
44 fd.seek(0)
45 for i, line in enumerate(fd):
46 if i == line_start:
47 break
49 # read header
50 parameters_header = _read_header(fd)
51 if 'cut_off_energy' in parameters_header:
52 cut_off_energy = parameters_header['cut_off_energy']
53 if 'basis_precision' in parameters_header:
54 del parameters_header['cut_off_energy'] # avoid conflict
56 markers_new_iteration = [
57 'BFGS: starting iteration',
58 'BFGS: improving iteration',
59 'Starting MD iteration',
60 ]
62 images = []
64 results = {}
65 constraints = []
66 species_pot = []
67 castep_warnings = []
68 for i, line in enumerate(fd):
70 if i > line_end:
71 break
73 if 'Number of kpoints used' in line:
74 kpoints = int(line.split('=')[-1].strip())
75 elif 'Unit Cell' in line:
76 lattice_real = _read_unit_cell(fd)
77 elif 'Cell Contents' in line:
78 for line in fd:
79 if 'Total number of ions in cell' in line:
80 n_atoms = int(line.split()[7])
81 if 'Total number of species in cell' in line:
82 int(line.split()[7])
83 fields = line.split()
84 if len(fields) == 0:
85 break
86 elif 'Fractional coordinates of atoms' in line:
87 species, custom_species, positions_frac = \
88 _read_fractional_coordinates(fd, n_atoms)
89 elif 'Files used for pseudopotentials' in line:
90 for line in fd:
91 line = fd.readline()
92 if 'Pseudopotential generated on-the-fly' in line:
93 continue
94 fields = line.split()
95 if len(fields) == 2:
96 species_pot.append(fields)
97 else:
98 break
99 elif 'k-Points For BZ Sampling' in line:
100 # TODO: generalize for non-Monkhorst Pack case
101 # (i.e. kpoint lists) -
102 # kpoints_offset cannot be read this way and
103 # is hence always set to None
104 for line in fd:
105 if not line.strip():
106 break
107 if 'MP grid size for SCF calculation' in line:
108 # kpoints = ' '.join(line.split()[-3:])
109 # self.kpoints_mp_grid = kpoints
110 # self.kpoints_mp_offset = '0. 0. 0.'
111 # not set here anymore because otherwise
112 # two calculator objects go out of sync
113 # after each calculation triggering unnecessary
114 # recalculation
115 break
117 elif 'Final energy' in line:
118 key = 'energy_without_dispersion_correction'
119 results[key] = float(line.split()[-2])
120 elif 'Final free energy' in line:
121 key = 'free_energy_without_dispersion_correction'
122 results[key] = float(line.split()[-2])
123 elif 'NB est. 0K energy' in line:
124 key = 'energy_zero_without_dispersion_correction'
125 results[key] = float(line.split()[-2])
127 # Add support for dispersion correction
128 # filtering due to SEDC is done in get_potential_energy
129 elif 'Dispersion corrected final energy' in line:
130 key = 'energy_with_dispersion_correlation'
131 results[key] = float(line.split()[-2])
132 elif 'Dispersion corrected final free energy' in line:
133 key = 'free_energy_with_dispersion_correlation'
134 results[key] = float(line.split()[-2])
135 elif 'NB dispersion corrected est. 0K energy' in line:
136 key = 'energy_zero_with_dispersion_correlation'
137 results[key] = float(line.split()[-2])
139 # check if we had a finite basis set correction
140 elif 'Total energy corrected for finite basis set' in line:
141 key = 'energy_with_finite_basis_set_correction'
142 results[key] = float(line.split()[-2])
144 # ******************** Forces *********************
145 # ************** Symmetrised Forces ***************
146 # ******************** Constrained Forces ********************
147 # ******************* Unconstrained Forces *******************
148 elif re.search(r'\**.* Forces \**', line):
149 forces, constraints = _read_forces(fd, n_atoms)
150 results['forces'] = np.array(forces)
152 # ***************** Stress Tensor *****************
153 # *********** Symmetrised Stress Tensor ***********
154 elif re.search(r'\**.* Stress Tensor \**', line):
155 results.update(_read_stress(fd))
157 elif any(_ in line for _ in markers_new_iteration):
158 _add_atoms(
159 images,
160 lattice_real,
161 species,
162 custom_species,
163 positions_frac,
164 constraints,
165 results,
166 )
167 # reset for the next step
168 lattice_real = None
169 species = None
170 positions_frac = None
171 results = {}
172 constraints = []
174 # extract info from the Mulliken analysis
175 elif 'Atomic Populations' in line:
176 results.update(_read_mulliken_charges(fd))
178 # extract detailed Hirshfeld analysis (iprint > 1)
179 elif 'Hirshfeld total electronic charge (e)' in line:
180 results.update(_read_hirshfeld_details(fd, n_atoms))
182 elif 'Hirshfeld Analysis' in line:
183 results.update(_read_hirshfeld_charges(fd))
185 # There is actually no good reason to get out of the loop
186 # already at this point... or do I miss something?
187 # elif 'BFGS: Final Configuration:' in line:
188 # break
189 elif 'warn' in line.lower():
190 castep_warnings.append(line)
192 # fetch some last info
193 elif 'Total time' in line:
194 pattern = r'.*=\s*([\d\.]+) s'
195 total_time = float(re.search(pattern, line).group(1))
197 elif 'Peak Memory Use' in line:
198 pattern = r'.*=\s*([\d]+) kB'
199 peak_memory = int(re.search(pattern, line).group(1))
201 # add the last image
202 _add_atoms(
203 images,
204 lattice_real,
205 species,
206 custom_species,
207 positions_frac,
208 constraints,
209 results,
210 )
212 for atoms in images:
213 # these variables are temporarily assigned to `SinglePointCalculator`
214 # to be assigned to the `Castep` calculator for backward compatibility
215 atoms.calc._cut_off_energy = cut_off_energy
216 atoms.calc._kpoints = kpoints
217 atoms.calc._species_pot = species_pot
218 atoms.calc._total_time = total_time
219 atoms.calc._peak_memory = peak_memory
220 atoms.calc._parameters_header = parameters_header
222 if castep_warnings:
223 warnings.warn('WARNING: .castep file contains warnings')
224 for warning in castep_warnings:
225 warnings.warn(warning)
227 if isinstance(index, str):
228 index = string2index(index)
230 return images[index]
233def _find_last_record(fd):
234 """Find the last record of the .castep file.
236 Returns
237 -------
238 start : int
239 Line number of the first line of the last record.
240 end : int
241 Line number of the last line of the last record.
242 end_found : bool
243 True if the .castep file ends as expected.
245 """
246 start = -1
247 for i, line in enumerate(fd):
248 if (('Welcome' in line or 'Materials Studio' in line)
249 and 'CASTEP' in line):
250 start = i
252 if start < 0:
253 warnings.warn(
254 f'Could not find CASTEP label in result file: {fd.name}.'
255 ' Are you sure this is a .castep file?'
256 )
257 return None
259 # search for regular end of file
260 end_found = False
261 # start to search from record beginning from the back
262 # and see if
263 end = -1
264 fd.seek(0)
265 for i, line in enumerate(fd):
266 if i < start:
267 continue
268 if 'Finalisation time =' in line:
269 end_found = True
270 end = i
271 break
273 return (start, end, end_found)
276def _read_header(out: io.TextIOBase):
277 """Read the header blocks from a .castep file.
279 Returns
280 -------
281 parameters : dict
282 Dictionary storing keys and values of a .param file.
283 """
284 def _parse_on_off(_: str):
285 return {'on': True, 'off': False}[_]
287 read_title = False
288 parameters: Dict[str, Any] = {}
289 for line in out:
290 if len(line) == 0: # end of file
291 break
292 if re.search(r'^\s*\*+$', line) and read_title: # end of header
293 break
295 if re.search(r'\**.* Title \**', line):
296 read_title = True
298 # General Parameters
300 elif 'output verbosity' in line:
301 parameters['iprint'] = int(line.split()[-1][1])
302 elif re.match(r'\stype of calculation\s*:', line):
303 parameters['task'] = {
304 'single point energy': 'SinglePoint',
305 'geometry optimization': 'GeometryOptimization',
306 'band structure': 'BandStructure',
307 'molecular dynamics': 'MolecularDynamics',
308 'optical properties': 'Optics',
309 'phonon calculation': 'Phonon',
310 'E-field calculation': 'Efield',
311 'Phonon followed by E-field': 'Phonon+Efield',
312 'transition state search': 'TransitionStateSearch',
313 'Magnetic Resonance': 'MagRes',
314 'Core level spectra': 'Elnes',
315 'Electronic Spectroscopy': 'ElectronicSpectroscopy',
316 }[line.split(':')[-1].strip()]
317 elif 'stress calculation' in line:
318 parameters['calculate_stress'] = _parse_on_off(line.split()[-1])
319 elif 'calculation limited to maximum' in line:
320 parameters['run_time'] = float(line.split()[-2])
321 elif re.match(r'\soptimization strategy\s*:', line):
322 parameters['opt_strategy'] = {
323 'maximize speed(+++)': 'Speed',
324 'minimize memory(---)': 'Memory',
325 'balance speed and memory': 'Default',
326 }[line.split(':')[-1].strip()]
328 # Exchange-Correlation Parameters
329 elif re.match(r'\susing functional\s*:', line):
330 functional_abbrevs = {
331 'Local Density Approximation': 'LDA',
332 'Perdew Wang (1991)': 'PW91',
333 'Perdew Burke Ernzerhof': 'PBE',
334 'revised Perdew Burke Ernzerhof': 'RPBE',
335 'PBE with Wu-Cohen exchange': 'WC',
336 'PBE for solids (2008)': 'PBESOL',
337 'Hartree-Fock': 'HF',
338 'Hartree-Fock +': 'HF-LDA',
339 'Screened Hartree-Fock': 'sX',
340 'Screened Hartree-Fock + ': 'sX-LDA',
341 'hybrid PBE0': 'PBE0',
342 'hybrid B3LYP': 'B3LYP',
343 'hybrid HSE03': 'HSE03',
344 'hybrid HSE06': 'HSE06',
345 'RSCAN': 'RSCAN',
346 }
348 # If the name is not recognised, use the whole string.
349 # This won't work in a new calculation, so will need to load from
350 # .param file in such cases... but at least it will fail rather
351 # than use the wrong XC!
352 _xc_full_name = line.split(':')[-1].strip()
353 parameters['xc_functional'] = functional_abbrevs.get(
354 _xc_full_name, _xc_full_name)
356 elif 'DFT+D: Semi-empirical dispersion correction' in line:
357 parameters['sedc_apply'] = _parse_on_off(line.split()[-1])
358 elif 'SEDC with' in line:
359 parameters['sedc_scheme'] = {
360 'OBS correction scheme': 'OBS',
361 'G06 correction scheme': 'G06',
362 'D3 correction scheme': 'D3',
363 'D3(BJ) correction scheme': 'D3-BJ',
364 'D4 correction scheme': 'D4',
365 'JCHS correction scheme': 'JCHS',
366 'TS correction scheme': 'TS',
367 'TSsurf correction scheme': 'TSSURF',
368 'TS+SCS correction scheme': 'TSSCS',
369 'aperiodic TS+SCS correction scheme': 'ATSSCS',
370 'aperiodic MBD@SCS method': 'AMBD',
371 'MBD@SCS method': 'MBD',
372 'aperiodic MBD@rsSCS method': 'AMBD*',
373 'MBD@rsSCS method': 'MBD*',
374 'XDM correction scheme': 'XDM',
375 }[line.split(':')[-1].strip()]
377 # Basis Set Parameters
379 elif 'basis set accuracy' in line:
380 parameters['basis_precision'] = line.split()[-1]
381 elif 'plane wave basis set cut-off' in line:
382 parameters['cut_off_energy'] = float(line.split()[-2])
383 elif re.match(r'\sfinite basis set correction\s*:', line):
384 parameters['finite_basis_corr'] = {
385 'none': 0,
386 'manual': 1,
387 'automatic': 2,
388 }[line.split()[-1]]
390 # Electronic Parameters
392 elif 'treating system as spin-polarized' in line:
393 parameters['spin_polarized'] = True
395 # Electronic Minimization Parameters
397 elif 'Treating system as non-metallic' in line:
398 parameters['fix_occupancy'] = True
399 elif 'total energy / atom convergence tol.' in line:
400 parameters['elec_energy_tol'] = float(line.split()[-2])
401 elif 'convergence tolerance window' in line:
402 parameters['elec_convergence_win'] = int(line.split()[-2])
403 elif 'max. number of SCF cycles:' in line:
404 parameters['max_scf_cycles'] = float(line.split()[-1])
405 elif 'dump wavefunctions every' in line:
406 parameters['num_dump_cycles'] = float(line.split()[-3])
408 # Density Mixing Parameters
410 elif 'density-mixing scheme' in line:
411 parameters['mixing_scheme'] = line.split()[-1]
413 return parameters
416def _read_unit_cell(out: io.TextIOBase):
417 """Read a Unit Cell block from a .castep file."""
418 for line in out:
419 fields = line.split()
420 if len(fields) == 6:
421 break
422 lattice_real = []
423 for _ in range(3):
424 lattice_real.append([float(f) for f in fields[0:3]])
425 line = out.readline()
426 fields = line.split()
427 return np.array(lattice_real)
430def _read_forces(out: io.TextIOBase, n_atoms: int):
431 """Read a block for atomic forces from a .castep file."""
432 constraints: List[FixConstraint] = []
433 forces = []
434 for line in out:
435 fields = line.split()
436 if len(fields) == 7:
437 break
438 for n in range(n_atoms):
439 consd = np.array([0, 0, 0])
440 fxyz = [0.0, 0.0, 0.0]
441 for i, force_component in enumerate(fields[-4:-1]):
442 if force_component.count("(cons'd)") > 0:
443 consd[i] = 1
444 # remove constraint labels in force components
445 fxyz[i] = float(force_component.replace("(cons'd)", ''))
446 if consd.all():
447 constraints.append(FixAtoms(n))
448 elif consd.any():
449 constraints.append(FixCartesian(n, consd))
450 forces.append(fxyz)
451 line = out.readline()
452 fields = line.split()
453 return forces, constraints
456def _read_fractional_coordinates(out: io.TextIOBase, n_atoms: int):
457 """Read fractional coordinates from a .castep file."""
458 species: List[str] = []
459 custom_species: Optional[List[str]] = None # A CASTEP special thing
460 positions_frac: List[List[float]] = []
461 for line in out:
462 fields = line.split()
463 if len(fields) == 7:
464 break
465 for _ in range(n_atoms):
466 spec_custom = fields[1].split(':', 1)
467 elem = spec_custom[0]
468 if len(spec_custom) > 1 and custom_species is None:
469 # Add it to the custom info!
470 custom_species = list(species)
471 species.append(elem)
472 if custom_species is not None:
473 custom_species.append(fields[1])
474 positions_frac.append([float(s) for s in fields[3:6]])
475 line = out.readline()
476 fields = line.split()
477 return species, custom_species, positions_frac
480def _read_stress(out: io.TextIOBase):
481 """Read a block for the stress tensor from a .castep file."""
482 for line in out:
483 fields = line.split()
484 if len(fields) == 6:
485 break
486 results = {}
487 stress = []
488 for _ in range(3):
489 stress.append([float(s) for s in fields[2:5]])
490 line = out.readline()
491 fields = line.split()
492 # stress in .castep file is given in GPa
493 results['stress'] = np.array(stress) * units.GPa
494 results['stress'] = results['stress'].reshape(9)[[0, 4, 8, 5, 2, 1]]
495 line = out.readline()
496 if "Pressure:" in line:
497 results['pressure'] = float(
498 line.split()[-2]) * units.GPa # type: ignore[assignment]
499 return results
502def _add_atoms(
503 images,
504 lattice_real,
505 species,
506 custom_species,
507 positions_frac,
508 constraints,
509 results,
510):
511 # If all the lattice parameters are fixed,
512 # the `Unit Cell` block in the .castep file is not printed
513 # in every ionic step.
514 # The lattice paramters are therefore taken from the last step.
515 # This happens:
516 # - `GeometryOptimization`: `FIX_ALL_CELL : TRUE`
517 # - `MolecularDynamics`: `MD_ENSEMBLE : NVE or NVT`
518 if lattice_real is None:
519 lattice_real = images[-1].cell.copy()
521 # for highly symmetric systems (where essentially only the
522 # stress is optimized, but the atomic positions) positions
523 # are only printed once.
524 if species is None:
525 species = images[-1].symbols
526 if positions_frac is None:
527 positions_frac = images[-1].get_scaled_positions()
529 _set_energy_and_free_energy(results)
531 atoms = Atoms(
532 species,
533 cell=lattice_real,
534 constraint=constraints,
535 pbc=True,
536 scaled_positions=positions_frac,
537 )
538 if custom_species is not None:
539 atoms.new_array(
540 'castep_custom_species',
541 np.array(custom_species),
542 )
544 atoms.calc = SinglePointCalculator(atoms)
545 atoms.calc.results = results
547 images.append(atoms)
550def _read_mulliken_charges(out: io.TextIOBase):
551 """Read a block for Mulliken charges from a .castep file."""
552 for i in range(3):
553 line = out.readline()
554 if i == 1:
555 spin_polarized = 'Spin' in line
556 results = defaultdict(list)
557 for line in out:
558 fields = line.split()
559 if len(fields) == 1:
560 break
561 if spin_polarized:
562 if len(fields) != 7: # due to CASTEP 18 outformat changes
563 results['charges'].append(float(fields[-2]))
564 results['magmoms'].append(float(fields[-1]))
565 else:
566 results['charges'].append(float(fields[-1]))
567 return {k: np.array(v) for k, v in results.items()}
570def _read_hirshfeld_details(out: io.TextIOBase, n_atoms: int):
571 """Read the Hirshfeld analysis when iprint > 1 from a .castep file."""
572 results = defaultdict(list)
573 for _ in range(n_atoms):
574 for line in out:
575 if line.strip() == '':
576 break # end for each atom
577 if 'Hirshfeld / free atomic volume :' in line:
578 line = out.readline()
579 fields = line.split()
580 results['hirshfeld_volume_ratios'].append(float(fields[0]))
581 return {k: np.array(v) for k, v in results.items()}
584def _read_hirshfeld_charges(out: io.TextIOBase):
585 """Read a block for Hirshfeld charges from a .castep file."""
586 for i in range(3):
587 line = out.readline()
588 if i == 1:
589 spin_polarized = 'Spin' in line
590 results = defaultdict(list)
591 for line in out:
592 fields = line.split()
593 if len(fields) == 1:
594 break
595 if spin_polarized:
596 results['hirshfeld_charges'].append(float(fields[-2]))
597 results['hirshfeld_magmoms'].append(float(fields[-1]))
598 else:
599 results['hirshfeld_charges'].append(float(fields[-1]))
600 return {k: np.array(v) for k, v in results.items()}
603def _set_energy_and_free_energy(results: Dict[str, Any]):
604 """Set values referred to as `energy` and `free_energy`."""
605 if 'energy_with_dispersion_correction' in results:
606 suffix = '_with_dispersion_correction'
607 else:
608 suffix = '_without_dispersion_correction'
610 if 'free_energy' + suffix in results: # metallic
611 keye = 'energy_zero' + suffix
612 keyf = 'free_energy' + suffix
613 else: # non-metallic
614 # The finite basis set correction is applied to the energy at finite T
615 # (not the energy at 0 K). We should hence refer to the corrected value
616 # as `energy` only when the free energy is unavailable, i.e., only when
617 # FIX_OCCUPANCY : TRUE and thus no smearing is applied.
618 if 'energy_with_finite_basis_set_correction' in results:
619 keye = 'energy_with_finite_basis_set_correction'
620 else:
621 keye = 'energy' + suffix
622 keyf = 'energy' + suffix
624 results['energy'] = results[keye]
625 results['free_energy'] = results[keyf]