Coverage for ase / io / castep / geom_md_ts.py: 93.65%

252 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-30 08:22 +0000

1"""Parsers for CASTEP .geom, .md, .ts files""" 

2 

3from collections.abc import Callable, Sequence 

4from math import sqrt 

5from typing import TextIO 

6 

7import numpy as np 

8 

9from ase import Atoms 

10from ase.io.formats import string2index 

11from ase.stress import full_3x3_to_voigt_6_stress, voigt_6_to_full_3x3_stress 

12from ase.utils import reader, writer 

13 

14 

15class Parser: 

16 """Parser for <-- `key` in .geom, .md, .ts files""" 

17 

18 def __init__(self, units: dict[str, float] | None = None): 

19 if units is None: 

20 from ase.io.castep import units_CODATA2002 

21 

22 self.units = units_CODATA2002 

23 else: 

24 self.units = units 

25 

26 def parse(self, lines: list[str], key: str, method: Callable): 

27 """Parse <-- `key` in `lines` using `method`""" 

28 relevant_lines = [line for line in lines if line.strip().endswith(key)] 

29 if relevant_lines: 

30 return method(relevant_lines, self.units) 

31 return None 

32 

33 

34@reader 

35def _read_images( 

36 fd: TextIO, 

37 index: int | slice | str = -1, 

38 units: dict[str, float] | None = None, 

39): 

40 """Read a .geom or a .md file written by CASTEP. 

41 

42 - .geom: written by the CASTEP GeometryOptimization task 

43 - .md: written by the CATSTEP MolecularDynamics task 

44 

45 Original contribution by Wei-Bing Zhang. Thanks! 

46 

47 Parameters 

48 ---------- 

49 fd : str | TextIO 

50 File name or object (possibly compressed with .gz and .bz2) to be read. 

51 index : int | slice | str, default: -1 

52 Index of image to be read. 

53 units : dict[str, float], default: None 

54 Dictionary with conversion factors from atomic units to ASE units. 

55 

56 - ``Eh``: Hartree energy in eV 

57 - ``a0``: Bohr radius in Å 

58 - ``me``: electron mass in Da 

59 - ``kB``: Boltzmann constant in eV/K 

60 

61 If None, values based on CODATA2002 are used. 

62 

63 Returns 

64 ------- 

65 Atoms | list[Atoms] 

66 ASE Atoms object or list of them. 

67 

68 Notes 

69 ----- 

70 The force-consistent energy, forces, stress are stored in ``atoms.calc``. 

71 

72 Everything in the .geom or the .md file is in atomic units. 

73 They are converted in ASE units, i.e., Å (length), eV (energy), Da (mass). 

74 

75 Stress in the .geom or the .md file includes kinetic contribution, which is 

76 subtracted in ``atoms.calc``. 

77 

78 """ 

79 if isinstance(index, str): 

80 index = string2index(index) 

81 if isinstance(index, str): 

82 raise ValueError(index) 

83 return list(_iread_images(fd, units))[index] 

84 

85 

86read_castep_geom = _read_images 

87read_castep_md = _read_images 

88 

89 

90def _iread_images(fd: TextIO, units: dict[str, float] | None = None): 

91 """Read a .geom or .md file of CASTEP MolecularDynamics as a generator.""" 

92 parser = Parser(units) 

93 _read_header(fd) 

94 lines = [] 

95 for line in fd: 

96 if line.strip(): 

97 lines.append(line) 

98 else: 

99 yield _read_atoms(lines, parser) 

100 lines = [] 

101 

102 

103iread_castep_geom = _iread_images 

104iread_castep_md = _iread_images 

105 

106 

107def _read_atoms(lines: list[str], parser: Parser) -> Atoms: 

108 from ase.calculators.singlepoint import SinglePointCalculator 

109 

110 energy = parser.parse(lines, '<-- E', _read_energies) 

111 cell = parser.parse(lines, '<-- h', _read_cell) 

112 stress = parser.parse(lines, '<-- S', _read_stress) 

113 symbols, positions = parser.parse(lines, '<-- R', _read_positions) 

114 velocities = parser.parse(lines, '<-- V', _read_velocities) 

115 forces = parser.parse(lines, '<-- F', _read_forces) 

116 

117 # Currently unused tags: 

118 # 

119 # temperature = parser.parse(lines, '<-- T', _read_temperature) 

120 # pressure = parser.parse(lines, '<-- P', _read_pressure) 

121 # cell_velocities = parser.extract(lines, '<-- hv', _read_cell_velocities) 

122 

123 atoms = Atoms(symbols, positions, cell=cell, pbc=True) 

124 

125 if velocities is not None: # MolecularDynamics 

126 units = parser.units 

127 factor = units['a0'] * sqrt(units['me'] / units['Eh']) 

128 atoms.info['time'] = float(lines[0].split()[0]) * factor # au -> ASE 

129 atoms.set_velocities(velocities) 

130 

131 if stress is not None: 

132 stress -= atoms.get_kinetic_stress(voigt=True) 

133 

134 # The energy in .geom or .md file is the force-consistent one 

135 # (possibly with the the finite-basis-set correction when, e.g., 

136 # finite_basis_corr!=0 in GeometryOptimisation). 

137 # It should therefore be reasonable to assign it to `free_energy`. 

138 # Be also aware that the energy in .geom file not 0K extrapolated. 

139 atoms.calc = SinglePointCalculator( 

140 atoms=atoms, 

141 free_energy=energy, 

142 forces=forces, 

143 stress=stress, 

144 ) 

145 

146 return atoms 

147 

148 

149def _read_header(fd: TextIO): 

150 for line in fd: 

151 if 'END header' in line: 

152 next(fd) # read blank line below 'END header' 

153 break 

154 

155 

156def _read_energies(lines: list[str], units: dict[str, float]) -> float: 

157 """Read force-consistent energy 

158 

159 Notes 

160 ----- 

161 Enthalpy and kinetic energy (in .md) are also written in the same line. 

162 They are however not parsed because they can be computed using stress and 

163 atomic velocties, respsectively. 

164 """ 

165 return float(lines[0].split()[0]) * units['Eh'] 

166 

167 

168def _read_temperature(lines: list[str], units: dict[str, float]) -> float: 

169 """Read temperature 

170 

171 Notes 

172 ----- 

173 Temperature can be computed from kinetic energy and hence not necessary. 

174 """ 

175 factor = units['Eh'] / units['kB'] # hartree -> K 

176 return float(lines[0].split()[0]) * factor 

177 

178 

179def _read_pressure(lines: list[str], units: dict[str, float]) -> float: 

180 """Read pressure 

181 

182 Notes 

183 ----- 

184 Pressure can be computed from stress and hence not necessary. 

185 """ 

186 factor = units['Eh'] / units['a0'] ** 3 # au -> eV/A3 

187 return float(lines[0].split()[0]) * factor 

188 

189 

190def _read_cell(lines: list[str], units: dict[str, float]) -> np.ndarray: 

191 bohr = units['a0'] 

192 cell = np.array([line.split()[0:3] for line in lines], dtype=float) 

193 return cell * bohr 

194 

195 

196# def _read_cell_velocities(lines: List[str], units: Dict[str, float]): 

197# hartree = units['Eh'] 

198# me = units['me'] 

199# cell_velocities = np.array([_.split()[0:3] for _ in lines], dtype=float) 

200# return cell_velocities * np.sqrt(hartree / me) 

201 

202 

203def _read_stress(lines: list[str], units: dict[str, float]) -> np.ndarray: 

204 hartree = units['Eh'] 

205 bohr = units['a0'] 

206 stress = np.array([line.split()[0:3] for line in lines], dtype=float) 

207 return full_3x3_to_voigt_6_stress(stress) * (hartree / bohr**3) 

208 

209 

210def _read_positions( 

211 lines: list[str], units: dict[str, float] 

212) -> tuple[list[str], np.ndarray]: 

213 bohr = units['a0'] 

214 symbols = [line.split()[0] for line in lines] 

215 positions = np.array([line.split()[2:5] for line in lines], dtype=float) 

216 return symbols, positions * bohr 

217 

218 

219def _read_velocities(lines: list[str], units: dict[str, float]) -> np.ndarray: 

220 hartree = units['Eh'] 

221 me = units['me'] 

222 velocities = np.array([line.split()[2:5] for line in lines], dtype=float) 

223 return velocities * np.sqrt(hartree / me) 

224 

225 

226def _read_forces(lines: list[str], units: dict[str, float]) -> np.ndarray: 

227 hartree = units['Eh'] 

228 bohr = units['a0'] 

229 forces = np.array([line.split()[2:5] for line in lines], dtype=float) 

230 return forces * (hartree / bohr) 

231 

232 

233@writer 

234def write_castep_geom( 

235 fd: TextIO, 

236 images: Atoms | Sequence[Atoms], 

237 units: dict[str, float] | None = None, 

238 *, 

239 pressure: float = 0.0, 

240 sort: bool = False, 

241): 

242 """Write a CASTEP .geom file. 

243 

244 .. versionadded:: 3.25.0 

245 

246 Parameters 

247 ---------- 

248 fd : str | TextIO 

249 File name or object (possibly compressed with .gz and .bz2) to be read. 

250 images : Atoms | Sequenece[Atoms] 

251 ASE Atoms object(s) to be written. 

252 units : dict[str, float], default: None 

253 Dictionary with conversion factors from atomic units to ASE units. 

254 

255 - ``Eh``: Hartree energy in eV 

256 - ``a0``: Bohr radius in Å 

257 - ``me``: electron mass in Da 

258 - ``kB``: Boltzmann constant in eV/K 

259 

260 If None, values based on CODATA2002 are used. 

261 pressure : float, default: 0.0 

262 External pressure in eV/Å\\ :sup:`3`. 

263 sort : bool, default: False 

264 If True, atoms are sorted in ascending order of atomic number. 

265 

266 Notes 

267 ----- 

268 - Values in the .geom file are in atomic units. 

269 - Stress is printed including kinetic contribution. 

270 

271 """ 

272 if isinstance(images, Atoms): 

273 images = [images] 

274 

275 if units is None: 

276 from ase.io.castep import units_CODATA2002 

277 

278 units = units_CODATA2002 

279 

280 _write_header(fd) 

281 

282 for index, atoms in enumerate(images): 

283 if sort: 

284 atoms = atoms[atoms.numbers.argsort()] 

285 _write_convergence_status(fd, index) 

286 _write_energies_geom(fd, atoms, units, pressure) 

287 _write_cell(fd, atoms, units) 

288 _write_stress(fd, atoms, units) 

289 _write_positions(fd, atoms, units) 

290 _write_forces(fd, atoms, units) 

291 fd.write(' \n') 

292 

293 

294@writer 

295def write_castep_md( 

296 fd: TextIO, 

297 images: Atoms | Sequence[Atoms], 

298 units: dict[str, float] | None = None, 

299 *, 

300 pressure: float = 0.0, 

301 sort: bool = False, 

302): 

303 """Write a CASTEP .md file. 

304 

305 .. versionadded:: 3.25.0 

306 

307 Parameters 

308 ---------- 

309 fd : str | TextIO 

310 File name or object (possibly compressed with .gz and .bz2) to be read. 

311 images : Atoms | Sequenece[Atoms] 

312 ASE Atoms object(s) to be written. 

313 units : dict[str, float], default: None 

314 Dictionary with conversion factors from atomic units to ASE units. 

315 

316 - ``Eh``: Hartree energy in eV 

317 - ``a0``: Bohr radius in Å 

318 - ``me``: electron mass in Da 

319 - ``kB``: Boltzmann constant in eV/K 

320 

321 If None, values based on CODATA2002 are used. 

322 pressure : float, default: 0.0 

323 External pressure in eV/Å\\ :sup:`3`. 

324 sort : bool, default: False 

325 If True, atoms are sorted in ascending order of atomic number. 

326 

327 Notes 

328 ----- 

329 - Values in the .md file are in atomic units. 

330 - Stress is printed including kinetic contribution. 

331 

332 """ 

333 if isinstance(images, Atoms): 

334 images = [images] 

335 

336 if units is None: 

337 from ase.io.castep import units_CODATA2002 

338 

339 units = units_CODATA2002 

340 

341 _write_header(fd) 

342 

343 for index, atoms in enumerate(images): 

344 if sort: 

345 atoms = atoms[atoms.numbers.argsort()] 

346 _write_time(fd, index) 

347 _write_energies_md(fd, atoms, units, pressure) 

348 _write_temperature(fd, atoms, units) 

349 _write_cell(fd, atoms, units) 

350 _write_cell_velocities(fd, atoms, units) 

351 _write_stress(fd, atoms, units) 

352 _write_positions(fd, atoms, units) 

353 _write_velocities(fd, atoms, units) 

354 _write_forces(fd, atoms, units) 

355 fd.write(' \n') 

356 

357 

358def _format_float(x: float) -> str: 

359 """Format a floating number for .geom and .md files""" 

360 return np.format_float_scientific( 

361 x, 

362 precision=16, 

363 unique=False, 

364 pad_left=2, 

365 exp_digits=3, 

366 ).replace('e', 'E') 

367 

368 

369def _write_header(fd: TextIO): 

370 fd.write(' BEGIN header\n') 

371 fd.write(' \n') 

372 fd.write(' END header\n') 

373 fd.write(' \n') 

374 

375 

376def _write_convergence_status(fd: TextIO, index: int): 

377 fd.write(21 * ' ') 

378 fd.write(f'{index:18d}') 

379 fd.write(34 * ' ') 

380 for _ in range(4): 

381 fd.write(' F') # Convergence status. So far F for all. 

382 fd.write(10 * ' ') 

383 fd.write(' <-- c\n') 

384 

385 

386def _write_time(fd: TextIO, index: int): 

387 fd.write(18 * ' ' + f' {_format_float(index)}\n') # So far index. 

388 

389 

390def _write_energies_geom( 

391 fd: TextIO, 

392 atoms: Atoms, 

393 units: dict[str, float], 

394 pressure: float = 0.0, 

395): 

396 """Write energies (in hartree) in a CASTEP .geom file. 

397 

398 The energy and the enthalpy are printed. 

399 """ 

400 hartree = units['Eh'] 

401 if atoms.calc is None: 

402 return 

403 if atoms.calc.results.get('free_energy') is None: 

404 return 

405 energy = atoms.calc.results.get('free_energy') / hartree 

406 pv = pressure * atoms.get_volume() / hartree 

407 fd.write(18 * ' ') 

408 fd.write(f' {_format_float(energy)}') 

409 fd.write(f' {_format_float(energy + pv)}') 

410 fd.write(27 * ' ') 

411 fd.write(' <-- E\n') 

412 

413 

414def _write_energies_md( 

415 fd: TextIO, 

416 atoms: Atoms, 

417 units: dict[str, float], 

418 pressure: float = 0.0, 

419): 

420 """Write energies (in hartree) in a CASTEP .md file. 

421 

422 The potential energy, the total energy or enthalpy, and the kinetic energy 

423 are printed. 

424 

425 Notes 

426 ----- 

427 For the second item, CASTEP prints the total energy for the NVE and the NVT 

428 ensembles and the total enthalpy for the NPH and NPT ensembles. 

429 For the Nosé–Hoover (chain) thermostat, furthermore, the energies of the 

430 thermostats are also added. 

431 

432 """ 

433 hartree = units['Eh'] 

434 if atoms.calc is None: 

435 return 

436 if atoms.calc.results.get('free_energy') is None: 

437 return 

438 potential = atoms.calc.results.get('free_energy') / hartree 

439 kinetic = atoms.get_kinetic_energy() / hartree 

440 pv = pressure * atoms.get_volume() / hartree 

441 fd.write(18 * ' ') 

442 fd.write(f' {_format_float(potential)}') 

443 fd.write(f' {_format_float(potential + kinetic + pv)}') 

444 fd.write(f' {_format_float(kinetic)}') 

445 fd.write(' <-- E\n') 

446 

447 

448def _write_temperature(fd: TextIO, atoms: Atoms, units: dict[str, float]): 

449 """Write temperature (in hartree) in a CASTEP .md file. 

450 

451 CASTEP writes the temperature in a .md file with 3`N` degrees of freedom 

452 regardless of the given constraints including the fixed center of mass 

453 (``FIX_COM`` which is by default ``TRUE`` in many cases). 

454 To get the consistent result with the above behavior of CASTEP using 

455 this method, we must set the corresponding constraints (e.g. ``FixCom``) 

456 to the ``Atoms`` object beforehand. 

457 """ 

458 hartree = units['Eh'] # eV 

459 boltzmann = units['kB'] # eV/K 

460 temperature = atoms.get_temperature() * boltzmann / hartree 

461 fd.write(18 * ' ') 

462 fd.write(f' {_format_float(temperature)}') 

463 fd.write(54 * ' ') 

464 fd.write(' <-- T\n') 

465 

466 

467def _write_cell(fd: TextIO, atoms: Atoms, units: dict[str, float]): 

468 bohr = units['a0'] 

469 cell = atoms.cell / bohr # in bohr 

470 for i in range(3): 

471 fd.write(18 * ' ') 

472 for j in range(3): 

473 fd.write(f' {_format_float(cell[i, j])}') 

474 fd.write(' <-- h\n') 

475 

476 

477def _write_cell_velocities(fd: TextIO, atoms: Atoms, units: dict[str, float]): 

478 pass # TODO: to be implemented 

479 

480 

481def _write_stress(fd: TextIO, atoms: Atoms, units: dict[str, float]): 

482 if atoms.calc is None: 

483 return 

484 

485 stress = atoms.calc.results.get('stress') 

486 if stress is None: 

487 return 

488 

489 if stress.shape != (3, 3): 

490 stress = voigt_6_to_full_3x3_stress(stress) 

491 

492 stress += atoms.get_kinetic_stress(voigt=False) 

493 

494 hartree = units['Eh'] 

495 bohr = units['a0'] 

496 stress = stress / (hartree / bohr**3) 

497 

498 for i in range(3): 

499 fd.write(18 * ' ') 

500 for j in range(3): 

501 fd.write(f' {_format_float(stress[i, j])}') 

502 fd.write(' <-- S\n') 

503 

504 

505def _write_positions(fd: TextIO, atoms: Atoms, units: dict[str, float]): 

506 bohr = units['a0'] 

507 positions = atoms.positions / bohr 

508 symbols = atoms.symbols 

509 indices = symbols.species_indices() 

510 for i, symbol, position in zip(indices, symbols, positions): 

511 fd.write(f' {symbol:8s}') 

512 fd.write(f' {i + 1:8d}') 

513 for j in range(3): 

514 fd.write(f' {_format_float(position[j])}') 

515 fd.write(' <-- R\n') 

516 

517 

518def _write_forces(fd: TextIO, atoms: Atoms, units: dict[str, float]): 

519 if atoms.calc is None: 

520 return 

521 

522 forces = atoms.calc.results.get('forces') 

523 if forces is None: 

524 return 

525 

526 hartree = units['Eh'] 

527 bohr = units['a0'] 

528 forces = forces / (hartree / bohr) 

529 

530 symbols = atoms.symbols 

531 indices = symbols.species_indices() 

532 for i, symbol, force in zip(indices, symbols, forces): 

533 fd.write(f' {symbol:8s}') 

534 fd.write(f' {i + 1:8d}') 

535 for j in range(3): 

536 fd.write(f' {_format_float(force[j])}') 

537 fd.write(' <-- F\n') 

538 

539 

540def _write_velocities(fd: TextIO, atoms: Atoms, units: dict[str, float]): 

541 hartree = units['Eh'] 

542 me = units['me'] 

543 velocities = atoms.get_velocities() / np.sqrt(hartree / me) 

544 symbols = atoms.symbols 

545 indices = symbols.species_indices() 

546 for i, symbol, velocity in zip(indices, symbols, velocities): 

547 fd.write(f' {symbol:8s}') 

548 fd.write(f' {i + 1:8d}') 

549 for j in range(3): 

550 fd.write(f' {_format_float(velocity[j])}') 

551 fd.write(' <-- V\n')