Coverage for /builds/ase/ase/ase/io/castep/castep_reader.py: 91.15%

339 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-08-02 00:12 +0000

1# fmt: off 

2 

3import io 

4import re 

5import warnings 

6from collections import defaultdict 

7from typing import Any, Dict, List, Optional 

8 

9import numpy as np 

10 

11from ase import Atoms, units 

12from ase.calculators.singlepoint import SinglePointCalculator 

13from ase.constraints import FixAtoms, FixCartesian, FixConstraint 

14from ase.io import ParseError 

15from ase.utils import reader, string2index 

16 

17 

18@reader 

19def read_castep_castep(fd, index=-1): 

20 """Read a .castep file and returns an Atoms object. 

21 

22 The calculator information will be stored in the calc attribute. 

23 

24 Notes 

25 ----- 

26 This routine will return an atom ordering as found within the castep file. 

27 This means that the species will be ordered by ascending atomic numbers. 

28 The atoms witin a species are ordered as given in the original cell file. 

29 

30 """ 

31 # look for last result, if several CASTEP run are appended 

32 line_start, line_end, end_found = _find_last_record(fd) 

33 if not end_found: 

34 raise ParseError(f'No regular end found in {fd.name} file.') 

35 

36 # These variables are finally assigned to `SinglePointCalculator` 

37 # for backward compatibility with the `Castep` calculator. 

38 cut_off_energy = None 

39 kpoints = None 

40 total_time = None 

41 peak_memory = None 

42 

43 # jump back to the beginning to the last record 

44 fd.seek(0) 

45 for i, line in enumerate(fd): 

46 if i == line_start: 

47 break 

48 

49 # read header 

50 parameters_header = _read_header(fd) 

51 if 'cut_off_energy' in parameters_header: 

52 cut_off_energy = parameters_header['cut_off_energy'] 

53 if 'basis_precision' in parameters_header: 

54 del parameters_header['cut_off_energy'] # avoid conflict 

55 

56 markers_new_iteration = [ 

57 'BFGS: starting iteration', 

58 'BFGS: improving iteration', 

59 'Starting MD iteration', 

60 ] 

61 

62 images = [] 

63 

64 results = {} 

65 constraints = [] 

66 species_pot = [] 

67 castep_warnings = [] 

68 for i, line in enumerate(fd): 

69 

70 if i > line_end: 

71 break 

72 

73 if 'Number of kpoints used' in line: 

74 kpoints = int(line.split('=')[-1].strip()) 

75 elif 'Unit Cell' in line: 

76 lattice_real = _read_unit_cell(fd) 

77 elif 'Cell Contents' in line: 

78 for line in fd: 

79 if 'Total number of ions in cell' in line: 

80 n_atoms = int(line.split()[7]) 

81 if 'Total number of species in cell' in line: 

82 int(line.split()[7]) 

83 fields = line.split() 

84 if len(fields) == 0: 

85 break 

86 elif 'Fractional coordinates of atoms' in line: 

87 species, custom_species, positions_frac = \ 

88 _read_fractional_coordinates(fd, n_atoms) 

89 elif 'Files used for pseudopotentials' in line: 

90 for line in fd: 

91 line = fd.readline() 

92 if 'Pseudopotential generated on-the-fly' in line: 

93 continue 

94 fields = line.split() 

95 if len(fields) == 2: 

96 species_pot.append(fields) 

97 else: 

98 break 

99 elif 'k-Points For BZ Sampling' in line: 

100 # TODO: generalize for non-Monkhorst Pack case 

101 # (i.e. kpoint lists) - 

102 # kpoints_offset cannot be read this way and 

103 # is hence always set to None 

104 for line in fd: 

105 if not line.strip(): 

106 break 

107 if 'MP grid size for SCF calculation' in line: 

108 # kpoints = ' '.join(line.split()[-3:]) 

109 # self.kpoints_mp_grid = kpoints 

110 # self.kpoints_mp_offset = '0. 0. 0.' 

111 # not set here anymore because otherwise 

112 # two calculator objects go out of sync 

113 # after each calculation triggering unnecessary 

114 # recalculation 

115 break 

116 

117 elif 'Final energy' in line: 

118 key = 'energy_without_dispersion_correction' 

119 results[key] = float(line.split()[-2]) 

120 elif 'Final free energy' in line: 

121 key = 'free_energy_without_dispersion_correction' 

122 results[key] = float(line.split()[-2]) 

123 elif 'NB est. 0K energy' in line: 

124 key = 'energy_zero_without_dispersion_correction' 

125 results[key] = float(line.split()[-2]) 

126 

127 # Add support for dispersion correction 

128 # filtering due to SEDC is done in get_potential_energy 

129 elif 'Dispersion corrected final energy' in line: 

130 key = 'energy_with_dispersion_correlation' 

131 results[key] = float(line.split()[-2]) 

132 elif 'Dispersion corrected final free energy' in line: 

133 key = 'free_energy_with_dispersion_correlation' 

134 results[key] = float(line.split()[-2]) 

135 elif 'NB dispersion corrected est. 0K energy' in line: 

136 key = 'energy_zero_with_dispersion_correlation' 

137 results[key] = float(line.split()[-2]) 

138 

139 # check if we had a finite basis set correction 

140 elif 'Total energy corrected for finite basis set' in line: 

141 key = 'energy_with_finite_basis_set_correction' 

142 results[key] = float(line.split()[-2]) 

143 

144 # ******************** Forces ********************* 

145 # ************** Symmetrised Forces *************** 

146 # ******************** Constrained Forces ******************** 

147 # ******************* Unconstrained Forces ******************* 

148 elif re.search(r'\**.* Forces \**', line): 

149 forces, constraints = _read_forces(fd, n_atoms) 

150 results['forces'] = np.array(forces) 

151 

152 # ***************** Stress Tensor ***************** 

153 # *********** Symmetrised Stress Tensor *********** 

154 elif re.search(r'\**.* Stress Tensor \**', line): 

155 results.update(_read_stress(fd)) 

156 

157 elif any(_ in line for _ in markers_new_iteration): 

158 _add_atoms( 

159 images, 

160 lattice_real, 

161 species, 

162 custom_species, 

163 positions_frac, 

164 constraints, 

165 results, 

166 ) 

167 # reset for the next step 

168 lattice_real = None 

169 species = None 

170 positions_frac = None 

171 results = {} 

172 constraints = [] 

173 

174 # extract info from the Mulliken analysis 

175 elif 'Atomic Populations' in line: 

176 results.update(_read_mulliken_charges(fd)) 

177 

178 # extract detailed Hirshfeld analysis (iprint > 1) 

179 elif 'Hirshfeld total electronic charge (e)' in line: 

180 results.update(_read_hirshfeld_details(fd, n_atoms)) 

181 

182 elif 'Hirshfeld Analysis' in line: 

183 results.update(_read_hirshfeld_charges(fd)) 

184 

185 # There is actually no good reason to get out of the loop 

186 # already at this point... or do I miss something? 

187 # elif 'BFGS: Final Configuration:' in line: 

188 # break 

189 elif 'warn' in line.lower(): 

190 castep_warnings.append(line) 

191 

192 # fetch some last info 

193 elif 'Total time' in line: 

194 pattern = r'.*=\s*([\d\.]+) s' 

195 total_time = float(re.search(pattern, line).group(1)) 

196 

197 elif 'Peak Memory Use' in line: 

198 pattern = r'.*=\s*([\d]+) kB' 

199 peak_memory = int(re.search(pattern, line).group(1)) 

200 

201 # add the last image 

202 _add_atoms( 

203 images, 

204 lattice_real, 

205 species, 

206 custom_species, 

207 positions_frac, 

208 constraints, 

209 results, 

210 ) 

211 

212 for atoms in images: 

213 # these variables are temporarily assigned to `SinglePointCalculator` 

214 # to be assigned to the `Castep` calculator for backward compatibility 

215 atoms.calc._cut_off_energy = cut_off_energy 

216 atoms.calc._kpoints = kpoints 

217 atoms.calc._species_pot = species_pot 

218 atoms.calc._total_time = total_time 

219 atoms.calc._peak_memory = peak_memory 

220 atoms.calc._parameters_header = parameters_header 

221 

222 if castep_warnings: 

223 warnings.warn('WARNING: .castep file contains warnings') 

224 for warning in castep_warnings: 

225 warnings.warn(warning) 

226 

227 if isinstance(index, str): 

228 index = string2index(index) 

229 

230 return images[index] 

231 

232 

233def _find_last_record(fd): 

234 """Find the last record of the .castep file. 

235 

236 Returns 

237 ------- 

238 start : int 

239 Line number of the first line of the last record. 

240 end : int 

241 Line number of the last line of the last record. 

242 end_found : bool 

243 True if the .castep file ends as expected. 

244 

245 """ 

246 start = -1 

247 for i, line in enumerate(fd): 

248 if (('Welcome' in line or 'Materials Studio' in line) 

249 and 'CASTEP' in line): 

250 start = i 

251 

252 if start < 0: 

253 warnings.warn( 

254 f'Could not find CASTEP label in result file: {fd.name}.' 

255 ' Are you sure this is a .castep file?' 

256 ) 

257 return None 

258 

259 # search for regular end of file 

260 end_found = False 

261 # start to search from record beginning from the back 

262 # and see if 

263 end = -1 

264 fd.seek(0) 

265 for i, line in enumerate(fd): 

266 if i < start: 

267 continue 

268 if 'Finalisation time =' in line: 

269 end_found = True 

270 end = i 

271 break 

272 

273 return (start, end, end_found) 

274 

275 

276def _read_header(out: io.TextIOBase): 

277 """Read the header blocks from a .castep file. 

278 

279 Returns 

280 ------- 

281 parameters : dict 

282 Dictionary storing keys and values of a .param file. 

283 """ 

284 def _parse_on_off(_: str): 

285 return {'on': True, 'off': False}[_] 

286 

287 read_title = False 

288 parameters: Dict[str, Any] = {} 

289 for line in out: 

290 if len(line) == 0: # end of file 

291 break 

292 if re.search(r'^\s*\*+$', line) and read_title: # end of header 

293 break 

294 

295 if re.search(r'\**.* Title \**', line): 

296 read_title = True 

297 

298 # General Parameters 

299 

300 elif 'output verbosity' in line: 

301 parameters['iprint'] = int(line.split()[-1][1]) 

302 elif re.match(r'\stype of calculation\s*:', line): 

303 parameters['task'] = { 

304 'single point energy': 'SinglePoint', 

305 'geometry optimization': 'GeometryOptimization', 

306 'band structure': 'BandStructure', 

307 'molecular dynamics': 'MolecularDynamics', 

308 'optical properties': 'Optics', 

309 'phonon calculation': 'Phonon', 

310 'E-field calculation': 'Efield', 

311 'Phonon followed by E-field': 'Phonon+Efield', 

312 'transition state search': 'TransitionStateSearch', 

313 'Magnetic Resonance': 'MagRes', 

314 'Core level spectra': 'Elnes', 

315 'Electronic Spectroscopy': 'ElectronicSpectroscopy', 

316 }[line.split(':')[-1].strip()] 

317 elif 'stress calculation' in line: 

318 parameters['calculate_stress'] = _parse_on_off(line.split()[-1]) 

319 elif 'calculation limited to maximum' in line: 

320 parameters['run_time'] = float(line.split()[-2]) 

321 elif re.match(r'\soptimization strategy\s*:', line): 

322 parameters['opt_strategy'] = { 

323 'maximize speed(+++)': 'Speed', 

324 'minimize memory(---)': 'Memory', 

325 'balance speed and memory': 'Default', 

326 }[line.split(':')[-1].strip()] 

327 

328 # Exchange-Correlation Parameters 

329 elif re.match(r'\susing functional\s*:', line): 

330 functional_abbrevs = { 

331 'Local Density Approximation': 'LDA', 

332 'Perdew Wang (1991)': 'PW91', 

333 'Perdew Burke Ernzerhof': 'PBE', 

334 'revised Perdew Burke Ernzerhof': 'RPBE', 

335 'PBE with Wu-Cohen exchange': 'WC', 

336 'PBE for solids (2008)': 'PBESOL', 

337 'Hartree-Fock': 'HF', 

338 'Hartree-Fock +': 'HF-LDA', 

339 'Screened Hartree-Fock': 'sX', 

340 'Screened Hartree-Fock + ': 'sX-LDA', 

341 'hybrid PBE0': 'PBE0', 

342 'hybrid B3LYP': 'B3LYP', 

343 'hybrid HSE03': 'HSE03', 

344 'hybrid HSE06': 'HSE06', 

345 'RSCAN': 'RSCAN', 

346 } 

347 

348 # If the name is not recognised, use the whole string. 

349 # This won't work in a new calculation, so will need to load from 

350 # .param file in such cases... but at least it will fail rather 

351 # than use the wrong XC! 

352 _xc_full_name = line.split(':')[-1].strip() 

353 parameters['xc_functional'] = functional_abbrevs.get( 

354 _xc_full_name, _xc_full_name) 

355 

356 elif 'DFT+D: Semi-empirical dispersion correction' in line: 

357 parameters['sedc_apply'] = _parse_on_off(line.split()[-1]) 

358 elif 'SEDC with' in line: 

359 parameters['sedc_scheme'] = { 

360 'OBS correction scheme': 'OBS', 

361 'G06 correction scheme': 'G06', 

362 'D3 correction scheme': 'D3', 

363 'D3(BJ) correction scheme': 'D3-BJ', 

364 'D4 correction scheme': 'D4', 

365 'JCHS correction scheme': 'JCHS', 

366 'TS correction scheme': 'TS', 

367 'TSsurf correction scheme': 'TSSURF', 

368 'TS+SCS correction scheme': 'TSSCS', 

369 'aperiodic TS+SCS correction scheme': 'ATSSCS', 

370 'aperiodic MBD@SCS method': 'AMBD', 

371 'MBD@SCS method': 'MBD', 

372 'aperiodic MBD@rsSCS method': 'AMBD*', 

373 'MBD@rsSCS method': 'MBD*', 

374 'XDM correction scheme': 'XDM', 

375 }[line.split(':')[-1].strip()] 

376 

377 # Basis Set Parameters 

378 

379 elif 'basis set accuracy' in line: 

380 parameters['basis_precision'] = line.split()[-1] 

381 elif 'plane wave basis set cut-off' in line: 

382 parameters['cut_off_energy'] = float(line.split()[-2]) 

383 elif re.match(r'\sfinite basis set correction\s*:', line): 

384 parameters['finite_basis_corr'] = { 

385 'none': 0, 

386 'manual': 1, 

387 'automatic': 2, 

388 }[line.split()[-1]] 

389 

390 # Electronic Parameters 

391 

392 elif 'treating system as spin-polarized' in line: 

393 parameters['spin_polarized'] = True 

394 

395 # Electronic Minimization Parameters 

396 

397 elif 'Treating system as non-metallic' in line: 

398 parameters['fix_occupancy'] = True 

399 elif 'total energy / atom convergence tol.' in line: 

400 parameters['elec_energy_tol'] = float(line.split()[-2]) 

401 elif 'convergence tolerance window' in line: 

402 parameters['elec_convergence_win'] = int(line.split()[-2]) 

403 elif 'max. number of SCF cycles:' in line: 

404 parameters['max_scf_cycles'] = float(line.split()[-1]) 

405 elif 'dump wavefunctions every' in line: 

406 parameters['num_dump_cycles'] = float(line.split()[-3]) 

407 

408 # Density Mixing Parameters 

409 

410 elif 'density-mixing scheme' in line: 

411 parameters['mixing_scheme'] = line.split()[-1] 

412 

413 return parameters 

414 

415 

416def _read_unit_cell(out: io.TextIOBase): 

417 """Read a Unit Cell block from a .castep file.""" 

418 for line in out: 

419 fields = line.split() 

420 if len(fields) == 6: 

421 break 

422 lattice_real = [] 

423 for _ in range(3): 

424 lattice_real.append([float(f) for f in fields[0:3]]) 

425 line = out.readline() 

426 fields = line.split() 

427 return np.array(lattice_real) 

428 

429 

430def _read_forces(out: io.TextIOBase, n_atoms: int): 

431 """Read a block for atomic forces from a .castep file.""" 

432 constraints: List[FixConstraint] = [] 

433 forces = [] 

434 for line in out: 

435 fields = line.split() 

436 if len(fields) == 7: 

437 break 

438 for n in range(n_atoms): 

439 consd = np.array([0, 0, 0]) 

440 fxyz = [0.0, 0.0, 0.0] 

441 for i, force_component in enumerate(fields[-4:-1]): 

442 if force_component.count("(cons'd)") > 0: 

443 consd[i] = 1 

444 # remove constraint labels in force components 

445 fxyz[i] = float(force_component.replace("(cons'd)", '')) 

446 if consd.all(): 

447 constraints.append(FixAtoms(n)) 

448 elif consd.any(): 

449 constraints.append(FixCartesian(n, consd)) 

450 forces.append(fxyz) 

451 line = out.readline() 

452 fields = line.split() 

453 return forces, constraints 

454 

455 

456def _read_fractional_coordinates(out: io.TextIOBase, n_atoms: int): 

457 """Read fractional coordinates from a .castep file.""" 

458 species: List[str] = [] 

459 custom_species: Optional[List[str]] = None # A CASTEP special thing 

460 positions_frac: List[List[float]] = [] 

461 for line in out: 

462 fields = line.split() 

463 if len(fields) == 7: 

464 break 

465 for _ in range(n_atoms): 

466 spec_custom = fields[1].split(':', 1) 

467 elem = spec_custom[0] 

468 if len(spec_custom) > 1 and custom_species is None: 

469 # Add it to the custom info! 

470 custom_species = list(species) 

471 species.append(elem) 

472 if custom_species is not None: 

473 custom_species.append(fields[1]) 

474 positions_frac.append([float(s) for s in fields[3:6]]) 

475 line = out.readline() 

476 fields = line.split() 

477 return species, custom_species, positions_frac 

478 

479 

480def _read_stress(out: io.TextIOBase): 

481 """Read a block for the stress tensor from a .castep file.""" 

482 for line in out: 

483 fields = line.split() 

484 if len(fields) == 6: 

485 break 

486 results = {} 

487 stress = [] 

488 for _ in range(3): 

489 stress.append([float(s) for s in fields[2:5]]) 

490 line = out.readline() 

491 fields = line.split() 

492 # stress in .castep file is given in GPa 

493 results['stress'] = np.array(stress) * units.GPa 

494 results['stress'] = results['stress'].reshape(9)[[0, 4, 8, 5, 2, 1]] 

495 line = out.readline() 

496 if "Pressure:" in line: 

497 results['pressure'] = float( 

498 line.split()[-2]) * units.GPa # type: ignore[assignment] 

499 return results 

500 

501 

502def _add_atoms( 

503 images, 

504 lattice_real, 

505 species, 

506 custom_species, 

507 positions_frac, 

508 constraints, 

509 results, 

510): 

511 # If all the lattice parameters are fixed, 

512 # the `Unit Cell` block in the .castep file is not printed 

513 # in every ionic step. 

514 # The lattice paramters are therefore taken from the last step. 

515 # This happens: 

516 # - `GeometryOptimization`: `FIX_ALL_CELL : TRUE` 

517 # - `MolecularDynamics`: `MD_ENSEMBLE : NVE or NVT` 

518 if lattice_real is None: 

519 lattice_real = images[-1].cell.copy() 

520 

521 # for highly symmetric systems (where essentially only the 

522 # stress is optimized, but the atomic positions) positions 

523 # are only printed once. 

524 if species is None: 

525 species = images[-1].symbols 

526 if positions_frac is None: 

527 positions_frac = images[-1].get_scaled_positions() 

528 

529 _set_energy_and_free_energy(results) 

530 

531 atoms = Atoms( 

532 species, 

533 cell=lattice_real, 

534 constraint=constraints, 

535 pbc=True, 

536 scaled_positions=positions_frac, 

537 ) 

538 if custom_species is not None: 

539 atoms.new_array( 

540 'castep_custom_species', 

541 np.array(custom_species), 

542 ) 

543 

544 atoms.calc = SinglePointCalculator(atoms) 

545 atoms.calc.results = results 

546 

547 images.append(atoms) 

548 

549 

550def _read_mulliken_charges(out: io.TextIOBase): 

551 """Read a block for Mulliken charges from a .castep file.""" 

552 for i in range(3): 

553 line = out.readline() 

554 if i == 1: 

555 spin_polarized = 'Spin' in line 

556 results = defaultdict(list) 

557 for line in out: 

558 fields = line.split() 

559 if len(fields) == 1: 

560 break 

561 if spin_polarized: 

562 if len(fields) != 7: # due to CASTEP 18 outformat changes 

563 results['charges'].append(float(fields[-2])) 

564 results['magmoms'].append(float(fields[-1])) 

565 else: 

566 results['charges'].append(float(fields[-1])) 

567 return {k: np.array(v) for k, v in results.items()} 

568 

569 

570def _read_hirshfeld_details(out: io.TextIOBase, n_atoms: int): 

571 """Read the Hirshfeld analysis when iprint > 1 from a .castep file.""" 

572 results = defaultdict(list) 

573 for _ in range(n_atoms): 

574 for line in out: 

575 if line.strip() == '': 

576 break # end for each atom 

577 if 'Hirshfeld / free atomic volume :' in line: 

578 line = out.readline() 

579 fields = line.split() 

580 results['hirshfeld_volume_ratios'].append(float(fields[0])) 

581 return {k: np.array(v) for k, v in results.items()} 

582 

583 

584def _read_hirshfeld_charges(out: io.TextIOBase): 

585 """Read a block for Hirshfeld charges from a .castep file.""" 

586 for i in range(3): 

587 line = out.readline() 

588 if i == 1: 

589 spin_polarized = 'Spin' in line 

590 results = defaultdict(list) 

591 for line in out: 

592 fields = line.split() 

593 if len(fields) == 1: 

594 break 

595 if spin_polarized: 

596 results['hirshfeld_charges'].append(float(fields[-2])) 

597 results['hirshfeld_magmoms'].append(float(fields[-1])) 

598 else: 

599 results['hirshfeld_charges'].append(float(fields[-1])) 

600 return {k: np.array(v) for k, v in results.items()} 

601 

602 

603def _set_energy_and_free_energy(results: Dict[str, Any]): 

604 """Set values referred to as `energy` and `free_energy`.""" 

605 if 'energy_with_dispersion_correction' in results: 

606 suffix = '_with_dispersion_correction' 

607 else: 

608 suffix = '_without_dispersion_correction' 

609 

610 if 'free_energy' + suffix in results: # metallic 

611 keye = 'energy_zero' + suffix 

612 keyf = 'free_energy' + suffix 

613 else: # non-metallic 

614 # The finite basis set correction is applied to the energy at finite T 

615 # (not the energy at 0 K). We should hence refer to the corrected value 

616 # as `energy` only when the free energy is unavailable, i.e., only when 

617 # FIX_OCCUPANCY : TRUE and thus no smearing is applied. 

618 if 'energy_with_finite_basis_set_correction' in results: 

619 keye = 'energy_with_finite_basis_set_correction' 

620 else: 

621 keye = 'energy' + suffix 

622 keyf = 'energy' + suffix 

623 

624 results['energy'] = results[keye] 

625 results['free_energy'] = results[keyf]