Coverage for /builds/ase/ase/ase/io/castep/castep

1# fmt: off

3import io

4import re

5import warnings

6from collections import defaultdict

7from typing import Any, Dict, List, Optional

9import numpy as np

11from ase import Atoms, units

12from ase.calculators.singlepoint import SinglePointCalculator

13from ase.constraints import FixAtoms, FixCartesian, FixConstraint

14from ase.io import ParseError

15from ase.utils import reader, string2index

18@reader

19def read_castep_castep(fd, index=-1):

20 """Read a .castep file and returns an Atoms object.

22 The calculator information will be stored in the calc attribute.

24 Notes

25 -----

26 This routine will return an atom ordering as found within the castep file.

27 This means that the species will be ordered by ascending atomic numbers.

28 The atoms witin a species are ordered as given in the original cell file.

30 """

31 # look for last result, if several CASTEP run are appended

32 line_start, line_end, end_found = _find_last_record(fd)

33 if not end_found:

34 raise ParseError(f'No regular end found in {fd.name} file.')

36 # These variables are finally assigned to `SinglePointCalculator`

37 # for backward compatibility with the `Castep` calculator.

38 cut_off_energy = None

39 kpoints = None

40 total_time = None

41 peak_memory = None

43 # jump back to the beginning to the last record

44 fd.seek(0)

45 for i, line in enumerate(fd):

46 if i == line_start:

47 break

49 # read header

50 parameters_header = _read_header(fd)

51 if 'cut_off_energy' in parameters_header:

52 cut_off_energy = parameters_header['cut_off_energy']

53 if 'basis_precision' in parameters_header:

54 del parameters_header['cut_off_energy'] # avoid conflict

56 markers_new_iteration = [

57 'BFGS: starting iteration',

58 'BFGS: improving iteration',

59 'Starting MD iteration',

60 ]

62 images = []

64 results = {}

65 constraints = []

66 species_pot = []

67 castep_warnings = []

68 for i, line in enumerate(fd):

70 if i > line_end:

71 break

73 if 'Number of kpoints used' in line:

74 kpoints = int(line.split('=')[-1].strip())

75 elif 'Unit Cell' in line:

76 lattice_real = _read_unit_cell(fd)

77 elif 'Cell Contents' in line:

78 for line in fd:

79 if 'Total number of ions in cell' in line:

80 n_atoms = int(line.split()[7])

81 if 'Total number of species in cell' in line:

82 int(line.split()[7])

83 fields = line.split()

84 if len(fields) == 0:

85 break

86 elif 'Fractional coordinates of atoms' in line:

87 species, custom_species, positions_frac = \

88 _read_fractional_coordinates(fd, n_atoms)

89 elif 'Files used for pseudopotentials' in line:

90 for line in fd:

91 line = fd.readline()

92 if 'Pseudopotential generated on-the-fly' in line:

93 continue

94 fields = line.split()

95 if len(fields) == 2:

96 species_pot.append(fields)

97 else:

98 break

99 elif 'k-Points For BZ Sampling' in line:

100 # TODO: generalize for non-Monkhorst Pack case

101 # (i.e. kpoint lists) -

102 # kpoints_offset cannot be read this way and

103 # is hence always set to None

104 for line in fd:

105 if not line.strip():

106 break

107 if 'MP grid size for SCF calculation' in line:

108 # kpoints = ' '.join(line.split()[-3:])

109 # self.kpoints_mp_grid = kpoints

110 # self.kpoints_mp_offset = '0. 0. 0.'

111 # not set here anymore because otherwise

112 # two calculator objects go out of sync

113 # after each calculation triggering unnecessary

114 # recalculation

115 break

116

117 elif 'Final energy' in line:

118 key = 'energy_without_dispersion_correction'

119 results[key] = float(line.split()[-2])

120 elif 'Final free energy' in line:

121 key = 'free_energy_without_dispersion_correction'

122 results[key] = float(line.split()[-2])

123 elif 'NB est. 0K energy' in line:

124 key = 'energy_zero_without_dispersion_correction'

125 results[key] = float(line.split()[-2])

126

127 # Add support for dispersion correction

128 # filtering due to SEDC is done in get_potential_energy

129 elif 'Dispersion corrected final energy' in line:

130 key = 'energy_with_dispersion_correlation'

131 results[key] = float(line.split()[-2])

132 elif 'Dispersion corrected final free energy' in line:

133 key = 'free_energy_with_dispersion_correlation'

134 results[key] = float(line.split()[-2])

135 elif 'NB dispersion corrected est. 0K energy' in line:

136 key = 'energy_zero_with_dispersion_correlation'

137 results[key] = float(line.split()[-2])

138

139 # check if we had a finite basis set correction

140 elif 'Total energy corrected for finite basis set' in line:

141 key = 'energy_with_finite_basis_set_correction'

142 results[key] = float(line.split()[-2])

143

144 # ******************** Forces *********************

145 # ************** Symmetrised Forces ***************

146 # ******************** Constrained Forces ********************

147 # ******************* Unconstrained Forces *******************

148 elif re.search(r'\**.* Forces \**', line):

149 forces, constraints = _read_forces(fd, n_atoms)

150 results['forces'] = np.array(forces)

151

152 # ***************** Stress Tensor *****************

153 # *********** Symmetrised Stress Tensor ***********

154 elif re.search(r'\**.* Stress Tensor \**', line):

155 results.update(_read_stress(fd))

156

157 elif any(_ in line for _ in markers_new_iteration):

158 _add_atoms(

159 images,

160 lattice_real,

161 species,

162 custom_species,

163 positions_frac,

164 constraints,

165 results,

166 )

167 # reset for the next step

168 lattice_real = None

169 species = None

170 positions_frac = None

171 results = {}

172 constraints = []

173

174 # extract info from the Mulliken analysis

175 elif 'Atomic Populations' in line:

176 results.update(_read_mulliken_charges(fd))

177

178 # extract detailed Hirshfeld analysis (iprint > 1)

179 elif 'Hirshfeld total electronic charge (e)' in line:

180 results.update(_read_hirshfeld_details(fd, n_atoms))

181

182 elif 'Hirshfeld Analysis' in line:

183 results.update(_read_hirshfeld_charges(fd))

184

185 # There is actually no good reason to get out of the loop

186 # already at this point... or do I miss something?

187 # elif 'BFGS: Final Configuration:' in line:

188 # break

189 elif 'warn' in line.lower():

190 castep_warnings.append(line)

191

192 # fetch some last info

193 elif 'Total time' in line:

194 pattern = r'.*=\s*([\d\.]+) s'

195 total_time = float(re.search(pattern, line).group(1))

196

197 elif 'Peak Memory Use' in line:

198 pattern = r'.*=\s*([\d]+) kB'

199 peak_memory = int(re.search(pattern, line).group(1))

200

201 # add the last image

202 _add_atoms(

203 images,

204 lattice_real,

205 species,

206 custom_species,

207 positions_frac,

208 constraints,

209 results,

210 )

211

212 for atoms in images:

213 # these variables are temporarily assigned to `SinglePointCalculator`

214 # to be assigned to the `Castep` calculator for backward compatibility

215 atoms.calc._cut_off_energy = cut_off_energy

216 atoms.calc._kpoints = kpoints

217 atoms.calc._species_pot = species_pot

218 atoms.calc._total_time = total_time

219 atoms.calc._peak_memory = peak_memory

220 atoms.calc._parameters_header = parameters_header

221

222 if castep_warnings:

223 warnings.warn('WARNING: .castep file contains warnings')

224 for warning in castep_warnings:

225 warnings.warn(warning)

226

227 if isinstance(index, str):

228 index = string2index(index)

229

230 return images[index]

231

232

233def _find_last_record(fd):

234 """Find the last record of the .castep file.

235

236 Returns

237 -------

238 start : int

239 Line number of the first line of the last record.

240 end : int

241 Line number of the last line of the last record.

242 end_found : bool

243 True if the .castep file ends as expected.

244

245 """

246 start = -1

247 for i, line in enumerate(fd):

248 if (('Welcome' in line or 'Materials Studio' in line)

249 and 'CASTEP' in line):

250 start = i

251

252 if start < 0:

253 warnings.warn(

254 f'Could not find CASTEP label in result file: {fd.name}.'

255 ' Are you sure this is a .castep file?'

256 )

257 return None

258

259 # search for regular end of file

260 end_found = False

261 # start to search from record beginning from the back

262 # and see if

263 end = -1

264 fd.seek(0)

265 for i, line in enumerate(fd):

266 if i < start:

267 continue

268 if 'Finalisation time =' in line:

269 end_found = True

270 end = i

271 break

272

273 return (start, end, end_found)

274

275

276def _read_header(out: io.TextIOBase):

277 """Read the header blocks from a .castep file.

278

279 Returns

280 -------

281 parameters : dict

282 Dictionary storing keys and values of a .param file.

283 """

284 def _parse_on_off(_: str):

285 return {'on': True, 'off': False}[_]

286

287 read_title = False

288 parameters: Dict[str, Any] = {}

289 for line in out:

290 if len(line) == 0: # end of file

291 break

292 if re.search(r'^\s*\*+$', line) and read_title: # end of header

293 break

294

295 if re.search(r'\**.* Title \**', line):

296 read_title = True

297

298 # General Parameters

299

300 elif 'output verbosity' in line:

301 parameters['iprint'] = int(line.split()[-1][1])

302 elif re.match(r'\stype of calculation\s*:', line):

303 parameters['task'] = {

304 'single point energy': 'SinglePoint',

305 'geometry optimization': 'GeometryOptimization',

306 'band structure': 'BandStructure',

307 'molecular dynamics': 'MolecularDynamics',

308 'optical properties': 'Optics',

309 'phonon calculation': 'Phonon',

310 'E-field calculation': 'Efield',

311 'Phonon followed by E-field': 'Phonon+Efield',

312 'transition state search': 'TransitionStateSearch',

313 'Magnetic Resonance': 'MagRes',

314 'Core level spectra': 'Elnes',

315 'Electronic Spectroscopy': 'ElectronicSpectroscopy',

316 }[line.split(':')[-1].strip()]

317 elif 'stress calculation' in line:

318 parameters['calculate_stress'] = _parse_on_off(line.split()[-1])

319 elif 'calculation limited to maximum' in line:

320 parameters['run_time'] = float(line.split()[-2])

321 elif re.match(r'\soptimization strategy\s*:', line):

322 parameters['opt_strategy'] = {

323 'maximize speed(+++)': 'Speed',

324 'minimize memory(---)': 'Memory',

325 'balance speed and memory': 'Default',

326 }[line.split(':')[-1].strip()]

327

328 # Exchange-Correlation Parameters

329 elif re.match(r'\susing functional\s*:', line):

330 functional_abbrevs = {

331 'Local Density Approximation': 'LDA',

332 'Perdew Wang (1991)': 'PW91',

333 'Perdew Burke Ernzerhof': 'PBE',

334 'revised Perdew Burke Ernzerhof': 'RPBE',

335 'PBE with Wu-Cohen exchange': 'WC',

336 'PBE for solids (2008)': 'PBESOL',

337 'Hartree-Fock': 'HF',

338 'Hartree-Fock +': 'HF-LDA',

339 'Screened Hartree-Fock': 'sX',

340 'Screened Hartree-Fock + ': 'sX-LDA',

341 'hybrid PBE0': 'PBE0',

342 'hybrid B3LYP': 'B3LYP',

343 'hybrid HSE03': 'HSE03',

344 'hybrid HSE06': 'HSE06',

345 'RSCAN': 'RSCAN',

346 }

347

348 # If the name is not recognised, use the whole string.

349 # This won't work in a new calculation, so will need to load from

350 # .param file in such cases... but at least it will fail rather

351 # than use the wrong XC!

352 _xc_full_name = line.split(':')[-1].strip()

353 parameters['xc_functional'] = functional_abbrevs.get(

354 _xc_full_name, _xc_full_name)

355

356 elif 'DFT+D: Semi-empirical dispersion correction' in line:

357 parameters['sedc_apply'] = _parse_on_off(line.split()[-1])

358 elif 'SEDC with' in line:

359 parameters['sedc_scheme'] = {

360 'OBS correction scheme': 'OBS',

361 'G06 correction scheme': 'G06',

362 'D3 correction scheme': 'D3',

363 'D3(BJ) correction scheme': 'D3-BJ',

364 'D4 correction scheme': 'D4',

365 'JCHS correction scheme': 'JCHS',

366 'TS correction scheme': 'TS',

367 'TSsurf correction scheme': 'TSSURF',

368 'TS+SCS correction scheme': 'TSSCS',

369 'aperiodic TS+SCS correction scheme': 'ATSSCS',

370 'aperiodic MBD@SCS method': 'AMBD',

371 'MBD@SCS method': 'MBD',

372 'aperiodic MBD@rsSCS method': 'AMBD*',

373 'MBD@rsSCS method': 'MBD*',

374 'XDM correction scheme': 'XDM',

375 }[line.split(':')[-1].strip()]

376

377 # Basis Set Parameters

378

379 elif 'basis set accuracy' in line:

380 parameters['basis_precision'] = line.split()[-1]

381 elif 'plane wave basis set cut-off' in line:

382 parameters['cut_off_energy'] = float(line.split()[-2])

383 elif re.match(r'\sfinite basis set correction\s*:', line):

384 parameters['finite_basis_corr'] = {

385 'none': 0,

386 'manual': 1,

387 'automatic': 2,

388 }[line.split()[-1]]

389

390 # Electronic Parameters

391

392 elif 'treating system as spin-polarized' in line:

393 parameters['spin_polarized'] = True

394

395 # Electronic Minimization Parameters

396

397 elif 'Treating system as non-metallic' in line:

398 parameters['fix_occupancy'] = True

399 elif 'total energy / atom convergence tol.' in line:

400 parameters['elec_energy_tol'] = float(line.split()[-2])

401 elif 'convergence tolerance window' in line:

402 parameters['elec_convergence_win'] = int(line.split()[-2])

403 elif 'max. number of SCF cycles:' in line:

404 parameters['max_scf_cycles'] = float(line.split()[-1])

405 elif 'dump wavefunctions every' in line:

406 parameters['num_dump_cycles'] = float(line.split()[-3])

407

408 # Density Mixing Parameters

409

410 elif 'density-mixing scheme' in line:

411 parameters['mixing_scheme'] = line.split()[-1]

412

413 return parameters

414

415

416def _read_unit_cell(out: io.TextIOBase):

417 """Read a Unit Cell block from a .castep file."""

418 for line in out:

419 fields = line.split()

420 if len(fields) == 6:

421 break

422 lattice_real = []

423 for _ in range(3):

424 lattice_real.append([float(f) for f in fields[0:3]])

425 line = out.readline()

426 fields = line.split()

427 return np.array(lattice_real)

428

429

430def _read_forces(out: io.TextIOBase, n_atoms: int):

431 """Read a block for atomic forces from a .castep file."""

432 constraints: List[FixConstraint] = []

433 forces = []

434 for line in out:

435 fields = line.split()

436 if len(fields) == 7:

437 break

438 for n in range(n_atoms):

439 consd = np.array([0, 0, 0])

440 fxyz = [0.0, 0.0, 0.0]

441 for i, force_component in enumerate(fields[-4:-1]):

442 if force_component.count("(cons'd)") > 0:

443 consd[i] = 1

444 # remove constraint labels in force components

445 fxyz[i] = float(force_component.replace("(cons'd)", ''))

446 if consd.all():

447 constraints.append(FixAtoms(n))

448 elif consd.any():

449 constraints.append(FixCartesian(n, consd))

450 forces.append(fxyz)

451 line = out.readline()

452 fields = line.split()

453 return forces, constraints

454

455

456def _read_fractional_coordinates(out: io.TextIOBase, n_atoms: int):

457 """Read fractional coordinates from a .castep file."""

458 species: List[str] = []

459 custom_species: Optional[List[str]] = None # A CASTEP special thing

460 positions_frac: List[List[float]] = []

461 for line in out:

462 fields = line.split()

463 if len(fields) == 7:

464 break

465 for _ in range(n_atoms):

466 spec_custom = fields[1].split(':', 1)

467 elem = spec_custom[0]

468 if len(spec_custom) > 1 and custom_species is None:

469 # Add it to the custom info!

470 custom_species = list(species)

471 species.append(elem)

472 if custom_species is not None:

473 custom_species.append(fields[1])

474 positions_frac.append([float(s) for s in fields[3:6]])

475 line = out.readline()

476 fields = line.split()

477 return species, custom_species, positions_frac

478

479

480def _read_stress(out: io.TextIOBase):

481 """Read a block for the stress tensor from a .castep file."""

482 for line in out:

483 fields = line.split()

484 if len(fields) == 6:

485 break

486 results = {}

487 stress = []

488 for _ in range(3):

489 stress.append([float(s) for s in fields[2:5]])

490 line = out.readline()

491 fields = line.split()

492 # stress in .castep file is given in GPa

493 results['stress'] = np.array(stress) * units.GPa

494 results['stress'] = results['stress'].reshape(9)[[0, 4, 8, 5, 2, 1]]

495 line = out.readline()

496 if "Pressure:" in line:

497 results['pressure'] = float(

498 line.split()[-2]) * units.GPa # type: ignore[assignment]

499 return results

500

501

502def _add_atoms(

503 images,

504 lattice_real,

505 species,

506 custom_species,

507 positions_frac,

508 constraints,

509 results,

510):

511 # If all the lattice parameters are fixed,

512 # the `Unit Cell` block in the .castep file is not printed

513 # in every ionic step.

514 # The lattice paramters are therefore taken from the last step.

515 # This happens:

516 # - `GeometryOptimization`: `FIX_ALL_CELL : TRUE`

517 # - `MolecularDynamics`: `MD_ENSEMBLE : NVE or NVT`

518 if lattice_real is None:

519 lattice_real = images[-1].cell.copy()

520

521 # for highly symmetric systems (where essentially only the

522 # stress is optimized, but the atomic positions) positions

523 # are only printed once.

524 if species is None:

525 species = images[-1].symbols

526 if positions_frac is None:

527 positions_frac = images[-1].get_scaled_positions()

528

529 _set_energy_and_free_energy(results)

530

531 atoms = Atoms(

532 species,

533 cell=lattice_real,

534 constraint=constraints,

535 pbc=True,

536 scaled_positions=positions_frac,

537 )

538 if custom_species is not None:

539 atoms.new_array(

540 'castep_custom_species',

541 np.array(custom_species),

542 )

543

544 atoms.calc = SinglePointCalculator(atoms)

545 atoms.calc.results = results

546

547 images.append(atoms)

548

549

550def _read_mulliken_charges(out: io.TextIOBase):

551 """Read a block for Mulliken charges from a .castep file."""

552 for i in range(3):

553 line = out.readline()

554 if i == 1:

555 spin_polarized = 'Spin' in line

556 results = defaultdict(list)

557 for line in out:

558 fields = line.split()

559 if len(fields) == 1:

560 break

561 if spin_polarized:

562 if len(fields) != 7: # due to CASTEP 18 outformat changes

563 results['charges'].append(float(fields[-2]))

564 results['magmoms'].append(float(fields[-1]))

565 else:

566 results['charges'].append(float(fields[-1]))

567 return {k: np.array(v) for k, v in results.items()}

568

569

570def _read_hirshfeld_details(out: io.TextIOBase, n_atoms: int):

571 """Read the Hirshfeld analysis when iprint > 1 from a .castep file."""

572 results = defaultdict(list)

573 for _ in range(n_atoms):

574 for line in out:

575 if line.strip() == '':

576 break # end for each atom

577 if 'Hirshfeld / free atomic volume :' in line:

578 line = out.readline()

579 fields = line.split()

580 results['hirshfeld_volume_ratios'].append(float(fields[0]))

581 return {k: np.array(v) for k, v in results.items()}

582

583

584def _read_hirshfeld_charges(out: io.TextIOBase):

585 """Read a block for Hirshfeld charges from a .castep file."""

586 for i in range(3):

587 line = out.readline()

588 if i == 1:

589 spin_polarized = 'Spin' in line

590 results = defaultdict(list)

591 for line in out:

592 fields = line.split()

593 if len(fields) == 1:

594 break

595 if spin_polarized:

596 results['hirshfeld_charges'].append(float(fields[-2]))

597 results['hirshfeld_magmoms'].append(float(fields[-1]))

598 else:

599 results['hirshfeld_charges'].append(float(fields[-1]))

600 return {k: np.array(v) for k, v in results.items()}

601

602

603def _set_energy_and_free_energy(results: Dict[str, Any]):

604 """Set values referred to as `energy` and `free_energy`."""

605 if 'energy_with_dispersion_correction' in results:

606 suffix = '_with_dispersion_correction'

607 else:

608 suffix = '_without_dispersion_correction'

609

610 if 'free_energy' + suffix in results: # metallic

611 keye = 'energy_zero' + suffix

612 keyf = 'free_energy' + suffix

613 else: # non-metallic

614 # The finite basis set correction is applied to the energy at finite T

615 # (not the energy at 0 K). We should hence refer to the corrected value

616 # as `energy` only when the free energy is unavailable, i.e., only when

617 # FIX_OCCUPANCY : TRUE and thus no smearing is applied.

618 if 'energy_with_finite_basis_set_correction' in results:

619 keye = 'energy_with_finite_basis_set_correction'

620 else:

621 keye = 'energy' + suffix

622 keyf = 'energy' + suffix

623

624 results['energy'] = results[keye]

625 results['free_energy'] = results[keyf]

Coverage for /builds/ase/ase/ase/io/castep/castep_reader.py: 91.15%

339 statements