Coverage for /builds/ase/ase/ase/io/orca.py: 91.75%

1import re

2import warnings

3from collections.abc import Iterable, Iterator

4from io import StringIO

5from pathlib import Path

6from typing import List, Optional, Sequence

8import numpy as np

10from ase import Atoms

11from ase.calculators.calculator import PropertyNotImplementedError

12from ase.calculators.singlepoint import SinglePointDFTCalculator

13from ase.io import ParseError, read

14from ase.units import Bohr, Hartree

15from ase.utils import deprecated, reader, writer

18@reader

19def read_geom_orcainp(fd):

20 """Method to read geometry from an ORCA input file."""

21 lines = fd.readlines()

23 # Find geometry region of input file.

24 stopline = 0

25 for index, line in enumerate(lines):

26 if line[1:].startswith('xyz '):

27 startline = index + 1

28 stopline = -1

29 elif line.startswith('end') and stopline == -1:

30 stopline = index

31 elif line.startswith('*') and stopline == -1:

32 stopline = index

33 # Format and send to read_xyz.

34 xyz_text = '%i\n' % (stopline - startline)

35 xyz_text += ' geometry\n'

36 for line in lines[startline:stopline]:

37 xyz_text += line

38 atoms = read(StringIO(xyz_text), format='xyz')

39 atoms.set_cell((0.0, 0.0, 0.0)) # no unit cell defined

41 return atoms

44@writer

45def write_orca(fd, atoms, params):

46 # conventional filename: '<name>.inp'

47 fd.write(f'! {params["orcasimpleinput"]} \n')

48 fd.write(f'{params["orcablocks"]} \n')

50 if 'coords' not in params['orcablocks']:

51 fd.write('*xyz')

52 fd.write(' %d' % params['charge'])

53 fd.write(' %d \n' % params['mult'])

54 for atom in atoms:

55 if atom.tag == 71: # 71 is ascii G (Ghost)

56 symbol = atom.symbol + ' : '

57 else:

58 symbol = atom.symbol + ' '

59 fd.write(

60 symbol

61 + str(atom.position[0])

62 + ' '

63 + str(atom.position[1])

64 + ' '

65 + str(atom.position[2])

66 + '\n'

67 )

68 fd.write('*\n')

71def read_charge(lines: List[str]) -> Optional[float]:

72 """Read sum of atomic charges."""

73 charge = None

74 for line in lines:

75 if 'Sum of atomic charges' in line:

76 charge = float(line.split()[-1])

77 return charge

80def read_energy(lines: List[str]) -> Optional[float]:

81 """Read energy."""

82 energy = None

83 for line in lines:

84 if 'FINAL SINGLE POINT ENERGY' in line:

85 if 'Wavefunction not fully converged' in line:

86 energy = float('nan')

87 else:

88 energy = float(line.split()[-1])

89 if energy is not None:

90 return energy * Hartree

91 return energy

94def read_center_of_mass(lines: List[str]) -> Optional[np.ndarray]:

95 """Scan through text for the center of mass"""

96 # Example:

97 # 'The origin for moment calculation is the CENTER OF MASS =

98 # ( 0.002150, -0.296255 0.086315)'

99 # Note the missing comma in the output

100 com = None

101 for line in lines:

102 if 'The origin for moment calculation is the CENTER OF MASS' in line:

103 line = re.sub(r'[(),]', '', line)

104 com = np.array([float(_) for _ in line.split()[-3:]])

105 if com is not None:

106 return com * Bohr # return the last match

107 return com

108

109

110def read_dipole(lines: List[str]) -> Optional[np.ndarray]:

111 """Read dipole moment.

112

113 Note that the read dipole moment is for the COM frame of reference.

114 """

115 dipole = None

116 for line in lines:

117 if 'Total Dipole Moment' in line:

118 dipole = np.array([float(x) for x in line.split()[-3:]]) * Bohr

119 return dipole # Return the last match

120

121

122def _read_atoms(lines: Sequence[str]) -> Atoms:

123 """Read atomic positions and symbols. Create Atoms object."""

124 line_start = -1

125 natoms = 0

126

127 for ll, line in enumerate(lines):

128 if 'Number of atoms' in line:

129 natoms = int(line.split()[4])

130 elif 'CARTESIAN COORDINATES (ANGSTROEM)' in line:

131 line_start = ll + 2

132

133 # Check if atoms present and if their number is given.

134 if line_start == -1:

135 raise ParseError(

136 'No information about the structure in the ORCA output file.'

137 )

138 elif natoms == 0:

139 raise ParseError(

140 'No information about number of atoms in the ORCA output file.'

141 )

142

143 positions = np.zeros((natoms, 3))

144 symbols = [''] * natoms

145

146 for ll, line in enumerate(lines[line_start : (line_start + natoms)]):

147 inp = line.split()

148 positions[ll, :] = [float(pos) for pos in inp[1:4]]

149 symbols[ll] = inp[0]

150

151 atoms = Atoms(symbols=symbols, positions=positions)

152 atoms.set_pbc([False, False, False])

153

154 return atoms

155

156

157def read_forces(lines: List[str]) -> Optional[np.ndarray]:

158 """Read forces from output file if available. Else return None.

159

160 Taking the forces from the output files (instead of the engrad-file) to

161 be more general. The forces can be present in general output even if

162 the engrad file is not there.

163

164 Note: If more than one geometry relaxation step is available,

165 forces do not always exist for the first step. In this case, for

166 the first step an array of None will be returned. The following

167 relaxation steps will then have forces available.

168 """

169 line_start = -1

170 natoms = 0

171 record_gradient = True

172

173 for ll, line in enumerate(lines):

174 if 'Number of atoms' in line:

175 natoms = int(line.split()[4])

176 # Read in only first set of forces for each chunk

177 # (Excited state calculations can have several sets of

178 # forces per chunk)

179 elif 'CARTESIAN GRADIENT' in line and record_gradient:

180 line_start = ll + 3

181 record_gradient = False

182

183 # Check if number of atoms is available.

184 if natoms == 0:

185 raise ParseError(

186 'No information about number of atoms in the ORCA output file.'

187 )

188

189 # Forces are not always available. If not available, return None.

190 if line_start == -1:

191 return None

192

193 forces = np.zeros((natoms, 3))

194

195 for ll, line in enumerate(lines[line_start : (line_start + natoms)]):

196 inp = line.split()

197 forces[ll, :] = [float(pos) for pos in inp[3:6]]

198

199 forces *= -Hartree / Bohr

200 return forces

201

202

203def get_chunks(lines: Iterable[str]) -> Iterator[list[str]]:

204 """Separate out the chunks for each geometry relaxation step."""

205 finished = False

206 relaxation_finished = False

207 relaxation = False

208

209 chunk_endings = [

210 'ORCA TERMINATED NORMALLY',

211 'ORCA GEOMETRY RELAXATION STEP',

212 ]

213 chunk_lines = []

214 for line in lines:

215 # Assemble chunks

216 if any([ending in line for ending in chunk_endings]):

217 chunk_lines.append(line)

218 yield chunk_lines

219 chunk_lines = []

220 else:

221 chunk_lines.append(line)

222

223 if 'ORCA TERMINATED NORMALLY' in line:

224 finished = True

225

226 if 'THE OPTIMIZATION HAS CONVERGED' in line:

227 relaxation_finished = True

228

229 # Check if calculation is an optimization.

230 if 'ORCA SCF GRADIENT CALCULATION' in line:

231 relaxation = True

232

233 # Give error if calculation not finished for single-point calculations.

234 if not finished and not relaxation:

235 raise ParseError('Error: Calculation did not finish!')

236 # Give warning if calculation not finished for geometry optimizations.

237 elif not finished and relaxation:

238 warnings.warn('Calculation did not finish!')

239 # Calculation may have finished, but relaxation may have not.

240 elif not relaxation_finished and relaxation:

241 warnings.warn('Geometry optimization did not converge!')

242

243

244@reader

245def read_orca_output(fd, index=slice(None)):

246 """From the ORCA output file: Read Energy, positions, forces

247 and dipole moment.

248

249 Create separated atoms object for each geometry frame through

250 parsing the output file in chunks.

251 """

252 images = []

253

254 # Iterate over chunks and create a separate atoms object for each

255 for chunk in get_chunks(fd):

256 energy = read_energy(chunk)

257 atoms = _read_atoms(chunk)

258 forces = read_forces(chunk)

259 dipole = read_dipole(chunk)

260 charge = read_charge(chunk)

261 com = read_center_of_mass(chunk)

262

263 # Correct dipole moment for centre-of-mass

264 if com is not None and dipole is not None:

265 dipole = dipole + com * charge

266

267 atoms.calc = SinglePointDFTCalculator(

268 atoms,

269 energy=energy,

270 free_energy=energy,

271 forces=forces,

272 # stress=self.stress,

273 # stresses=self.stresses,

274 # magmom=self.magmom,

275 dipole=dipole,

276 # dielectric_tensor=self.dielectric_tensor,

277 # polarization=self.polarization,

278 )

279 # collect images

280 images.append(atoms)

281

282 return images[index]

283

284

285@reader

286def read_orca_engrad(fd):

287 """Read Forces from ORCA .engrad file."""

288 getgrad = False

289 gradients = []

290 tempgrad = []

291 for _, line in enumerate(fd):

292 if line.find('# The current gradient') >= 0:

293 getgrad = True

294 gradients = []

295 tempgrad = []

296 continue

297 if getgrad and '#' not in line:

298 grad = line.split()[-1]

299 tempgrad.append(float(grad))

300 if len(tempgrad) == 3:

301 gradients.append(tempgrad)

302 tempgrad = []

303 if '# The at' in line:

304 getgrad = False

305

306 forces = -np.array(gradients) * Hartree / Bohr

307 return forces

308

309

310@deprecated(

311 'Please use ase.io.read instead of read_orca_outputs, e.g.,\n'

312 'from ase.io import read \n'

313 'atoms = read("orca.out")',

314 DeprecationWarning,

315)

316def read_orca_outputs(directory, stdout_path):

317 """Reproduces old functionality of reading energy, forces etc

318 directly from output without creation of atoms object.

319 This is kept to ensure backwards compatability

320 .. deprecated:: 3.24.0

321 Use of read_orca_outputs is deprected, please

322 process ORCA output by using ase.io.read

323 e.g., read('orca.out')"

324 """

325 stdout_path = Path(stdout_path)

326 atoms = read_orca_output(stdout_path, index=-1)

327 results = {}

328 results['energy'] = atoms.get_total_energy()

329 results['free_energy'] = atoms.get_total_energy()

330

331 try:

332 results['dipole'] = atoms.get_dipole_moment()

333 except PropertyNotImplementedError:

334 pass

335

336 # Does engrad always exist? - No!

337 # Will there be other files -No -> We should just take engrad

338 # as a direct argument. Or maybe this function does not even need to

339 # exist.

340 engrad_path = stdout_path.with_suffix('.engrad')

341 if engrad_path.is_file():

342 results['forces'] = read_orca_engrad(engrad_path)

343 print("""Warning: If you are reading in an engrad file from a

344 geometry optimization, check very carefully.

345 ORCA does not by default supply the forces for the

346 converged geometry!""")

347 return results