Coverage for ase/io/orca.py: 91.71%

1import re

2import warnings

3from collections.abc import Iterable, Iterator, Sequence

4from io import StringIO

5from pathlib import Path

7import numpy as np

9from ase import Atoms

10from ase.calculators.calculator import PropertyNotImplementedError

11from ase.calculators.singlepoint import SinglePointDFTCalculator

12from ase.io import ParseError, read

13from ase.units import Bohr, Hartree

14from ase.utils import deprecated, reader, writer

17@reader

18def read_geom_orcainp(fd):

19 """Method to read geometry from an ORCA input file."""

20 lines = fd.readlines()

22 # Find geometry region of input file.

23 stopline = 0

24 for index, line in enumerate(lines):

25 if line[1:].startswith('xyz '):

26 startline = index + 1

27 stopline = -1

28 elif line.startswith('end') and stopline == -1:

29 stopline = index

30 elif line.startswith('*') and stopline == -1:

31 stopline = index

32 # Format and send to read_xyz.

33 xyz_text = '%i\n' % (stopline - startline)

34 xyz_text += ' geometry\n'

35 for line in lines[startline:stopline]:

36 xyz_text += line

37 atoms = read(StringIO(xyz_text), format='xyz')

38 atoms.set_cell((0.0, 0.0, 0.0)) # no unit cell defined

40 return atoms

43@writer

44def write_orca(fd, atoms, params):

45 # conventional filename: '<name>.inp'

46 fd.write(f'! {params["orcasimpleinput"]} \n')

47 fd.write(f'{params["orcablocks"]} \n')

49 if 'coords' not in params['orcablocks']:

50 fd.write('*xyz')

51 fd.write(' %d' % params['charge'])

52 fd.write(' %d \n' % params['mult'])

53 for atom in atoms:

54 if atom.tag == 71: # 71 is ascii G (Ghost)

55 symbol = atom.symbol + ' : '

56 else:

57 symbol = atom.symbol + ' '

58 fd.write(

59 symbol

60 + str(atom.position[0])

61 + ' '

62 + str(atom.position[1])

63 + ' '

64 + str(atom.position[2])

65 + '\n'

66 )

67 fd.write('*\n')

70def read_charge(lines: list[str]) -> float | None:

71 """Read sum of atomic charges."""

72 charge = None

73 for line in lines:

74 if 'Sum of atomic charges' in line:

75 charge = float(line.split()[-1])

76 return charge

79def read_energy(lines: list[str]) -> float | None:

80 """Read energy."""

81 energy = None

82 for line in lines:

83 if 'FINAL SINGLE POINT ENERGY' in line:

84 if 'Wavefunction not fully converged' in line:

85 energy = float('nan')

86 else:

87 energy = float(line.split()[-1])

88 if energy is not None:

89 return energy * Hartree

90 return energy

93def read_center_of_mass(lines: list[str]) -> np.ndarray | None:

94 """Scan through text for the center of mass"""

95 # Example:

96 # 'The origin for moment calculation is the CENTER OF MASS =

97 # ( 0.002150, -0.296255 0.086315)'

98 # Note the missing comma in the output

99 com = None

100 for line in lines:

101 if 'The origin for moment calculation is the CENTER OF MASS' in line:

102 line = re.sub(r'[(),]', '', line)

103 com = np.array([float(_) for _ in line.split()[-3:]])

104 if com is not None:

105 return com * Bohr # return the last match

106 return com

107

108

109def read_dipole(lines: list[str]) -> np.ndarray | None:

110 """Read dipole moment.

111

112 Note that the read dipole moment is for the COM frame of reference.

113 """

114 dipole = None

115 for line in lines:

116 if 'Total Dipole Moment' in line:

117 dipole = np.array([float(x) for x in line.split()[-3:]]) * Bohr

118 return dipole # Return the last match

119

120

121def _read_atoms(lines: Sequence[str]) -> Atoms:

122 """Read atomic positions and symbols. Create Atoms object."""

123 line_start = -1

124 natoms = 0

125

126 for ll, line in enumerate(lines):

127 if 'Number of atoms' in line:

128 natoms = int(line.split()[4])

129 elif 'CARTESIAN COORDINATES (ANGSTROEM)' in line:

130 line_start = ll + 2

131

132 # Check if atoms present and if their number is given.

133 if line_start == -1:

134 raise ParseError(

135 'No information about the structure in the ORCA output file.'

136 )

137 elif natoms == 0:

138 raise ParseError(

139 'No information about number of atoms in the ORCA output file.'

140 )

141

142 positions = np.zeros((natoms, 3))

143 symbols = [''] * natoms

144

145 for ll, line in enumerate(lines[line_start : (line_start + natoms)]):

146 inp = line.split()

147 positions[ll, :] = [float(pos) for pos in inp[1:4]]

148 symbols[ll] = inp[0]

149

150 atoms = Atoms(symbols=symbols, positions=positions)

151 atoms.set_pbc([False, False, False])

152

153 return atoms

154

155

156def read_forces(lines: list[str]) -> np.ndarray | None:

157 """Read forces from output file if available. Else return None.

158

159 Taking the forces from the output files (instead of the engrad-file) to

160 be more general. The forces can be present in general output even if

161 the engrad file is not there.

162

163 Note: If more than one geometry relaxation step is available,

164 forces do not always exist for the first step. In this case, for

165 the first step an array of None will be returned. The following

166 relaxation steps will then have forces available.

167 """

168 line_start = -1

169 natoms = 0

170 record_gradient = True

171

172 for ll, line in enumerate(lines):

173 if 'Number of atoms' in line:

174 natoms = int(line.split()[4])

175 # Read in only first set of forces for each chunk

176 # (Excited state calculations can have several sets of

177 # forces per chunk)

178 elif 'CARTESIAN GRADIENT' in line and record_gradient:

179 line_start = ll + 3

180 record_gradient = False

181

182 # Check if number of atoms is available.

183 if natoms == 0:

184 raise ParseError(

185 'No information about number of atoms in the ORCA output file.'

186 )

187

188 # Forces are not always available. If not available, return None.

189 if line_start == -1:

190 return None

191

192 forces = np.zeros((natoms, 3))

193

194 for ll, line in enumerate(lines[line_start : (line_start + natoms)]):

195 inp = line.split()

196 forces[ll, :] = [float(pos) for pos in inp[3:6]]

197

198 forces *= -Hartree / Bohr

199 return forces

200

201

202def get_chunks(lines: Iterable[str]) -> Iterator[list[str]]:

203 """Separate out the chunks for each geometry relaxation step."""

204 finished = False

205 relaxation_finished = False

206 relaxation = False

207

208 chunk_endings = [

209 'ORCA TERMINATED NORMALLY',

210 'ORCA GEOMETRY RELAXATION STEP',

211 ]

212 chunk_lines = []

213 for line in lines:

214 # Assemble chunks

215 if any([ending in line for ending in chunk_endings]):

216 chunk_lines.append(line)

217 yield chunk_lines

218 chunk_lines = []

219 else:

220 chunk_lines.append(line)

221

222 if 'ORCA TERMINATED NORMALLY' in line:

223 finished = True

224

225 if 'THE OPTIMIZATION HAS CONVERGED' in line:

226 relaxation_finished = True

227

228 # Check if calculation is an optimization.

229 if 'ORCA SCF GRADIENT CALCULATION' in line:

230 relaxation = True

231

232 # Give error if calculation not finished for single-point calculations.

233 if not finished and not relaxation:

234 raise ParseError('Error: Calculation did not finish!')

235 # Give warning if calculation not finished for geometry optimizations.

236 elif not finished and relaxation:

237 warnings.warn('Calculation did not finish!')

238 # Calculation may have finished, but relaxation may have not.

239 elif not relaxation_finished and relaxation:

240 warnings.warn('Geometry optimization did not converge!')

241

242

243@reader

244def read_orca_output(fd, index=slice(None)):

245 """From the ORCA output file: Read Energy, positions, forces

246 and dipole moment.

247

248 Create separated atoms object for each geometry frame through

249 parsing the output file in chunks.

250 """

251 images = []

252

253 # Iterate over chunks and create a separate atoms object for each

254 for chunk in get_chunks(fd):

255 energy = read_energy(chunk)

256 atoms = _read_atoms(chunk)

257 forces = read_forces(chunk)

258 dipole = read_dipole(chunk)

259 charge = read_charge(chunk)

260 com = read_center_of_mass(chunk)

261

262 # Correct dipole moment for centre-of-mass

263 if com is not None and dipole is not None:

264 dipole = dipole + com * charge

265

266 atoms.calc = SinglePointDFTCalculator(

267 atoms,

268 energy=energy,

269 free_energy=energy,

270 forces=forces,

271 # stress=self.stress,

272 # stresses=self.stresses,

273 # magmom=self.magmom,

274 dipole=dipole,

275 # dielectric_tensor=self.dielectric_tensor,

276 # polarization=self.polarization,

277 )

278 # collect images

279 images.append(atoms)

280

281 return images[index]

282

283

284@reader

285def read_orca_engrad(fd):

286 """Read Forces from ORCA .engrad file."""

287 getgrad = False

288 gradients = []

289 tempgrad = []

290 for _, line in enumerate(fd):

291 if line.find('# The current gradient') >= 0:

292 getgrad = True

293 gradients = []

294 tempgrad = []

295 continue

296 if getgrad and '#' not in line:

297 grad = line.split()[-1]

298 tempgrad.append(float(grad))

299 if len(tempgrad) == 3:

300 gradients.append(tempgrad)

301 tempgrad = []

302 if '# The at' in line:

303 getgrad = False

304

305 forces = -np.array(gradients) * Hartree / Bohr

306 return forces

307

308

309@deprecated(

310 'Please use ase.io.read instead of read_orca_outputs, e.g.,\n'

311 'from ase.io import read \n'

312 'atoms = read("orca.out")',

313 DeprecationWarning,

314)

315def read_orca_outputs(directory, stdout_path):

316 """Reproduces old functionality of reading energy, forces etc

317 directly from output without creation of atoms object.

318 This is kept to ensure backwards compatability

319 .. deprecated:: 3.24.0

320 Use of read_orca_outputs is deprected, please

321 process ORCA output by using ase.io.read

322 e.g., read('orca.out')"

323 """

324 stdout_path = Path(stdout_path)

325 atoms = read_orca_output(stdout_path, index=-1)

326 results = {}

327 results['energy'] = atoms.get_total_energy()

328 results['free_energy'] = atoms.get_total_energy()

329

330 try:

331 results['dipole'] = atoms.get_dipole_moment()

332 except PropertyNotImplementedError:

333 pass

334

335 # Does engrad always exist? - No!

336 # Will there be other files -No -> We should just take engrad

337 # as a direct argument. Or maybe this function does not even need to

338 # exist.

339 engrad_path = stdout_path.with_suffix('.engrad')

340 if engrad_path.is_file():

341 results['forces'] = read_orca_engrad(engrad_path)

342 print("""Warning: If you are reading in an engrad file from a

343 geometry optimization, check very carefully.

344 ORCA does not by default supply the forces for the

345 converged geometry!""")

346 return results

Coverage for ase / io / orca.py: 91.71%

193 statements