Coverage for ase / io / orca.py: 91.71%

193 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-30 08:22 +0000

1import re 

2import warnings 

3from collections.abc import Iterable, Iterator, Sequence 

4from io import StringIO 

5from pathlib import Path 

6 

7import numpy as np 

8 

9from ase import Atoms 

10from ase.calculators.calculator import PropertyNotImplementedError 

11from ase.calculators.singlepoint import SinglePointDFTCalculator 

12from ase.io import ParseError, read 

13from ase.units import Bohr, Hartree 

14from ase.utils import deprecated, reader, writer 

15 

16 

17@reader 

18def read_geom_orcainp(fd): 

19 """Method to read geometry from an ORCA input file.""" 

20 lines = fd.readlines() 

21 

22 # Find geometry region of input file. 

23 stopline = 0 

24 for index, line in enumerate(lines): 

25 if line[1:].startswith('xyz '): 

26 startline = index + 1 

27 stopline = -1 

28 elif line.startswith('end') and stopline == -1: 

29 stopline = index 

30 elif line.startswith('*') and stopline == -1: 

31 stopline = index 

32 # Format and send to read_xyz. 

33 xyz_text = '%i\n' % (stopline - startline) 

34 xyz_text += ' geometry\n' 

35 for line in lines[startline:stopline]: 

36 xyz_text += line 

37 atoms = read(StringIO(xyz_text), format='xyz') 

38 atoms.set_cell((0.0, 0.0, 0.0)) # no unit cell defined 

39 

40 return atoms 

41 

42 

43@writer 

44def write_orca(fd, atoms, params): 

45 # conventional filename: '<name>.inp' 

46 fd.write(f'! {params["orcasimpleinput"]} \n') 

47 fd.write(f'{params["orcablocks"]} \n') 

48 

49 if 'coords' not in params['orcablocks']: 

50 fd.write('*xyz') 

51 fd.write(' %d' % params['charge']) 

52 fd.write(' %d \n' % params['mult']) 

53 for atom in atoms: 

54 if atom.tag == 71: # 71 is ascii G (Ghost) 

55 symbol = atom.symbol + ' : ' 

56 else: 

57 symbol = atom.symbol + ' ' 

58 fd.write( 

59 symbol 

60 + str(atom.position[0]) 

61 + ' ' 

62 + str(atom.position[1]) 

63 + ' ' 

64 + str(atom.position[2]) 

65 + '\n' 

66 ) 

67 fd.write('*\n') 

68 

69 

70def read_charge(lines: list[str]) -> float | None: 

71 """Read sum of atomic charges.""" 

72 charge = None 

73 for line in lines: 

74 if 'Sum of atomic charges' in line: 

75 charge = float(line.split()[-1]) 

76 return charge 

77 

78 

79def read_energy(lines: list[str]) -> float | None: 

80 """Read energy.""" 

81 energy = None 

82 for line in lines: 

83 if 'FINAL SINGLE POINT ENERGY' in line: 

84 if 'Wavefunction not fully converged' in line: 

85 energy = float('nan') 

86 else: 

87 energy = float(line.split()[-1]) 

88 if energy is not None: 

89 return energy * Hartree 

90 return energy 

91 

92 

93def read_center_of_mass(lines: list[str]) -> np.ndarray | None: 

94 """Scan through text for the center of mass""" 

95 # Example: 

96 # 'The origin for moment calculation is the CENTER OF MASS = 

97 # ( 0.002150, -0.296255 0.086315)' 

98 # Note the missing comma in the output 

99 com = None 

100 for line in lines: 

101 if 'The origin for moment calculation is the CENTER OF MASS' in line: 

102 line = re.sub(r'[(),]', '', line) 

103 com = np.array([float(_) for _ in line.split()[-3:]]) 

104 if com is not None: 

105 return com * Bohr # return the last match 

106 return com 

107 

108 

109def read_dipole(lines: list[str]) -> np.ndarray | None: 

110 """Read dipole moment. 

111 

112 Note that the read dipole moment is for the COM frame of reference. 

113 """ 

114 dipole = None 

115 for line in lines: 

116 if 'Total Dipole Moment' in line: 

117 dipole = np.array([float(x) for x in line.split()[-3:]]) * Bohr 

118 return dipole # Return the last match 

119 

120 

121def _read_atoms(lines: Sequence[str]) -> Atoms: 

122 """Read atomic positions and symbols. Create Atoms object.""" 

123 line_start = -1 

124 natoms = 0 

125 

126 for ll, line in enumerate(lines): 

127 if 'Number of atoms' in line: 

128 natoms = int(line.split()[4]) 

129 elif 'CARTESIAN COORDINATES (ANGSTROEM)' in line: 

130 line_start = ll + 2 

131 

132 # Check if atoms present and if their number is given. 

133 if line_start == -1: 

134 raise ParseError( 

135 'No information about the structure in the ORCA output file.' 

136 ) 

137 elif natoms == 0: 

138 raise ParseError( 

139 'No information about number of atoms in the ORCA output file.' 

140 ) 

141 

142 positions = np.zeros((natoms, 3)) 

143 symbols = [''] * natoms 

144 

145 for ll, line in enumerate(lines[line_start : (line_start + natoms)]): 

146 inp = line.split() 

147 positions[ll, :] = [float(pos) for pos in inp[1:4]] 

148 symbols[ll] = inp[0] 

149 

150 atoms = Atoms(symbols=symbols, positions=positions) 

151 atoms.set_pbc([False, False, False]) 

152 

153 return atoms 

154 

155 

156def read_forces(lines: list[str]) -> np.ndarray | None: 

157 """Read forces from output file if available. Else return None. 

158 

159 Taking the forces from the output files (instead of the engrad-file) to 

160 be more general. The forces can be present in general output even if 

161 the engrad file is not there. 

162 

163 Note: If more than one geometry relaxation step is available, 

164 forces do not always exist for the first step. In this case, for 

165 the first step an array of None will be returned. The following 

166 relaxation steps will then have forces available. 

167 """ 

168 line_start = -1 

169 natoms = 0 

170 record_gradient = True 

171 

172 for ll, line in enumerate(lines): 

173 if 'Number of atoms' in line: 

174 natoms = int(line.split()[4]) 

175 # Read in only first set of forces for each chunk 

176 # (Excited state calculations can have several sets of 

177 # forces per chunk) 

178 elif 'CARTESIAN GRADIENT' in line and record_gradient: 

179 line_start = ll + 3 

180 record_gradient = False 

181 

182 # Check if number of atoms is available. 

183 if natoms == 0: 

184 raise ParseError( 

185 'No information about number of atoms in the ORCA output file.' 

186 ) 

187 

188 # Forces are not always available. If not available, return None. 

189 if line_start == -1: 

190 return None 

191 

192 forces = np.zeros((natoms, 3)) 

193 

194 for ll, line in enumerate(lines[line_start : (line_start + natoms)]): 

195 inp = line.split() 

196 forces[ll, :] = [float(pos) for pos in inp[3:6]] 

197 

198 forces *= -Hartree / Bohr 

199 return forces 

200 

201 

202def get_chunks(lines: Iterable[str]) -> Iterator[list[str]]: 

203 """Separate out the chunks for each geometry relaxation step.""" 

204 finished = False 

205 relaxation_finished = False 

206 relaxation = False 

207 

208 chunk_endings = [ 

209 'ORCA TERMINATED NORMALLY', 

210 'ORCA GEOMETRY RELAXATION STEP', 

211 ] 

212 chunk_lines = [] 

213 for line in lines: 

214 # Assemble chunks 

215 if any([ending in line for ending in chunk_endings]): 

216 chunk_lines.append(line) 

217 yield chunk_lines 

218 chunk_lines = [] 

219 else: 

220 chunk_lines.append(line) 

221 

222 if 'ORCA TERMINATED NORMALLY' in line: 

223 finished = True 

224 

225 if 'THE OPTIMIZATION HAS CONVERGED' in line: 

226 relaxation_finished = True 

227 

228 # Check if calculation is an optimization. 

229 if 'ORCA SCF GRADIENT CALCULATION' in line: 

230 relaxation = True 

231 

232 # Give error if calculation not finished for single-point calculations. 

233 if not finished and not relaxation: 

234 raise ParseError('Error: Calculation did not finish!') 

235 # Give warning if calculation not finished for geometry optimizations. 

236 elif not finished and relaxation: 

237 warnings.warn('Calculation did not finish!') 

238 # Calculation may have finished, but relaxation may have not. 

239 elif not relaxation_finished and relaxation: 

240 warnings.warn('Geometry optimization did not converge!') 

241 

242 

243@reader 

244def read_orca_output(fd, index=slice(None)): 

245 """From the ORCA output file: Read Energy, positions, forces 

246 and dipole moment. 

247 

248 Create separated atoms object for each geometry frame through 

249 parsing the output file in chunks. 

250 """ 

251 images = [] 

252 

253 # Iterate over chunks and create a separate atoms object for each 

254 for chunk in get_chunks(fd): 

255 energy = read_energy(chunk) 

256 atoms = _read_atoms(chunk) 

257 forces = read_forces(chunk) 

258 dipole = read_dipole(chunk) 

259 charge = read_charge(chunk) 

260 com = read_center_of_mass(chunk) 

261 

262 # Correct dipole moment for centre-of-mass 

263 if com is not None and dipole is not None: 

264 dipole = dipole + com * charge 

265 

266 atoms.calc = SinglePointDFTCalculator( 

267 atoms, 

268 energy=energy, 

269 free_energy=energy, 

270 forces=forces, 

271 # stress=self.stress, 

272 # stresses=self.stresses, 

273 # magmom=self.magmom, 

274 dipole=dipole, 

275 # dielectric_tensor=self.dielectric_tensor, 

276 # polarization=self.polarization, 

277 ) 

278 # collect images 

279 images.append(atoms) 

280 

281 return images[index] 

282 

283 

284@reader 

285def read_orca_engrad(fd): 

286 """Read Forces from ORCA .engrad file.""" 

287 getgrad = False 

288 gradients = [] 

289 tempgrad = [] 

290 for _, line in enumerate(fd): 

291 if line.find('# The current gradient') >= 0: 

292 getgrad = True 

293 gradients = [] 

294 tempgrad = [] 

295 continue 

296 if getgrad and '#' not in line: 

297 grad = line.split()[-1] 

298 tempgrad.append(float(grad)) 

299 if len(tempgrad) == 3: 

300 gradients.append(tempgrad) 

301 tempgrad = [] 

302 if '# The at' in line: 

303 getgrad = False 

304 

305 forces = -np.array(gradients) * Hartree / Bohr 

306 return forces 

307 

308 

309@deprecated( 

310 'Please use ase.io.read instead of read_orca_outputs, e.g.,\n' 

311 'from ase.io import read \n' 

312 'atoms = read("orca.out")', 

313 DeprecationWarning, 

314) 

315def read_orca_outputs(directory, stdout_path): 

316 """Reproduces old functionality of reading energy, forces etc 

317 directly from output without creation of atoms object. 

318 This is kept to ensure backwards compatability 

319 .. deprecated:: 3.24.0 

320 Use of read_orca_outputs is deprected, please 

321 process ORCA output by using ase.io.read 

322 e.g., read('orca.out')" 

323 """ 

324 stdout_path = Path(stdout_path) 

325 atoms = read_orca_output(stdout_path, index=-1) 

326 results = {} 

327 results['energy'] = atoms.get_total_energy() 

328 results['free_energy'] = atoms.get_total_energy() 

329 

330 try: 

331 results['dipole'] = atoms.get_dipole_moment() 

332 except PropertyNotImplementedError: 

333 pass 

334 

335 # Does engrad always exist? - No! 

336 # Will there be other files -No -> We should just take engrad 

337 # as a direct argument. Or maybe this function does not even need to 

338 # exist. 

339 engrad_path = stdout_path.with_suffix('.engrad') 

340 if engrad_path.is_file(): 

341 results['forces'] = read_orca_engrad(engrad_path) 

342 print("""Warning: If you are reading in an engrad file from a 

343 geometry optimization, check very carefully. 

344 ORCA does not by default supply the forces for the 

345 converged geometry!""") 

346 return results