Coverage for /builds/ase/ase/ase/io/orca.py: 91.75%

194 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-08-02 00:12 +0000

1import re 

2import warnings 

3from collections.abc import Iterable, Iterator 

4from io import StringIO 

5from pathlib import Path 

6from typing import List, Optional, Sequence 

7 

8import numpy as np 

9 

10from ase import Atoms 

11from ase.calculators.calculator import PropertyNotImplementedError 

12from ase.calculators.singlepoint import SinglePointDFTCalculator 

13from ase.io import ParseError, read 

14from ase.units import Bohr, Hartree 

15from ase.utils import deprecated, reader, writer 

16 

17 

18@reader 

19def read_geom_orcainp(fd): 

20 """Method to read geometry from an ORCA input file.""" 

21 lines = fd.readlines() 

22 

23 # Find geometry region of input file. 

24 stopline = 0 

25 for index, line in enumerate(lines): 

26 if line[1:].startswith('xyz '): 

27 startline = index + 1 

28 stopline = -1 

29 elif line.startswith('end') and stopline == -1: 

30 stopline = index 

31 elif line.startswith('*') and stopline == -1: 

32 stopline = index 

33 # Format and send to read_xyz. 

34 xyz_text = '%i\n' % (stopline - startline) 

35 xyz_text += ' geometry\n' 

36 for line in lines[startline:stopline]: 

37 xyz_text += line 

38 atoms = read(StringIO(xyz_text), format='xyz') 

39 atoms.set_cell((0.0, 0.0, 0.0)) # no unit cell defined 

40 

41 return atoms 

42 

43 

44@writer 

45def write_orca(fd, atoms, params): 

46 # conventional filename: '<name>.inp' 

47 fd.write(f'! {params["orcasimpleinput"]} \n') 

48 fd.write(f'{params["orcablocks"]} \n') 

49 

50 if 'coords' not in params['orcablocks']: 

51 fd.write('*xyz') 

52 fd.write(' %d' % params['charge']) 

53 fd.write(' %d \n' % params['mult']) 

54 for atom in atoms: 

55 if atom.tag == 71: # 71 is ascii G (Ghost) 

56 symbol = atom.symbol + ' : ' 

57 else: 

58 symbol = atom.symbol + ' ' 

59 fd.write( 

60 symbol 

61 + str(atom.position[0]) 

62 + ' ' 

63 + str(atom.position[1]) 

64 + ' ' 

65 + str(atom.position[2]) 

66 + '\n' 

67 ) 

68 fd.write('*\n') 

69 

70 

71def read_charge(lines: List[str]) -> Optional[float]: 

72 """Read sum of atomic charges.""" 

73 charge = None 

74 for line in lines: 

75 if 'Sum of atomic charges' in line: 

76 charge = float(line.split()[-1]) 

77 return charge 

78 

79 

80def read_energy(lines: List[str]) -> Optional[float]: 

81 """Read energy.""" 

82 energy = None 

83 for line in lines: 

84 if 'FINAL SINGLE POINT ENERGY' in line: 

85 if 'Wavefunction not fully converged' in line: 

86 energy = float('nan') 

87 else: 

88 energy = float(line.split()[-1]) 

89 if energy is not None: 

90 return energy * Hartree 

91 return energy 

92 

93 

94def read_center_of_mass(lines: List[str]) -> Optional[np.ndarray]: 

95 """Scan through text for the center of mass""" 

96 # Example: 

97 # 'The origin for moment calculation is the CENTER OF MASS = 

98 # ( 0.002150, -0.296255 0.086315)' 

99 # Note the missing comma in the output 

100 com = None 

101 for line in lines: 

102 if 'The origin for moment calculation is the CENTER OF MASS' in line: 

103 line = re.sub(r'[(),]', '', line) 

104 com = np.array([float(_) for _ in line.split()[-3:]]) 

105 if com is not None: 

106 return com * Bohr # return the last match 

107 return com 

108 

109 

110def read_dipole(lines: List[str]) -> Optional[np.ndarray]: 

111 """Read dipole moment. 

112 

113 Note that the read dipole moment is for the COM frame of reference. 

114 """ 

115 dipole = None 

116 for line in lines: 

117 if 'Total Dipole Moment' in line: 

118 dipole = np.array([float(x) for x in line.split()[-3:]]) * Bohr 

119 return dipole # Return the last match 

120 

121 

122def _read_atoms(lines: Sequence[str]) -> Atoms: 

123 """Read atomic positions and symbols. Create Atoms object.""" 

124 line_start = -1 

125 natoms = 0 

126 

127 for ll, line in enumerate(lines): 

128 if 'Number of atoms' in line: 

129 natoms = int(line.split()[4]) 

130 elif 'CARTESIAN COORDINATES (ANGSTROEM)' in line: 

131 line_start = ll + 2 

132 

133 # Check if atoms present and if their number is given. 

134 if line_start == -1: 

135 raise ParseError( 

136 'No information about the structure in the ORCA output file.' 

137 ) 

138 elif natoms == 0: 

139 raise ParseError( 

140 'No information about number of atoms in the ORCA output file.' 

141 ) 

142 

143 positions = np.zeros((natoms, 3)) 

144 symbols = [''] * natoms 

145 

146 for ll, line in enumerate(lines[line_start : (line_start + natoms)]): 

147 inp = line.split() 

148 positions[ll, :] = [float(pos) for pos in inp[1:4]] 

149 symbols[ll] = inp[0] 

150 

151 atoms = Atoms(symbols=symbols, positions=positions) 

152 atoms.set_pbc([False, False, False]) 

153 

154 return atoms 

155 

156 

157def read_forces(lines: List[str]) -> Optional[np.ndarray]: 

158 """Read forces from output file if available. Else return None. 

159 

160 Taking the forces from the output files (instead of the engrad-file) to 

161 be more general. The forces can be present in general output even if 

162 the engrad file is not there. 

163 

164 Note: If more than one geometry relaxation step is available, 

165 forces do not always exist for the first step. In this case, for 

166 the first step an array of None will be returned. The following 

167 relaxation steps will then have forces available. 

168 """ 

169 line_start = -1 

170 natoms = 0 

171 record_gradient = True 

172 

173 for ll, line in enumerate(lines): 

174 if 'Number of atoms' in line: 

175 natoms = int(line.split()[4]) 

176 # Read in only first set of forces for each chunk 

177 # (Excited state calculations can have several sets of 

178 # forces per chunk) 

179 elif 'CARTESIAN GRADIENT' in line and record_gradient: 

180 line_start = ll + 3 

181 record_gradient = False 

182 

183 # Check if number of atoms is available. 

184 if natoms == 0: 

185 raise ParseError( 

186 'No information about number of atoms in the ORCA output file.' 

187 ) 

188 

189 # Forces are not always available. If not available, return None. 

190 if line_start == -1: 

191 return None 

192 

193 forces = np.zeros((natoms, 3)) 

194 

195 for ll, line in enumerate(lines[line_start : (line_start + natoms)]): 

196 inp = line.split() 

197 forces[ll, :] = [float(pos) for pos in inp[3:6]] 

198 

199 forces *= -Hartree / Bohr 

200 return forces 

201 

202 

203def get_chunks(lines: Iterable[str]) -> Iterator[list[str]]: 

204 """Separate out the chunks for each geometry relaxation step.""" 

205 finished = False 

206 relaxation_finished = False 

207 relaxation = False 

208 

209 chunk_endings = [ 

210 'ORCA TERMINATED NORMALLY', 

211 'ORCA GEOMETRY RELAXATION STEP', 

212 ] 

213 chunk_lines = [] 

214 for line in lines: 

215 # Assemble chunks 

216 if any([ending in line for ending in chunk_endings]): 

217 chunk_lines.append(line) 

218 yield chunk_lines 

219 chunk_lines = [] 

220 else: 

221 chunk_lines.append(line) 

222 

223 if 'ORCA TERMINATED NORMALLY' in line: 

224 finished = True 

225 

226 if 'THE OPTIMIZATION HAS CONVERGED' in line: 

227 relaxation_finished = True 

228 

229 # Check if calculation is an optimization. 

230 if 'ORCA SCF GRADIENT CALCULATION' in line: 

231 relaxation = True 

232 

233 # Give error if calculation not finished for single-point calculations. 

234 if not finished and not relaxation: 

235 raise ParseError('Error: Calculation did not finish!') 

236 # Give warning if calculation not finished for geometry optimizations. 

237 elif not finished and relaxation: 

238 warnings.warn('Calculation did not finish!') 

239 # Calculation may have finished, but relaxation may have not. 

240 elif not relaxation_finished and relaxation: 

241 warnings.warn('Geometry optimization did not converge!') 

242 

243 

244@reader 

245def read_orca_output(fd, index=slice(None)): 

246 """From the ORCA output file: Read Energy, positions, forces 

247 and dipole moment. 

248 

249 Create separated atoms object for each geometry frame through 

250 parsing the output file in chunks. 

251 """ 

252 images = [] 

253 

254 # Iterate over chunks and create a separate atoms object for each 

255 for chunk in get_chunks(fd): 

256 energy = read_energy(chunk) 

257 atoms = _read_atoms(chunk) 

258 forces = read_forces(chunk) 

259 dipole = read_dipole(chunk) 

260 charge = read_charge(chunk) 

261 com = read_center_of_mass(chunk) 

262 

263 # Correct dipole moment for centre-of-mass 

264 if com is not None and dipole is not None: 

265 dipole = dipole + com * charge 

266 

267 atoms.calc = SinglePointDFTCalculator( 

268 atoms, 

269 energy=energy, 

270 free_energy=energy, 

271 forces=forces, 

272 # stress=self.stress, 

273 # stresses=self.stresses, 

274 # magmom=self.magmom, 

275 dipole=dipole, 

276 # dielectric_tensor=self.dielectric_tensor, 

277 # polarization=self.polarization, 

278 ) 

279 # collect images 

280 images.append(atoms) 

281 

282 return images[index] 

283 

284 

285@reader 

286def read_orca_engrad(fd): 

287 """Read Forces from ORCA .engrad file.""" 

288 getgrad = False 

289 gradients = [] 

290 tempgrad = [] 

291 for _, line in enumerate(fd): 

292 if line.find('# The current gradient') >= 0: 

293 getgrad = True 

294 gradients = [] 

295 tempgrad = [] 

296 continue 

297 if getgrad and '#' not in line: 

298 grad = line.split()[-1] 

299 tempgrad.append(float(grad)) 

300 if len(tempgrad) == 3: 

301 gradients.append(tempgrad) 

302 tempgrad = [] 

303 if '# The at' in line: 

304 getgrad = False 

305 

306 forces = -np.array(gradients) * Hartree / Bohr 

307 return forces 

308 

309 

310@deprecated( 

311 'Please use ase.io.read instead of read_orca_outputs, e.g.,\n' 

312 'from ase.io import read \n' 

313 'atoms = read("orca.out")', 

314 DeprecationWarning, 

315) 

316def read_orca_outputs(directory, stdout_path): 

317 """Reproduces old functionality of reading energy, forces etc 

318 directly from output without creation of atoms object. 

319 This is kept to ensure backwards compatability 

320 .. deprecated:: 3.24.0 

321 Use of read_orca_outputs is deprected, please 

322 process ORCA output by using ase.io.read 

323 e.g., read('orca.out')" 

324 """ 

325 stdout_path = Path(stdout_path) 

326 atoms = read_orca_output(stdout_path, index=-1) 

327 results = {} 

328 results['energy'] = atoms.get_total_energy() 

329 results['free_energy'] = atoms.get_total_energy() 

330 

331 try: 

332 results['dipole'] = atoms.get_dipole_moment() 

333 except PropertyNotImplementedError: 

334 pass 

335 

336 # Does engrad always exist? - No! 

337 # Will there be other files -No -> We should just take engrad 

338 # as a direct argument. Or maybe this function does not even need to 

339 # exist. 

340 engrad_path = stdout_path.with_suffix('.engrad') 

341 if engrad_path.is_file(): 

342 results['forces'] = read_orca_engrad(engrad_path) 

343 print("""Warning: If you are reading in an engrad file from a 

344 geometry optimization, check very carefully. 

345 ORCA does not by default supply the forces for the 

346 converged geometry!""") 

347 return results