Coverage for ase / io / orca.py: 91.71%
193 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-30 08:22 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-30 08:22 +0000
1import re
2import warnings
3from collections.abc import Iterable, Iterator, Sequence
4from io import StringIO
5from pathlib import Path
7import numpy as np
9from ase import Atoms
10from ase.calculators.calculator import PropertyNotImplementedError
11from ase.calculators.singlepoint import SinglePointDFTCalculator
12from ase.io import ParseError, read
13from ase.units import Bohr, Hartree
14from ase.utils import deprecated, reader, writer
17@reader
18def read_geom_orcainp(fd):
19 """Method to read geometry from an ORCA input file."""
20 lines = fd.readlines()
22 # Find geometry region of input file.
23 stopline = 0
24 for index, line in enumerate(lines):
25 if line[1:].startswith('xyz '):
26 startline = index + 1
27 stopline = -1
28 elif line.startswith('end') and stopline == -1:
29 stopline = index
30 elif line.startswith('*') and stopline == -1:
31 stopline = index
32 # Format and send to read_xyz.
33 xyz_text = '%i\n' % (stopline - startline)
34 xyz_text += ' geometry\n'
35 for line in lines[startline:stopline]:
36 xyz_text += line
37 atoms = read(StringIO(xyz_text), format='xyz')
38 atoms.set_cell((0.0, 0.0, 0.0)) # no unit cell defined
40 return atoms
43@writer
44def write_orca(fd, atoms, params):
45 # conventional filename: '<name>.inp'
46 fd.write(f'! {params["orcasimpleinput"]} \n')
47 fd.write(f'{params["orcablocks"]} \n')
49 if 'coords' not in params['orcablocks']:
50 fd.write('*xyz')
51 fd.write(' %d' % params['charge'])
52 fd.write(' %d \n' % params['mult'])
53 for atom in atoms:
54 if atom.tag == 71: # 71 is ascii G (Ghost)
55 symbol = atom.symbol + ' : '
56 else:
57 symbol = atom.symbol + ' '
58 fd.write(
59 symbol
60 + str(atom.position[0])
61 + ' '
62 + str(atom.position[1])
63 + ' '
64 + str(atom.position[2])
65 + '\n'
66 )
67 fd.write('*\n')
70def read_charge(lines: list[str]) -> float | None:
71 """Read sum of atomic charges."""
72 charge = None
73 for line in lines:
74 if 'Sum of atomic charges' in line:
75 charge = float(line.split()[-1])
76 return charge
79def read_energy(lines: list[str]) -> float | None:
80 """Read energy."""
81 energy = None
82 for line in lines:
83 if 'FINAL SINGLE POINT ENERGY' in line:
84 if 'Wavefunction not fully converged' in line:
85 energy = float('nan')
86 else:
87 energy = float(line.split()[-1])
88 if energy is not None:
89 return energy * Hartree
90 return energy
93def read_center_of_mass(lines: list[str]) -> np.ndarray | None:
94 """Scan through text for the center of mass"""
95 # Example:
96 # 'The origin for moment calculation is the CENTER OF MASS =
97 # ( 0.002150, -0.296255 0.086315)'
98 # Note the missing comma in the output
99 com = None
100 for line in lines:
101 if 'The origin for moment calculation is the CENTER OF MASS' in line:
102 line = re.sub(r'[(),]', '', line)
103 com = np.array([float(_) for _ in line.split()[-3:]])
104 if com is not None:
105 return com * Bohr # return the last match
106 return com
109def read_dipole(lines: list[str]) -> np.ndarray | None:
110 """Read dipole moment.
112 Note that the read dipole moment is for the COM frame of reference.
113 """
114 dipole = None
115 for line in lines:
116 if 'Total Dipole Moment' in line:
117 dipole = np.array([float(x) for x in line.split()[-3:]]) * Bohr
118 return dipole # Return the last match
121def _read_atoms(lines: Sequence[str]) -> Atoms:
122 """Read atomic positions and symbols. Create Atoms object."""
123 line_start = -1
124 natoms = 0
126 for ll, line in enumerate(lines):
127 if 'Number of atoms' in line:
128 natoms = int(line.split()[4])
129 elif 'CARTESIAN COORDINATES (ANGSTROEM)' in line:
130 line_start = ll + 2
132 # Check if atoms present and if their number is given.
133 if line_start == -1:
134 raise ParseError(
135 'No information about the structure in the ORCA output file.'
136 )
137 elif natoms == 0:
138 raise ParseError(
139 'No information about number of atoms in the ORCA output file.'
140 )
142 positions = np.zeros((natoms, 3))
143 symbols = [''] * natoms
145 for ll, line in enumerate(lines[line_start : (line_start + natoms)]):
146 inp = line.split()
147 positions[ll, :] = [float(pos) for pos in inp[1:4]]
148 symbols[ll] = inp[0]
150 atoms = Atoms(symbols=symbols, positions=positions)
151 atoms.set_pbc([False, False, False])
153 return atoms
156def read_forces(lines: list[str]) -> np.ndarray | None:
157 """Read forces from output file if available. Else return None.
159 Taking the forces from the output files (instead of the engrad-file) to
160 be more general. The forces can be present in general output even if
161 the engrad file is not there.
163 Note: If more than one geometry relaxation step is available,
164 forces do not always exist for the first step. In this case, for
165 the first step an array of None will be returned. The following
166 relaxation steps will then have forces available.
167 """
168 line_start = -1
169 natoms = 0
170 record_gradient = True
172 for ll, line in enumerate(lines):
173 if 'Number of atoms' in line:
174 natoms = int(line.split()[4])
175 # Read in only first set of forces for each chunk
176 # (Excited state calculations can have several sets of
177 # forces per chunk)
178 elif 'CARTESIAN GRADIENT' in line and record_gradient:
179 line_start = ll + 3
180 record_gradient = False
182 # Check if number of atoms is available.
183 if natoms == 0:
184 raise ParseError(
185 'No information about number of atoms in the ORCA output file.'
186 )
188 # Forces are not always available. If not available, return None.
189 if line_start == -1:
190 return None
192 forces = np.zeros((natoms, 3))
194 for ll, line in enumerate(lines[line_start : (line_start + natoms)]):
195 inp = line.split()
196 forces[ll, :] = [float(pos) for pos in inp[3:6]]
198 forces *= -Hartree / Bohr
199 return forces
202def get_chunks(lines: Iterable[str]) -> Iterator[list[str]]:
203 """Separate out the chunks for each geometry relaxation step."""
204 finished = False
205 relaxation_finished = False
206 relaxation = False
208 chunk_endings = [
209 'ORCA TERMINATED NORMALLY',
210 'ORCA GEOMETRY RELAXATION STEP',
211 ]
212 chunk_lines = []
213 for line in lines:
214 # Assemble chunks
215 if any([ending in line for ending in chunk_endings]):
216 chunk_lines.append(line)
217 yield chunk_lines
218 chunk_lines = []
219 else:
220 chunk_lines.append(line)
222 if 'ORCA TERMINATED NORMALLY' in line:
223 finished = True
225 if 'THE OPTIMIZATION HAS CONVERGED' in line:
226 relaxation_finished = True
228 # Check if calculation is an optimization.
229 if 'ORCA SCF GRADIENT CALCULATION' in line:
230 relaxation = True
232 # Give error if calculation not finished for single-point calculations.
233 if not finished and not relaxation:
234 raise ParseError('Error: Calculation did not finish!')
235 # Give warning if calculation not finished for geometry optimizations.
236 elif not finished and relaxation:
237 warnings.warn('Calculation did not finish!')
238 # Calculation may have finished, but relaxation may have not.
239 elif not relaxation_finished and relaxation:
240 warnings.warn('Geometry optimization did not converge!')
243@reader
244def read_orca_output(fd, index=slice(None)):
245 """From the ORCA output file: Read Energy, positions, forces
246 and dipole moment.
248 Create separated atoms object for each geometry frame through
249 parsing the output file in chunks.
250 """
251 images = []
253 # Iterate over chunks and create a separate atoms object for each
254 for chunk in get_chunks(fd):
255 energy = read_energy(chunk)
256 atoms = _read_atoms(chunk)
257 forces = read_forces(chunk)
258 dipole = read_dipole(chunk)
259 charge = read_charge(chunk)
260 com = read_center_of_mass(chunk)
262 # Correct dipole moment for centre-of-mass
263 if com is not None and dipole is not None:
264 dipole = dipole + com * charge
266 atoms.calc = SinglePointDFTCalculator(
267 atoms,
268 energy=energy,
269 free_energy=energy,
270 forces=forces,
271 # stress=self.stress,
272 # stresses=self.stresses,
273 # magmom=self.magmom,
274 dipole=dipole,
275 # dielectric_tensor=self.dielectric_tensor,
276 # polarization=self.polarization,
277 )
278 # collect images
279 images.append(atoms)
281 return images[index]
284@reader
285def read_orca_engrad(fd):
286 """Read Forces from ORCA .engrad file."""
287 getgrad = False
288 gradients = []
289 tempgrad = []
290 for _, line in enumerate(fd):
291 if line.find('# The current gradient') >= 0:
292 getgrad = True
293 gradients = []
294 tempgrad = []
295 continue
296 if getgrad and '#' not in line:
297 grad = line.split()[-1]
298 tempgrad.append(float(grad))
299 if len(tempgrad) == 3:
300 gradients.append(tempgrad)
301 tempgrad = []
302 if '# The at' in line:
303 getgrad = False
305 forces = -np.array(gradients) * Hartree / Bohr
306 return forces
309@deprecated(
310 'Please use ase.io.read instead of read_orca_outputs, e.g.,\n'
311 'from ase.io import read \n'
312 'atoms = read("orca.out")',
313 DeprecationWarning,
314)
315def read_orca_outputs(directory, stdout_path):
316 """Reproduces old functionality of reading energy, forces etc
317 directly from output without creation of atoms object.
318 This is kept to ensure backwards compatability
319 .. deprecated:: 3.24.0
320 Use of read_orca_outputs is deprected, please
321 process ORCA output by using ase.io.read
322 e.g., read('orca.out')"
323 """
324 stdout_path = Path(stdout_path)
325 atoms = read_orca_output(stdout_path, index=-1)
326 results = {}
327 results['energy'] = atoms.get_total_energy()
328 results['free_energy'] = atoms.get_total_energy()
330 try:
331 results['dipole'] = atoms.get_dipole_moment()
332 except PropertyNotImplementedError:
333 pass
335 # Does engrad always exist? - No!
336 # Will there be other files -No -> We should just take engrad
337 # as a direct argument. Or maybe this function does not even need to
338 # exist.
339 engrad_path = stdout_path.with_suffix('.engrad')
340 if engrad_path.is_file():
341 results['forces'] = read_orca_engrad(engrad_path)
342 print("""Warning: If you are reading in an engrad file from a
343 geometry optimization, check very carefully.
344 ORCA does not by default supply the forces for the
345 converged geometry!""")
346 return results