Coverage for /builds/ase/ase/ase/io/orca.py: 91.75%
194 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-08-02 00:12 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2025-08-02 00:12 +0000
1import re
2import warnings
3from collections.abc import Iterable, Iterator
4from io import StringIO
5from pathlib import Path
6from typing import List, Optional, Sequence
8import numpy as np
10from ase import Atoms
11from ase.calculators.calculator import PropertyNotImplementedError
12from ase.calculators.singlepoint import SinglePointDFTCalculator
13from ase.io import ParseError, read
14from ase.units import Bohr, Hartree
15from ase.utils import deprecated, reader, writer
18@reader
19def read_geom_orcainp(fd):
20 """Method to read geometry from an ORCA input file."""
21 lines = fd.readlines()
23 # Find geometry region of input file.
24 stopline = 0
25 for index, line in enumerate(lines):
26 if line[1:].startswith('xyz '):
27 startline = index + 1
28 stopline = -1
29 elif line.startswith('end') and stopline == -1:
30 stopline = index
31 elif line.startswith('*') and stopline == -1:
32 stopline = index
33 # Format and send to read_xyz.
34 xyz_text = '%i\n' % (stopline - startline)
35 xyz_text += ' geometry\n'
36 for line in lines[startline:stopline]:
37 xyz_text += line
38 atoms = read(StringIO(xyz_text), format='xyz')
39 atoms.set_cell((0.0, 0.0, 0.0)) # no unit cell defined
41 return atoms
44@writer
45def write_orca(fd, atoms, params):
46 # conventional filename: '<name>.inp'
47 fd.write(f'! {params["orcasimpleinput"]} \n')
48 fd.write(f'{params["orcablocks"]} \n')
50 if 'coords' not in params['orcablocks']:
51 fd.write('*xyz')
52 fd.write(' %d' % params['charge'])
53 fd.write(' %d \n' % params['mult'])
54 for atom in atoms:
55 if atom.tag == 71: # 71 is ascii G (Ghost)
56 symbol = atom.symbol + ' : '
57 else:
58 symbol = atom.symbol + ' '
59 fd.write(
60 symbol
61 + str(atom.position[0])
62 + ' '
63 + str(atom.position[1])
64 + ' '
65 + str(atom.position[2])
66 + '\n'
67 )
68 fd.write('*\n')
71def read_charge(lines: List[str]) -> Optional[float]:
72 """Read sum of atomic charges."""
73 charge = None
74 for line in lines:
75 if 'Sum of atomic charges' in line:
76 charge = float(line.split()[-1])
77 return charge
80def read_energy(lines: List[str]) -> Optional[float]:
81 """Read energy."""
82 energy = None
83 for line in lines:
84 if 'FINAL SINGLE POINT ENERGY' in line:
85 if 'Wavefunction not fully converged' in line:
86 energy = float('nan')
87 else:
88 energy = float(line.split()[-1])
89 if energy is not None:
90 return energy * Hartree
91 return energy
94def read_center_of_mass(lines: List[str]) -> Optional[np.ndarray]:
95 """Scan through text for the center of mass"""
96 # Example:
97 # 'The origin for moment calculation is the CENTER OF MASS =
98 # ( 0.002150, -0.296255 0.086315)'
99 # Note the missing comma in the output
100 com = None
101 for line in lines:
102 if 'The origin for moment calculation is the CENTER OF MASS' in line:
103 line = re.sub(r'[(),]', '', line)
104 com = np.array([float(_) for _ in line.split()[-3:]])
105 if com is not None:
106 return com * Bohr # return the last match
107 return com
110def read_dipole(lines: List[str]) -> Optional[np.ndarray]:
111 """Read dipole moment.
113 Note that the read dipole moment is for the COM frame of reference.
114 """
115 dipole = None
116 for line in lines:
117 if 'Total Dipole Moment' in line:
118 dipole = np.array([float(x) for x in line.split()[-3:]]) * Bohr
119 return dipole # Return the last match
122def _read_atoms(lines: Sequence[str]) -> Atoms:
123 """Read atomic positions and symbols. Create Atoms object."""
124 line_start = -1
125 natoms = 0
127 for ll, line in enumerate(lines):
128 if 'Number of atoms' in line:
129 natoms = int(line.split()[4])
130 elif 'CARTESIAN COORDINATES (ANGSTROEM)' in line:
131 line_start = ll + 2
133 # Check if atoms present and if their number is given.
134 if line_start == -1:
135 raise ParseError(
136 'No information about the structure in the ORCA output file.'
137 )
138 elif natoms == 0:
139 raise ParseError(
140 'No information about number of atoms in the ORCA output file.'
141 )
143 positions = np.zeros((natoms, 3))
144 symbols = [''] * natoms
146 for ll, line in enumerate(lines[line_start : (line_start + natoms)]):
147 inp = line.split()
148 positions[ll, :] = [float(pos) for pos in inp[1:4]]
149 symbols[ll] = inp[0]
151 atoms = Atoms(symbols=symbols, positions=positions)
152 atoms.set_pbc([False, False, False])
154 return atoms
157def read_forces(lines: List[str]) -> Optional[np.ndarray]:
158 """Read forces from output file if available. Else return None.
160 Taking the forces from the output files (instead of the engrad-file) to
161 be more general. The forces can be present in general output even if
162 the engrad file is not there.
164 Note: If more than one geometry relaxation step is available,
165 forces do not always exist for the first step. In this case, for
166 the first step an array of None will be returned. The following
167 relaxation steps will then have forces available.
168 """
169 line_start = -1
170 natoms = 0
171 record_gradient = True
173 for ll, line in enumerate(lines):
174 if 'Number of atoms' in line:
175 natoms = int(line.split()[4])
176 # Read in only first set of forces for each chunk
177 # (Excited state calculations can have several sets of
178 # forces per chunk)
179 elif 'CARTESIAN GRADIENT' in line and record_gradient:
180 line_start = ll + 3
181 record_gradient = False
183 # Check if number of atoms is available.
184 if natoms == 0:
185 raise ParseError(
186 'No information about number of atoms in the ORCA output file.'
187 )
189 # Forces are not always available. If not available, return None.
190 if line_start == -1:
191 return None
193 forces = np.zeros((natoms, 3))
195 for ll, line in enumerate(lines[line_start : (line_start + natoms)]):
196 inp = line.split()
197 forces[ll, :] = [float(pos) for pos in inp[3:6]]
199 forces *= -Hartree / Bohr
200 return forces
203def get_chunks(lines: Iterable[str]) -> Iterator[list[str]]:
204 """Separate out the chunks for each geometry relaxation step."""
205 finished = False
206 relaxation_finished = False
207 relaxation = False
209 chunk_endings = [
210 'ORCA TERMINATED NORMALLY',
211 'ORCA GEOMETRY RELAXATION STEP',
212 ]
213 chunk_lines = []
214 for line in lines:
215 # Assemble chunks
216 if any([ending in line for ending in chunk_endings]):
217 chunk_lines.append(line)
218 yield chunk_lines
219 chunk_lines = []
220 else:
221 chunk_lines.append(line)
223 if 'ORCA TERMINATED NORMALLY' in line:
224 finished = True
226 if 'THE OPTIMIZATION HAS CONVERGED' in line:
227 relaxation_finished = True
229 # Check if calculation is an optimization.
230 if 'ORCA SCF GRADIENT CALCULATION' in line:
231 relaxation = True
233 # Give error if calculation not finished for single-point calculations.
234 if not finished and not relaxation:
235 raise ParseError('Error: Calculation did not finish!')
236 # Give warning if calculation not finished for geometry optimizations.
237 elif not finished and relaxation:
238 warnings.warn('Calculation did not finish!')
239 # Calculation may have finished, but relaxation may have not.
240 elif not relaxation_finished and relaxation:
241 warnings.warn('Geometry optimization did not converge!')
244@reader
245def read_orca_output(fd, index=slice(None)):
246 """From the ORCA output file: Read Energy, positions, forces
247 and dipole moment.
249 Create separated atoms object for each geometry frame through
250 parsing the output file in chunks.
251 """
252 images = []
254 # Iterate over chunks and create a separate atoms object for each
255 for chunk in get_chunks(fd):
256 energy = read_energy(chunk)
257 atoms = _read_atoms(chunk)
258 forces = read_forces(chunk)
259 dipole = read_dipole(chunk)
260 charge = read_charge(chunk)
261 com = read_center_of_mass(chunk)
263 # Correct dipole moment for centre-of-mass
264 if com is not None and dipole is not None:
265 dipole = dipole + com * charge
267 atoms.calc = SinglePointDFTCalculator(
268 atoms,
269 energy=energy,
270 free_energy=energy,
271 forces=forces,
272 # stress=self.stress,
273 # stresses=self.stresses,
274 # magmom=self.magmom,
275 dipole=dipole,
276 # dielectric_tensor=self.dielectric_tensor,
277 # polarization=self.polarization,
278 )
279 # collect images
280 images.append(atoms)
282 return images[index]
285@reader
286def read_orca_engrad(fd):
287 """Read Forces from ORCA .engrad file."""
288 getgrad = False
289 gradients = []
290 tempgrad = []
291 for _, line in enumerate(fd):
292 if line.find('# The current gradient') >= 0:
293 getgrad = True
294 gradients = []
295 tempgrad = []
296 continue
297 if getgrad and '#' not in line:
298 grad = line.split()[-1]
299 tempgrad.append(float(grad))
300 if len(tempgrad) == 3:
301 gradients.append(tempgrad)
302 tempgrad = []
303 if '# The at' in line:
304 getgrad = False
306 forces = -np.array(gradients) * Hartree / Bohr
307 return forces
310@deprecated(
311 'Please use ase.io.read instead of read_orca_outputs, e.g.,\n'
312 'from ase.io import read \n'
313 'atoms = read("orca.out")',
314 DeprecationWarning,
315)
316def read_orca_outputs(directory, stdout_path):
317 """Reproduces old functionality of reading energy, forces etc
318 directly from output without creation of atoms object.
319 This is kept to ensure backwards compatability
320 .. deprecated:: 3.24.0
321 Use of read_orca_outputs is deprected, please
322 process ORCA output by using ase.io.read
323 e.g., read('orca.out')"
324 """
325 stdout_path = Path(stdout_path)
326 atoms = read_orca_output(stdout_path, index=-1)
327 results = {}
328 results['energy'] = atoms.get_total_energy()
329 results['free_energy'] = atoms.get_total_energy()
331 try:
332 results['dipole'] = atoms.get_dipole_moment()
333 except PropertyNotImplementedError:
334 pass
336 # Does engrad always exist? - No!
337 # Will there be other files -No -> We should just take engrad
338 # as a direct argument. Or maybe this function does not even need to
339 # exist.
340 engrad_path = stdout_path.with_suffix('.engrad')
341 if engrad_path.is_file():
342 results['forces'] = read_orca_engrad(engrad_path)
343 print("""Warning: If you are reading in an engrad file from a
344 geometry optimization, check very carefully.
345 ORCA does not by default supply the forces for the
346 converged geometry!""")
347 return results