Coverage for /builds/ase/ase/ase/io/gaussian.py: 95.28%
635 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-08-02 00:12 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2025-08-02 00:12 +0000
1# fmt: off
3import logging
4import re
5import warnings
6from collections.abc import Iterable
7from copy import deepcopy
9import numpy as np
11from ase import Atoms
12from ase.calculators.calculator import Calculator, InputError
13from ase.calculators.gaussian import Gaussian
14from ase.calculators.singlepoint import SinglePointCalculator
15from ase.data import atomic_masses_iupac2016, chemical_symbols
16from ase.io import ParseError
17from ase.io.zmatrix import parse_zmatrix
18from ase.units import Bohr, Debye, Hartree
20logger = logging.getLogger(__name__)
22_link0_keys = [
23 'mem',
24 'chk',
25 'oldchk',
26 'schk',
27 'rwf',
28 'oldmatrix',
29 'oldrawmatrix',
30 'int',
31 'd2e',
32 'save',
33 'nosave',
34 'errorsave',
35 'cpu',
36 'nprocshared',
37 'gpucpu',
38 'lindaworkers',
39 'usessh',
40 'ssh',
41 'debuglinda',
42]
45_link0_special = [
46 'kjob',
47 'subst',
48]
51# Certain problematic methods do not provide well-defined potential energy
52# surfaces, because these "composite" methods involve geometry optimization
53# and/or vibrational frequency analysis. In addition, the "energy" calculated
54# by these methods are typically ZPVE corrected and/or temperature dependent
55# free energies.
56_problem_methods = [
57 'cbs-4m', 'cbs-qb3', 'cbs-apno',
58 'g1', 'g2', 'g3', 'g4', 'g2mp2', 'g3mp2', 'g3b3', 'g3mp2b3', 'g4mp4',
59 'w1', 'w1u', 'w1bd', 'w1ro',
60]
63_xc_to_method = dict(
64 pbe='pbepbe',
65 pbe0='pbe1pbe',
66 hse06='hseh1pbe',
67 hse03='ohse2pbe',
68 lda='svwn', # gaussian "knows about" LSDA, but maybe not LDA.
69 tpss='tpsstpss',
70 revtpss='revtpssrevtpss',
71)
73_nuclear_prop_names = ['spin', 'zeff', 'qmom', 'nmagm', 'znuc',
74 'radnuclear', 'iso']
77def _get_molecule_spec(atoms, nuclear_props):
78 ''' Generate the molecule specification section to write
79 to the Gaussian input file, from the Atoms object and dict
80 of nuclear properties'''
81 molecule_spec = []
82 for i, atom in enumerate(atoms):
83 symbol_section = atom.symbol + '('
84 # Check whether any nuclear properties of the atom have been set,
85 # and if so, add them to the symbol section.
86 nuclear_props_set = False
87 for keyword, array in nuclear_props.items():
88 if array is not None and array[i] is not None:
89 string = keyword + '=' + str(array[i]) + ', '
90 symbol_section += string
91 nuclear_props_set = True
93 # Check whether the mass of the atom has been modified,
94 # and if so, add it to the symbol section:
95 mass_set = False
96 symbol = atom.symbol
97 expected_mass = atomic_masses_iupac2016[chemical_symbols.index(
98 symbol)]
99 if expected_mass != atoms[i].mass:
100 mass_set = True
101 string = 'iso' + '=' + str(atoms[i].mass)
102 symbol_section += string
104 if nuclear_props_set or mass_set:
105 symbol_section = symbol_section.strip(', ')
106 symbol_section += ')'
107 else:
108 symbol_section = symbol_section.strip('(')
110 # Then attach the properties appropriately
111 # this formatting was chosen for backwards compatibility reasons, but
112 # it would probably be better to
113 # 1) Ensure proper spacing between entries with explicit spaces
114 # 2) Use fewer columns for the element
115 # 3) Use 'e' (scientific notation) instead of 'f' for positions
116 molecule_spec.append('{:<10s}{:20.10f}{:20.10f}{:20.10f}'.format(
117 symbol_section, *atom.position))
119 # unit cell vectors, in case of periodic boundary conditions
120 for ipbc, tv in zip(atoms.pbc, atoms.cell):
121 if ipbc:
122 molecule_spec.append('TV {:20.10f}{:20.10f}{:20.10f}'.format(*tv))
124 molecule_spec.append('')
126 return molecule_spec
129def _format_output_type(output_type):
130 ''' Given a letter: output_type, return
131 a string formatted for a gaussian input file'''
132 # Change output type to P if it has been set to T, because ASE
133 # does not support reading info from 'terse' output files.
134 if output_type is None or output_type == '' or 't' in output_type.lower():
135 output_type = 'P'
137 return f'#{output_type}'
140def _check_problem_methods(method):
141 ''' Check method string for problem methods and warn appropriately'''
142 if method.lower() in _problem_methods:
143 warnings.warn(
144 'The requested method, {}, is a composite method. Composite '
145 'methods do not have well-defined potential energy surfaces, '
146 'so the energies, forces, and other properties returned by '
147 'ASE may not be meaningful, or they may correspond to a '
148 'different geometry than the one provided. '
149 'Please use these methods with caution.'.format(method)
150 )
153def _pop_link0_params(params):
154 '''Takes the params dict and returns a dict with the link0 keywords
155 removed, and a list containing the link0 keywords and options
156 to be written to the gaussian input file'''
157 params = deepcopy(params)
158 out = []
159 # Remove keywords from params, so they are not set again later in
160 # route section
161 for key in _link0_keys:
162 if key not in params:
163 continue
164 val = params.pop(key)
165 if not val or (isinstance(val, str) and key.lower() == val.lower()):
166 out.append(f'%{key}')
167 else:
168 out.append(f'%{key}={val}')
170 # These link0 keywords have a slightly different syntax
171 for key in _link0_special:
172 if key not in params:
173 continue
174 val = params.pop(key)
175 if not isinstance(val, str) and isinstance(val, Iterable):
176 val = ' '.join(val)
177 out.append(f'%{key} L{val}')
179 return params, out
182def _format_method_basis(output_type, method, basis, fitting_basis):
183 output_string = ""
184 if basis and method and fitting_basis:
185 output_string = '{} {}/{}/{} ! ASE formatted method and basis'.format(
186 output_type, method, basis, fitting_basis)
187 elif basis and method:
188 output_string = '{} {}/{} ! ASE formatted method and basis'.format(
189 output_type, method, basis)
190 else:
191 output_string = f'{output_type}'
192 for value in [method, basis]:
193 if value is not None:
194 output_string += f' {value}'
195 return output_string
198def _format_route_params(params):
199 '''Get keywords and values from the params dictionary and return
200 as a list of lines to add to the gaussian input file'''
201 out = []
202 for key, val in params.items():
203 # assume bare keyword if val is falsey, i.e. '', None, False, etc.
204 # also, for backwards compatibility: assume bare keyword if key and
205 # val are the same
206 if not val or (isinstance(val, str) and key.lower() == val.lower()):
207 out.append(key)
208 elif not isinstance(val, str) and isinstance(val, Iterable):
209 out.append('{}({})'.format(key, ','.join(val)))
210 else:
211 out.append(f'{key}({val})')
212 return out
215def _format_addsec(addsec):
216 '''Format addsec string as a list of lines to be added to the gaussian
217 input file.'''
218 out = []
219 if addsec is not None:
220 out.append('')
221 if isinstance(addsec, str):
222 out.append(addsec)
223 elif isinstance(addsec, Iterable):
224 out += list(addsec)
225 return out
228def _format_basis_set(basis, basisfile, basis_set):
229 '''Format either: the basis set filename (basisfile), the basis set file
230 contents (from reading basisfile), or the basis_set text as a list of
231 strings to be added to the gaussian input file.'''
232 out = []
233 # if basis='gen', set basisfile. Either give a path to a basisfile, or
234 # read in the provided file and paste it verbatim
235 if basisfile is not None:
236 if basisfile[0] == '@':
237 out.append(basisfile)
238 else:
239 with open(basisfile) as fd:
240 out.append(fd.read())
241 elif basis_set is not None:
242 out.append(basis_set)
243 else:
244 if basis is not None and basis.lower() == 'gen':
245 raise InputError('Please set basisfile or basis_set')
246 return out
249def write_gaussian_in(fd, atoms, properties=['energy'],
250 method=None, basis=None, fitting_basis=None,
251 output_type='P', basisfile=None, basis_set=None,
252 xc=None, charge=None, mult=None, extra=None,
253 ioplist=None, addsec=None, spinlist=None,
254 zefflist=None, qmomlist=None, nmagmlist=None,
255 znuclist=None, radnuclearlist=None,
256 **params):
257 '''
258 Generates a Gaussian input file
260 Parameters
261 -----------
262 fd: file-like
263 where the Gaussian input file will be written
264 atoms: Atoms
265 Structure to write to the input file
266 properties: list
267 Properties to calculate
268 method: str
269 Level of theory to use, e.g. ``hf``, ``ccsd``, ``mp2``, or ``b3lyp``.
270 Overrides ``xc`` (see below).
271 xc: str
272 Level of theory to use. Translates several XC functionals from
273 their common name (e.g. ``PBE``) to their internal Gaussian name
274 (e.g. ``PBEPBE``).
275 basis: str
276 The basis set to use. If not provided, no basis set will be requested,
277 which usually results in ``STO-3G``. Maybe omitted if basisfile is set
278 (see below).
279 fitting_basis: str
280 The name of the fitting basis set to use.
281 output_type: str
282 Level of output to record in the Gaussian
283 output file - this may be ``N``- normal or ``P`` -
284 additional.
285 basisfile: str
286 The name of the basis file to use. If a value is provided, basis may
287 be omitted (it will be automatically set to 'gen')
288 basis_set: str
289 The basis set definition to use. This is an alternative
290 to basisfile, and would be the same as the contents
291 of such a file.
292 charge: int
293 The system charge. If not provided, it will be automatically
294 determined from the ``Atoms`` object’s initial_charges.
295 mult: int
296 The system multiplicity (``spin + 1``). If not provided, it will be
297 automatically determined from the ``Atoms`` object’s
298 ``initial_magnetic_moments``.
299 extra: str
300 Extra lines to be included in the route section verbatim.
301 It should not be necessary to use this, but it is included for
302 backwards compatibility.
303 ioplist: list
304 A collection of IOPs definitions to be included in the route line.
305 addsec: str
306 Text to be added after the molecular geometry specification, e.g. for
307 defining masses with ``freq=ReadIso``.
308 spinlist: list
309 A list of nuclear spins to be added into the nuclear
310 propeties section of the molecule specification.
311 zefflist: list
312 A list of effective charges to be added into the nuclear
313 propeties section of the molecule specification.
314 qmomlist: list
315 A list of nuclear quadropole moments to be added into
316 the nuclear propeties section of the molecule
317 specification.
318 nmagmlist: list
319 A list of nuclear magnetic moments to be added into
320 the nuclear propeties section of the molecule
321 specification.
322 znuclist: list
323 A list of nuclear charges to be added into the nuclear
324 propeties section of the molecule specification.
325 radnuclearlist: list
326 A list of nuclear radii to be added into the nuclear
327 propeties section of the molecule specification.
328 params: dict
329 Contains any extra keywords and values that will be included in either
330 the link0 section or route section of the gaussian input file.
331 To be included in the link0 section, the keyword must be one of the
332 following: ``mem``, ``chk``, ``oldchk``, ``schk``, ``rwf``,
333 ``oldmatrix``, ``oldrawmatrix``, ``int``, ``d2e``, ``save``,
334 ``nosave``, ``errorsave``, ``cpu``, ``nprocshared``, ``gpucpu``,
335 ``lindaworkers``, ``usessh``, ``ssh``, ``debuglinda``.
336 Any other keywords will be placed (along with their values) in the
337 route section.
338 '''
340 params = deepcopy(params)
342 if properties is None:
343 properties = ['energy']
345 output_type = _format_output_type(output_type)
347 # basis can be omitted if basisfile is provided
348 if basis is None:
349 if basisfile is not None or basis_set is not None:
350 basis = 'gen'
352 # determine method from xc if it is provided
353 if method is None:
354 if xc is not None:
355 method = _xc_to_method.get(xc.lower(), xc)
357 # If the user requests a problematic method, rather than raising an error
358 # or proceeding blindly, give the user a warning that the results parsed
359 # by ASE may not be meaningful.
360 if method is not None:
361 _check_problem_methods(method)
363 # determine charge from initial charges if not passed explicitly
364 if charge is None:
365 charge = atoms.get_initial_charges().sum()
367 # determine multiplicity from initial magnetic moments
368 # if not passed explicitly
369 if mult is None:
370 mult = atoms.get_initial_magnetic_moments().sum() + 1
372 # set up link0 arguments
373 out = []
374 params, link0_list = _pop_link0_params(params)
375 out.extend(link0_list)
377 # begin route line
378 # note: unlike in old calculator, each route keyword is put on its own
379 # line.
380 out.append(_format_method_basis(output_type, method, basis, fitting_basis))
382 # If the calculator's parameter dictionary contains an isolist, we ignore
383 # this - it is up to the user to attach this info as the atoms' masses
384 # if they wish for it to be used:
385 params.pop('isolist', None)
387 # Any params left will belong in the route section of the file:
388 out.extend(_format_route_params(params))
390 if ioplist is not None:
391 out.append('IOP(' + ', '.join(ioplist) + ')')
393 # raw list of explicit keywords for backwards compatibility
394 if extra is not None:
395 out.append(extra)
397 # Add 'force' iff the user requested forces, since Gaussian crashes when
398 # 'force' is combined with certain other keywords such as opt and irc.
399 if 'forces' in properties and 'force' not in params:
400 out.append('force')
402 # header, charge, and mult
403 out += ['', 'Gaussian input prepared by ASE', '',
404 f'{charge:.0f} {mult:.0f}']
406 # make dict of nuclear properties:
407 nuclear_props = {'spin': spinlist, 'zeff': zefflist, 'qmom': qmomlist,
408 'nmagm': nmagmlist, 'znuc': znuclist,
409 'radnuclear': radnuclearlist}
410 nuclear_props = {k: v for k, v in nuclear_props.items() if v is not None}
412 # atomic positions and nuclear properties:
413 molecule_spec = _get_molecule_spec(atoms, nuclear_props)
414 for line in molecule_spec:
415 out.append(line)
417 out.extend(_format_basis_set(basis, basisfile, basis_set))
419 out.extend(_format_addsec(addsec))
421 out += ['', '']
422 fd.write('\n'.join(out))
425# Regexp for reading an input file:
427_re_link0 = re.compile(r'^\s*%([^\=\)\(!]+)=?([^\=\)\(!]+)?(!.+)?')
428# Link0 lines are in the format:
429# '% keyword = value' or '% keyword'
430# (with or without whitespaces)
432_re_output_type = re.compile(r'^\s*#\s*([NPTnpt]?)\s*')
433# The start of the route section begins with a '#', and then may
434# be followed by the desired level of output in the output file: P, N or T.
436_re_method_basis = re.compile(
437 r"\s*([\w-]+)\s*\/([^/=!]+)([\/]([^!]+))?\s*(!.+)?")
438# Matches method, basis and optional fitting basis in the format:
439# method/basis/fitting_basis ! comment
440# They will appear in this format if the Gaussian file has been generated
441# by ASE using a calculator with the basis and method keywords set.
443_re_chgmult = re.compile(r'^(\s*[+-]?\d+(?:,\s*|\s+)[+-]?\d+\s*){1,}$')
444# This is a bit more complex of a regex than we typically want, but it
445# can be difficult to determine whether a line contains the charge and
446# multiplicity, rather than just another route keyword. By making sure
447# that the line contains exactly an even number of *integers*, separated by
448# either a comma (and possibly whitespace) or some amount of whitespace, we
449# can be more confident that we've actually found the charge and multiplicity.
450# Note that in recent versions of Gaussian, the gjf file can contain fragments,
451# where you can give a charge and multiplicity to each fragment. This pattern
452# will allow ASE to read this line in for the charge and multiplicity, however
453# the _get_charge_mult method will only input the first two integers that
454# always gives the overall charge and multiplcity for the full chemical system.
455# The charge and multiplicity of the fragments will be ignored in the
456# _get_charge_mult method.
458_re_nuclear_props = re.compile(r'\(([^\)]+)\)')
459# Matches the nuclear properties, which are contained in parantheses.
461# The following methods are used in GaussianConfiguration's
462# parse_gaussian_input method:
465def _get_link0_param(link0_match):
466 '''Gets link0 keyword and option from a re.Match and returns them
467 in a dictionary format'''
468 value = link0_match.group(2)
469 if value is not None:
470 value = value.strip()
471 else:
472 value = ''
473 return {link0_match.group(1).lower().strip(): value.lower()}
476def _get_all_link0_params(link0_section):
477 ''' Given a string link0_section which contains the link0
478 section of a gaussian input file, returns a dictionary of
479 keywords and values'''
480 parameters = {}
481 for line in link0_section:
482 link0_match = _re_link0.match(line)
483 link0_param = _get_link0_param(link0_match)
484 if link0_param is not None:
485 parameters.update(link0_param)
486 return parameters
489def _convert_to_symbol(string):
490 '''Converts an input string into a format
491 that can be input to the 'symbol' parameter of an
492 ASE Atom object (can be a chemical symbol (str)
493 or an atomic number (int)).
494 This is achieved by either stripping any
495 integers from the string, or converting a string
496 containing an atomic number to integer type'''
497 symbol = _validate_symbol_string(string)
498 if symbol.isnumeric():
499 atomic_number = int(symbol)
500 symbol = chemical_symbols[atomic_number]
501 else:
502 match = re.match(r'([A-Za-z]+)', symbol)
503 symbol = match.group(1)
504 return symbol
507def _validate_symbol_string(string):
508 if "-" in string:
509 raise ParseError("ERROR: Could not read the Gaussian input file, as"
510 " molecule specifications for molecular mechanics "
511 "calculations are not supported.")
512 return string
515def _get_key_value_pairs(line):
516 '''Read line of a gaussian input file with keywords and options
517 separated according to the rules of the route section.
519 Parameters
520 ----------
521 line (string)
522 A line of a gaussian input file.
524 Returns
525 ---------
526 params (dict)
527 Contains the keywords and options found in the line.
528 '''
529 params = {}
530 line = line.strip(' #')
531 line = line.split('!')[0] # removes any comments
532 # First, get the keywords and options sepatated with
533 # parantheses:
534 match_iterator = re.finditer(r'\(([^\)]+)\)', line)
535 index_ranges = []
536 for match in match_iterator:
537 index_range = [match.start(0), match.end(0)]
538 options = match.group(1)
539 # keyword is last word in previous substring:
540 keyword_string = line[:match.start(0)]
541 keyword_match_iter = [k for k in re.finditer(
542 r'[^\,/\s]+', keyword_string) if k.group() != '=']
543 keyword = keyword_match_iter[-1].group().strip(' =')
544 index_range[0] = keyword_match_iter[-1].start()
545 params.update({keyword.lower(): options.lower()})
546 index_ranges.append(index_range)
548 # remove from the line the keywords and options that we have saved:
549 index_ranges.reverse()
550 for index_range in index_ranges:
551 start = index_range[0]
552 stop = index_range[1]
553 line = line[0: start:] + line[stop + 1::]
555 # Next, get the keywords and options separated with
556 # an equals sign, and those without an equals sign
557 # must be keywords without options:
559 # remove any whitespaces around '=':
560 line = re.sub(r'\s*=\s*', '=', line)
561 line = [x for x in re.split(r'[\s,\/]', line) if x != '']
563 for s in line:
564 if '=' in s:
565 s = s.split('=')
566 keyword = s.pop(0)
567 options = s.pop(0)
568 params.update({keyword.lower(): options.lower()})
569 else:
570 if len(s) > 0:
571 params.update({s.lower(): None})
573 return params
576def _get_route_params(line):
577 '''Reads keywords and values from a line in
578 a Gaussian input file's route section,
579 and returns them as a dictionary'''
580 method_basis_match = _re_method_basis.match(line)
581 if method_basis_match:
582 params = {}
583 ase_gen_comment = '! ASE formatted method and basis'
584 if method_basis_match.group(5) == ase_gen_comment:
585 params['method'] = method_basis_match.group(1).strip().lower()
586 params['basis'] = method_basis_match.group(2).strip().lower()
587 if method_basis_match.group(4):
588 params['fitting_basis'] = method_basis_match.group(
589 4).strip().lower()
590 return params
592 return _get_key_value_pairs(line)
595def _get_all_route_params(route_section):
596 ''' Given a string: route_section which contains the route
597 section of a gaussian input file, returns a dictionary of
598 keywords and values'''
599 parameters = {}
601 for line in route_section:
602 output_type_match = _re_output_type.match(line)
603 if not parameters.get('output_type') and output_type_match:
604 line = line.strip(output_type_match.group(0))
605 parameters.update(
606 {'output_type': output_type_match.group(1).lower()})
607 # read route section
608 route_params = _get_route_params(line)
609 if route_params is not None:
610 parameters.update(route_params)
611 return parameters
614def _get_charge_mult(chgmult_section):
615 '''return a dict with the charge and multiplicity from
616 a list chgmult_section that contains the charge and multiplicity
617 line, read from a gaussian input file'''
618 chgmult_match = _re_chgmult.match(str(chgmult_section))
619 try:
620 chgmult = chgmult_match.group(0).split()
621 return {'charge': int(chgmult[0]), 'mult': int(chgmult[1])}
622 except (IndexError, AttributeError):
623 raise ParseError("ERROR: Could not read the charge and "
624 "multiplicity from the Gaussian input "
625 "file. There must be an even number of "
626 "integers separated with whitespace or "
627 "a comma, where the first two integers "
628 "must be the overall charge and overall "
629 "multiplicity of the chemical system, "
630 "respectively.")
633def _get_nuclear_props(line):
634 ''' Reads any info in parantheses in the line and returns
635 a dictionary of the nuclear properties.'''
636 nuclear_props_match = _re_nuclear_props.search(line)
637 nuclear_props = {}
638 if nuclear_props_match:
639 nuclear_props = _get_key_value_pairs(nuclear_props_match.group(1))
640 updated_nuclear_props = {}
641 for key, value in nuclear_props.items():
642 if value.isnumeric():
643 value = int(value)
644 else:
645 value = float(value)
646 if key not in _nuclear_prop_names:
647 if "fragment" in key:
648 warnings.warn("Fragments are not "
649 "currently supported.")
650 warnings.warn("The following nuclear properties "
651 "could not be saved: {}".format(
652 {key: value}))
653 else:
654 updated_nuclear_props[key] = value
655 nuclear_props = updated_nuclear_props
657 for k in _nuclear_prop_names:
658 if k not in nuclear_props:
659 nuclear_props[k] = None
661 return nuclear_props
664def _get_atoms_info(line):
665 '''Returns the symbol and position of an atom from a line
666 in the molecule specification section'''
667 nuclear_props_match = _re_nuclear_props.search(line)
668 if nuclear_props_match:
669 line = line.replace(nuclear_props_match.group(0), '')
670 tokens = line.split()
671 symbol = _convert_to_symbol(tokens[0])
672 pos = list(tokens[1:])
674 return symbol, pos
677def _get_cartesian_atom_coords(symbol, pos):
678 '''Returns the coordinates: pos as a list of floats if they
679 are cartesian, and not in z-matrix format'''
680 if len(pos) < 3 or (pos[0] == '0' and symbol != 'TV'):
681 # In this case, we have a z-matrix definition, so
682 # no cartesian coords.
683 return None
684 elif len(pos) > 3:
685 raise ParseError("ERROR: Gaussian input file could "
686 "not be read as freeze codes are not"
687 " supported. If using cartesian "
688 "coordinates, these must be "
689 "given as 3 numbers separated "
690 "by whitespace.")
691 else:
692 try:
693 return list(map(float, pos))
694 except ValueError:
695 raise ParseError(
696 "ERROR: Molecule specification in"
697 "Gaussian input file could not be read")
700def _get_zmatrix_line(line):
701 ''' Converts line into the format needed for it to
702 be added to the z-matrix contents '''
703 line_list = line.split()
704 if len(line_list) == 8 and line_list[7] == '1':
705 raise ParseError(
706 "ERROR: Could not read the Gaussian input file"
707 ", as the alternative Z-matrix format using "
708 "two bond angles instead of a bond angle and "
709 "a dihedral angle is not supported.")
710 return line.strip() + '\n'
713def _read_zmatrix(zmatrix_contents, zmatrix_vars=None):
714 ''' Reads a z-matrix (zmatrix_contents) using its list of variables
715 (zmatrix_vars), and returns atom positions and symbols '''
716 try:
717 atoms = parse_zmatrix(zmatrix_contents, defs=zmatrix_vars)
718 except (ValueError, RuntimeError) as e:
719 raise ParseError("Failed to read Z-matrix from "
720 "Gaussian input file: ", e)
721 except KeyError as e:
722 raise ParseError("Failed to read Z-matrix from "
723 "Gaussian input file, as symbol: {}"
724 "could not be recognised. Please make "
725 "sure you use element symbols, not "
726 "atomic numbers in the element labels.".format(e))
727 positions = atoms.positions
728 symbols = atoms.get_chemical_symbols()
729 return positions, symbols
732def _get_nuclear_props_for_all_atoms(nuclear_props):
733 ''' Returns the nuclear properties for all atoms as a dictionary,
734 in the format needed for it to be added to the parameters dictionary.'''
735 params = {k + 'list': [] for k in _nuclear_prop_names}
736 for dictionary in nuclear_props:
737 for key, value in dictionary.items():
738 params[key + 'list'].append(value)
740 for key, array in params.items():
741 values_set = False
742 for value in array:
743 if value is not None:
744 values_set = True
745 if not values_set:
746 params[key] = None
747 return params
750def _get_atoms_from_molspec(molspec_section):
751 ''' Takes a string: molspec_section which contains the molecule
752 specification section of a gaussian input file, and returns an atoms
753 object that represents this.'''
754 # These will contain info that will be attached to the Atoms object:
755 symbols = []
756 positions = []
757 pbc = np.zeros(3, dtype=bool)
758 cell = np.zeros((3, 3))
759 npbc = 0
761 # Will contain a dictionary of nuclear properties for each atom,
762 # that will later be saved to the parameters dict:
763 nuclear_props = []
765 # Info relating to the z-matrix definition (if set)
766 zmatrix_type = False
767 zmatrix_contents = ""
768 zmatrix_var_section = False
769 zmatrix_vars = ""
771 for line in molspec_section:
772 # Remove any comments and replace '/' and ',' with whitespace,
773 # as these are equivalent:
774 line = line.split('!')[0].replace('/', ' ').replace(',', ' ')
775 if (line.split()):
776 if zmatrix_type:
777 # Save any variables set when defining the z-matrix:
778 if zmatrix_var_section:
779 zmatrix_vars += line.strip() + '\n'
780 continue
781 elif 'variables' in line.lower():
782 zmatrix_var_section = True
783 continue
784 elif 'constants' in line.lower():
785 zmatrix_var_section = True
786 warnings.warn("Constants in the optimisation are "
787 "not currently supported. Instead "
788 "setting constants as variables.")
789 continue
791 symbol, pos = _get_atoms_info(line)
792 current_nuclear_props = _get_nuclear_props(line)
794 if not zmatrix_type:
795 pos = _get_cartesian_atom_coords(symbol, pos)
796 if pos is None:
797 zmatrix_type = True
799 if symbol.upper() == 'TV' and pos is not None:
800 pbc[npbc] = True
801 cell[npbc] = pos
802 npbc += 1
803 else:
804 nuclear_props.append(current_nuclear_props)
805 if not zmatrix_type:
806 symbols.append(symbol)
807 positions.append(pos)
809 if zmatrix_type:
810 zmatrix_contents += _get_zmatrix_line(line)
812 # Now that we are past the molecule spec. section, we can read
813 # the entire z-matrix (if set):
814 if len(positions) == 0:
815 if zmatrix_type:
816 if zmatrix_vars == '':
817 zmatrix_vars = None
818 positions, symbols = _read_zmatrix(
819 zmatrix_contents, zmatrix_vars)
821 try:
822 atoms = Atoms(symbols, positions, pbc=pbc, cell=cell)
823 except (IndexError, ValueError, KeyError) as e:
824 raise ParseError("ERROR: Could not read the Gaussian input file, "
825 "due to a problem with the molecule "
826 "specification: {}".format(e))
828 nuclear_props = _get_nuclear_props_for_all_atoms(nuclear_props)
830 return atoms, nuclear_props
833def _get_readiso_param(parameters):
834 ''' Returns a tuple containing the frequency
835 keyword and its options, if the frequency keyword is
836 present in parameters and ReadIso is one of its options'''
837 freq_options = parameters.get('freq', None)
838 if freq_options:
839 freq_name = 'freq'
840 else:
841 freq_options = parameters.get('frequency', None)
842 freq_name = 'frequency'
843 if freq_options is not None:
844 if ('readiso' or 'readisotopes') in freq_options:
845 return freq_name, freq_options
846 return None, None
849def _get_readiso_info(line, parameters):
850 '''Reads the temperature, pressure and scale from the first line
851 of a ReadIso section of a Gaussian input file. Returns these in
852 a dictionary.'''
853 readiso_params = {}
854 if _get_readiso_param(parameters)[0] is not None:
855 # when count_iso is 0 we are in the line where
856 # temperature, pressure, [scale] is saved
857 line = line.replace(
858 '[', '').replace(']', '')
859 tokens = line.strip().split()
860 try:
861 readiso_params['temperature'] = tokens[0]
862 readiso_params['pressure'] = tokens[1]
863 readiso_params['scale'] = tokens[2]
864 except IndexError:
865 pass
866 if readiso_params != {}:
868 return readiso_params
871def _delete_readiso_param(parameters):
872 '''Removes the readiso parameter from the parameters dict'''
873 parameters = deepcopy(parameters)
874 freq_name, freq_options = _get_readiso_param(parameters)
875 if freq_name is not None:
876 if 'readisotopes' in freq_options:
877 iso_name = 'readisotopes'
878 else:
879 iso_name = 'readiso'
880 freq_options = [v.group() for v in re.finditer(
881 r'[^\,/\s]+', freq_options)]
882 freq_options.remove(iso_name)
883 new_freq_options = ''
884 for v in freq_options:
885 new_freq_options += v + ' '
886 if new_freq_options == '':
887 new_freq_options = None
888 else:
889 new_freq_options = new_freq_options.strip()
890 parameters[freq_name] = new_freq_options
891 return parameters
894def _update_readiso_params(parameters, symbols):
895 ''' Deletes the ReadIso option from the route section as we
896 write out the masses in the nuclear properties section
897 instead of the ReadIso section.
898 Ensures the masses array is the same length as the
899 symbols array. This is necessary due to the way the
900 ReadIso section is defined:
901 The mass of each atom is listed on a separate line, in
902 order of appearance in the molecule spec. A blank line
903 indicates not to modify the mass for that atom.
904 But you do not have to leave blank lines equal to the
905 remaining atoms after you finsihed setting masses.
906 E.g. if you had 10 masses and only want to set the mass
907 for the first atom, you don't have to leave 9 blank lines
908 after it.
909 '''
910 parameters = _delete_readiso_param(parameters)
911 if parameters.get('isolist') is not None:
912 if len(parameters['isolist']) < len(symbols):
913 for _ in range(len(symbols) - len(parameters['isolist'])):
914 parameters['isolist'].append(None)
915 if all(m is None for m in parameters['isolist']):
916 parameters['isolist'] = None
918 return parameters
921def _validate_params(parameters):
922 '''Checks whether all of the required parameters exist in the
923 parameters dict and whether it contains any unsupported settings
924 '''
925 # Check for unsupported settings
926 unsupported_settings = {
927 "z-matrix", "modredun", "modredundant", "addredundant", "addredun",
928 "readopt", "rdopt"}
930 for s in unsupported_settings:
931 for v in parameters.values():
932 if v is not None and s in str(v):
933 raise ParseError(
934 "ERROR: Could not read the Gaussian input file"
935 ", as the option: {} is currently unsupported."
936 .format(s))
938 for k in list(parameters.keys()):
939 if "popt" in k:
940 parameters["opt"] = parameters.pop(k)
941 warnings.warn("The option {} is currently unsupported. "
942 "This has been replaced with {}."
943 .format("POpt", "opt"))
944 return
947def _get_extra_section_params(extra_section, parameters, atoms):
948 ''' Takes a list of strings: extra_section, which contains
949 the 'extra' lines in a gaussian input file. Also takes the parameters
950 that have been read so far, and the atoms that have been read from the
951 file.
952 Returns an updated version of the parameters dict, containing details from
953 this extra section. This may include the basis set definition or filename,
954 and/or the readiso section.'''
956 new_parameters = deepcopy(parameters)
957 # Will store the basis set definition (if set):
958 basis_set = ""
959 # This will indicate whether we have a readiso section:
960 readiso = _get_readiso_param(new_parameters)[0]
961 # This will indicate which line of the readiso section we are reading:
962 count_iso = 0
963 readiso_masses = []
965 for line in extra_section:
966 if line.split():
967 # check that the line isn't just a comment
968 if line.split()[0] == '!':
969 continue
970 line = line.strip().split('!')[0]
972 if len(line) > 0 and line[0] == '@':
973 # If the name of a basis file is specified, this line
974 # begins with a '@'
975 new_parameters['basisfile'] = line
976 elif readiso and count_iso < len(atoms.symbols) + 1:
977 # The ReadIso section has 1 more line than the number of
978 # symbols
979 if count_iso == 0 and line != '\n':
980 # The first line in the readiso section contains the
981 # temperature, pressure, scale. Here we save this:
982 readiso_info = _get_readiso_info(line, new_parameters)
983 if readiso_info is not None:
984 new_parameters.update(readiso_info)
985 # If the atom masses were set in the nuclear properties
986 # section, they will be overwritten by the ReadIso
987 # section
988 readiso_masses = []
989 count_iso += 1
990 elif count_iso > 0:
991 # The remaining lines in the ReadIso section are
992 # the masses of the atoms
993 try:
994 readiso_masses.append(float(line))
995 except ValueError:
996 readiso_masses.append(None)
997 count_iso += 1
998 else:
999 # If the rest of the file is not the ReadIso section,
1000 # then it must be the definition of the basis set.
1001 if new_parameters.get('basis', '') == 'gen' \
1002 or 'gen' in new_parameters:
1003 if line.strip() != '':
1004 basis_set += line + '\n'
1006 if readiso:
1007 new_parameters['isolist'] = readiso_masses
1008 new_parameters = _update_readiso_params(new_parameters, atoms.symbols)
1010 # Saves the basis set definition to the parameters array if
1011 # it has been set:
1012 if basis_set != '':
1013 new_parameters['basis_set'] = basis_set
1014 new_parameters['basis'] = 'gen'
1015 new_parameters.pop('gen', None)
1017 return new_parameters
1020def _get_gaussian_in_sections(fd):
1021 ''' Reads a gaussian input file and returns
1022 a dictionary of the sections of the file - each dictionary
1023 value is a list of strings - each string is a line in that
1024 section. '''
1025 # These variables indicate which section of the
1026 # input file is currently being read:
1027 route_section = False
1028 atoms_section = False
1029 atoms_saved = False
1030 # Here we will store the sections of the file in a dictionary,
1031 # as lists of strings
1032 gaussian_sections = {'link0': [], 'route': [],
1033 'charge_mult': [], 'mol_spec': [], 'extra': []}
1035 for line in fd:
1036 line = line.strip(' ')
1037 link0_match = _re_link0.match(line)
1038 output_type_match = _re_output_type.match(line)
1039 chgmult_match = _re_chgmult.match(line)
1040 if link0_match:
1041 gaussian_sections['link0'].append(line)
1042 # The first blank line appears at the end of the route section
1043 # and a blank line appears at the end of the atoms section:
1044 elif line == '\n' and (route_section or atoms_section):
1045 route_section = False
1046 atoms_section = False
1047 elif output_type_match or route_section:
1048 route_section = True
1049 gaussian_sections['route'].append(line)
1050 elif chgmult_match:
1051 gaussian_sections['charge_mult'] = line
1052 # After the charge and multiplicty have been set, the
1053 # molecule specification section of the input file begins:
1054 atoms_section = True
1055 elif atoms_section:
1056 gaussian_sections['mol_spec'].append(line)
1057 atoms_saved = True
1058 elif atoms_saved:
1059 # Next we read the other sections below the molecule spec.
1060 # This may include the ReadIso section and the basis set
1061 # definition or filename
1062 gaussian_sections['extra'].append(line)
1064 return gaussian_sections
1067class GaussianConfiguration:
1069 def __init__(self, atoms, parameters):
1070 self.atoms = atoms.copy()
1071 self.parameters = deepcopy(parameters)
1073 def get_atoms(self):
1074 return self.atoms
1076 def get_parameters(self):
1077 return self.parameters
1079 def get_calculator(self):
1080 # Explicitly set parameters that must for security reasons not be
1081 # taken from file:
1082 calc = Gaussian(atoms=self.atoms, command=None, restart=None,
1083 ignore_bad_restart_file=Calculator._deprecated,
1084 label='Gaussian', directory='.', **self.parameters)
1085 return calc
1087 @staticmethod
1088 def parse_gaussian_input(fd):
1089 '''Reads a gaussian input file into an atoms object and
1090 parameters dictionary.
1092 Parameters
1093 ----------
1094 fd: file-like
1095 Contains the contents of a gaussian input file
1097 Returns
1098 ---------
1099 GaussianConfiguration
1100 Contains an atoms object created using the structural
1101 information from the input file.
1102 Contains a parameters dictionary, which stores any
1103 keywords and options found in the link-0 and route
1104 sections of the input file.
1105 '''
1106 # The parameters dict to attach to the calculator
1107 parameters = {}
1109 file_sections = _get_gaussian_in_sections(fd)
1111 # Update the parameters dictionary with the keywords and values
1112 # from each section of the input file:
1113 parameters.update(_get_all_link0_params(file_sections['link0']))
1114 parameters.update(_get_all_route_params(file_sections['route']))
1115 parameters.update(_get_charge_mult(file_sections['charge_mult']))
1116 atoms, nuclear_props = _get_atoms_from_molspec(
1117 file_sections['mol_spec'])
1118 parameters.update(nuclear_props)
1119 parameters.update(_get_extra_section_params(
1120 file_sections['extra'], parameters, atoms))
1122 _validate_params(parameters)
1124 return GaussianConfiguration(atoms, parameters)
1127def read_gaussian_in(fd, attach_calculator=False):
1128 '''
1129 Reads a gaussian input file and returns an Atoms object.
1131 Parameters
1132 ----------
1133 fd: file-like
1134 Contains the contents of a gaussian input file
1136 attach_calculator: bool
1137 When set to ``True``, a Gaussian calculator will be
1138 attached to the Atoms object which is returned.
1139 This will mean that additional information is read
1140 from the input file (see below for details).
1142 Returns
1143 ----------
1144 Atoms
1145 An Atoms object containing the following information that has been
1146 read from the input file: symbols, positions, cell.
1148 Notes
1149 ----------
1150 Gaussian input files can be read where the atoms' locations are set with
1151 cartesian coordinates or as a z-matrix. Variables may be used in the
1152 z-matrix definition, but currently setting constants for constraining
1153 a geometry optimisation is not supported. Note that the `alternative`
1154 z-matrix definition where two bond angles are set instead of a bond angle
1155 and a dihedral angle is not currently supported.
1157 If the parameter ``attach_calculator`` is set to ``True``, then the Atoms
1158 object is returned with a Gaussian calculator attached.
1160 This Gaussian calculator will contain a parameters dictionary which will
1161 contain the Link 0 commands and the options and keywords set in the route
1162 section of the Gaussian input file, as well as:
1164 • The charge and multiplicity
1166 • The selected level of output
1168 • The method, basis set and (optional) fitting basis set.
1170 • Basis file name/definition if set
1172 • Nuclear properties
1174 • Masses set in the nuclear properties section or in the ReadIsotopes
1175 section (if ``freq=ReadIso`` is set). These will be saved in an array
1176 with keyword ``isolist``, in the parameters dictionary. For these to
1177 actually be used in calculations and/or written out to input files,
1178 the Atoms object's masses must be manually updated to these values
1179 (this is not done automatically)
1181 If the Gaussian input file has been generated by ASE's
1182 ``write_gaussian_in`` method, then the basis set, method and fitting
1183 basis will be saved under the ``basis``, ``method`` and ``fitting_basis``
1184 keywords in the parameters dictionary. Otherwise they are saved as
1185 keywords in the parameters dict.
1187 Currently this does not support reading of any other sections which would
1188 be found below the molecule specification that have not been mentioned
1189 here (such as the ModRedundant section).
1190 '''
1191 gaussian_input = GaussianConfiguration.parse_gaussian_input(fd)
1192 atoms = gaussian_input.get_atoms()
1194 if attach_calculator:
1195 atoms.calc = gaussian_input.get_calculator()
1197 return atoms
1200# In the interest of using the same RE for both atomic positions and forces,
1201# we make one of the columns optional. That's because atomic positions have
1202# 6 columns, while forces only has 5 columns. Otherwise they are very similar.
1203_re_atom = re.compile(
1204 r'^\s*\S+\s+(\S+)\s+(?:\S+\s+)?(\S+)\s+(\S+)\s+(\S+)\s*$'
1205)
1206_re_forceblock = re.compile(r'^\s*Center\s+Atomic\s+Forces\s+\S+\s*$')
1207_re_l716 = re.compile(r'^\s*\(Enter .+l716.exe\)$')
1210def _compare_merge_configs(configs, new):
1211 """Append new to configs if it contains a new geometry or new data.
1213 Gaussian sometimes repeats a geometry, for example at the end of an
1214 optimization, or when a user requests vibrational frequency
1215 analysis in the same calculation as a geometry optimization.
1217 In those cases, rather than repeating the structure in the list of
1218 returned structures, try to merge results if doing so doesn't change
1219 any previously calculated values. If that's not possible, then create
1220 a new "image" with the new results.
1221 """
1222 if not configs:
1223 configs.append(new)
1224 return
1226 old = configs[-1]
1228 if old != new:
1229 configs.append(new)
1230 return
1232 oldres = old.calc.results
1233 newres = new.calc.results
1234 common_keys = set(oldres).intersection(newres)
1236 for key in common_keys:
1237 if np.any(oldres[key] != newres[key]):
1238 configs.append(new)
1239 return
1240 oldres.update(newres)
1243def _read_charges(fd):
1244 fd.readline()
1245 qs = []
1246 ms = []
1247 for line in fd:
1248 if not line.strip()[0].isdigit():
1249 break
1250 qs.append(float(line.split()[2]))
1251 if len(line.split()) > 3:
1252 ms.append(float(line.split()[3]))
1253 return {'charges': qs, 'magmoms': ms} if ms else {'charges': qs}
1256def read_gaussian_out(fd, index=-1):
1257 """Reads a gaussian output file and returns an Atoms object."""
1258 configs = []
1259 atoms = None
1260 energy = None
1261 results = {}
1262 orientation = None # Orientation of the coordinates stored in atoms
1263 for line in fd:
1264 line = line.strip()
1265 if line.startswith(r'1\1\GINC'):
1266 # We've reached the "archive" block at the bottom, stop parsing
1267 break
1269 if (line == 'Input orientation:'
1270 or line == 'Z-Matrix orientation:'
1271 or line == "Standard orientation:"):
1272 if atoms is not None:
1273 # Add configuration to the currently-parsed list
1274 # only after an energy or force has been parsed
1275 # (we assume these are in the output file)
1276 if energy is None:
1277 continue
1278 # "atoms" should store the first geometry encountered
1279 # in the input file which is often the input orientation,
1280 # which is the orientation for forces.
1281 # If there are forces and the orientation of atoms is not
1282 # the input coordinate system, warn the user
1283 if orientation != "Input" and 'forces' in results:
1284 logger.warning('Configuration geometry is not in the input'
1285 f'orientation. It is {orientation}')
1286 calc = SinglePointCalculator(atoms, energy=energy, **results)
1287 atoms.calc = calc
1288 _compare_merge_configs(configs, atoms)
1289 atoms = None
1290 orientation = line.split()[0] # Store the orientation
1291 energy = None
1292 results = {}
1294 numbers = []
1295 positions = []
1296 pbc = np.zeros(3, dtype=bool)
1297 cell = np.zeros((3, 3))
1298 npbc = 0
1299 # skip 4 irrelevant lines
1300 for _ in range(4):
1301 fd.readline()
1302 while True:
1303 match = _re_atom.match(fd.readline())
1304 if match is None:
1305 break
1306 number = int(match.group(1))
1307 pos = list(map(float, match.group(2, 3, 4)))
1308 if number == -2:
1309 pbc[npbc] = True
1310 cell[npbc] = pos
1311 npbc += 1
1312 else:
1313 numbers.append(max(number, 0))
1314 positions.append(pos)
1315 atoms = Atoms(numbers, positions, pbc=pbc, cell=cell)
1316 elif (line.startswith('Energy=')
1317 or line.startswith('SCF Done:')):
1318 # Some semi-empirical methods (Huckel, MINDO3, etc.),
1319 # or SCF methods (HF, DFT, etc.)
1320 energy = float(line.split('=')[1].split()[0].replace('D', 'e'))
1321 energy *= Hartree
1322 elif (line.startswith('E2 =') or line.startswith('E3 =')
1323 or line.startswith('E4(') or line.startswith('DEMP5 =')
1324 or line.startswith('E2(')):
1325 # MP{2,3,4,5} energy
1326 # also some double hybrid calculations, like B2PLYP
1327 energy = float(line.split('=')[-1].strip().replace('D', 'e'))
1328 energy *= Hartree
1329 elif line.startswith('Wavefunction amplitudes converged. E(Corr)'):
1330 # "correlated method" energy, e.g. CCSD
1331 energy = float(line.split('=')[-1].strip().replace('D', 'e'))
1332 energy *= Hartree
1333 elif line.startswith('CCSD(T)='):
1334 # CCSD(T) energy
1335 energy = float(line.split('=')[-1].strip().replace('D', 'e'))
1336 energy *= Hartree
1337 elif (
1338 line.startswith('Mulliken charges')
1339 or line.startswith('Lowdin Atomic Charges')
1340 or line.startswith('Hirshfeld charges, spin densities,')
1341 ):
1342 # Löwdin is printed after Mulliken and overwrites `charges`.
1343 # Hirshfeld is printed after Mulliken and overwrites `charges`.
1344 results.update(_read_charges(fd))
1345 elif line.startswith('Dipole moment') and energy is not None:
1346 # dipole moment in `l601.exe`, printed unless `Pop=None`
1347 # Skipped if energy is not printed in the same section.
1348 # This happens in the last geometry record when `opt` or `irc` is
1349 # specified. In this case, the record is compared with the previous
1350 # one in `_compare_merge_configs`, and there the dipole moment
1351 # from `l601` conflicts with the previous record from `l716`.
1352 line = fd.readline().strip()
1353 dipole = np.array([float(_) for _ in line.split()[1:6:2]]) * Debye
1354 results['dipole'] = dipole
1355 elif _re_l716.match(line):
1356 # Sometimes Gaussian will print "Rotating derivatives to
1357 # standard orientation" after the matched line (which looks like
1358 # "(Enter /opt/gaussian/g16/l716.exe)", though the exact path
1359 # depends on where Gaussian is installed). We *skip* the dipole
1360 # in this case, because it might be rotated relative to the input
1361 # orientation (and also it is numerically different even if the
1362 # standard orientation is the same as the input orientation).
1363 line = fd.readline().strip()
1364 if not line.startswith('Dipole'):
1365 continue
1366 dip = line.split('=')[1].replace('D', 'e')
1367 tokens = dip.split()
1368 dipole = []
1369 # dipole elements can run together, depending on what method was
1370 # used to calculate them. First see if there is a space between
1371 # values.
1372 if len(tokens) == 3:
1373 dipole = list(map(float, tokens))
1374 elif len(dip) % 3 == 0:
1375 # next, check if the number of tokens is divisible by 3
1376 nchars = len(dip) // 3
1377 for i in range(3):
1378 dipole.append(float(dip[nchars * i:nchars * (i + 1)]))
1379 else:
1380 # otherwise, just give up on trying to parse it.
1381 dipole = None
1382 continue
1383 # this dipole moment is printed in atomic units, e-Bohr
1384 # ASE uses e-Angstrom for dipole moments.
1385 results['dipole'] = np.array(dipole) * Bohr
1386 elif _re_forceblock.match(line):
1387 # skip 2 irrelevant lines
1388 fd.readline()
1389 fd.readline()
1390 forces = []
1391 while True:
1392 match = _re_atom.match(fd.readline())
1393 if match is None:
1394 break
1395 forces.append(list(map(float, match.group(2, 3, 4))))
1396 results['forces'] = np.array(forces) * Hartree / Bohr
1397 if atoms is not None:
1398 atoms.calc = SinglePointCalculator(atoms, energy=energy, **results)
1399 _compare_merge_configs(configs, atoms)
1400 return configs[index]