Coverage for ase / io / octopus / input.py: 26.88%
372 statements
« prev ^ index » next coverage.py v7.13.3, created at 2026-02-04 10:20 +0000
« prev ^ index » next coverage.py v7.13.3, created at 2026-02-04 10:20 +0000
1import os
2import re
4import numpy as np
6from ase import Atoms
7from ase.calculators.calculator import kpts2ndarray
8from ase.data import atomic_numbers
9from ase.io import read
10from ase.units import Bohr, Hartree
11from ase.utils import reader
13special_ase_keywords = {'kpts'}
16def process_special_kwargs(atoms, kwargs):
17 kwargs = kwargs.copy()
18 kpts = kwargs.pop('kpts', None)
19 if kpts is not None:
20 for kw in ['kpoints', 'reducedkpoints', 'kpointsgrid']:
21 if kw in kwargs:
22 raise ValueError('k-points specified multiple times')
24 kptsarray = kpts2ndarray(kpts, atoms)
25 nkpts = len(kptsarray)
26 fullarray = np.empty((nkpts, 4))
27 fullarray[:, 0] = 1.0 / nkpts # weights
28 fullarray[:, 1:4] = kptsarray
29 kwargs['kpointsreduced'] = fullarray.tolist()
31 # TODO xc=LDA/PBE etc.
33 # The idea is to get rid of the special keywords, since the rest
34 # will be passed to Octopus
35 # XXX do a better check of this
36 for kw in special_ase_keywords:
37 assert kw not in kwargs, kw
38 return kwargs
41class OctopusParseError(Exception):
42 pass # Cannot parse input file
45# Parse value as written in input file *or* something that one would be
46# passing to the ASE interface, i.e., this might already be a boolean
47def octbool2bool(value):
48 value = value.lower()
49 if isinstance(value, int):
50 return bool(value)
51 if value in ['true', 't', 'yes', '1']:
52 return True
53 elif value in ['no', 'f', 'false', '0']:
54 return False
55 else:
56 raise ValueError(f'Failed to interpret "{value}" as a boolean.')
59def list2block(name, rows):
60 """Construct 'block' of Octopus input.
62 convert a list of rows to a string with the format x | x | ....
63 for the octopus input file"""
64 lines = []
65 lines.append('%' + name)
66 for row in rows:
67 lines.append(' ' + ' | '.join(str(obj) for obj in row))
68 lines.append('%')
69 return lines
72def normalize_keywords(kwargs):
73 """Reduce keywords to unambiguous form (lowercase)."""
74 newkwargs = {}
75 for arg, value in kwargs.items():
76 lkey = arg.lower()
77 newkwargs[lkey] = value
78 return newkwargs
81def input_line_iter(lines):
82 """Convenient iterator for parsing input files 'cleanly'.
84 Discards comments etc."""
85 for line in lines:
86 line = line.split('#')[0].strip()
87 if not line or line.isspace():
88 continue
89 line = line.strip()
90 yield line
93def block2list(namespace, lines, header=None):
94 """Parse lines of block and return list of lists of strings."""
95 lines = iter(lines)
96 block = []
97 if header is None:
98 header = next(lines)
99 assert header.startswith('%'), header
100 name = header[1:].strip().lower()
101 for line in lines:
102 if line.startswith('%'): # Could also say line == '%' most likely.
103 break
104 tokens = [
105 namespace.evaluate(token) for token in line.strip().split('|')
106 ]
107 # XXX will fail for string literals containing '|'
108 block.append(tokens)
109 return name, block
112class OctNamespace:
113 def __init__(self):
114 self.names = {}
115 self.consts = {
116 'pi': np.pi,
117 'angstrom': 1.0 / Bohr,
118 'ev': 1.0 / Hartree,
119 'yes': True,
120 'no': False,
121 't': True,
122 'f': False,
123 'i': 1j, # This will probably cause trouble
124 'true': True,
125 'false': False,
126 }
128 def evaluate(self, value):
129 value = value.strip()
131 for char in '"', "'": # String literal
132 if value.startswith(char):
133 assert value.endswith(char)
134 return value
136 value = value.lower()
138 if value in self.consts: # boolean or other constant
139 return self.consts[value]
141 if value in self.names: # existing variable
142 return self.names[value]
144 try: # literal integer
145 v = int(value)
146 except ValueError:
147 pass
148 else:
149 if v == float(v):
150 return v
152 try: # literal float
153 return float(value)
154 except ValueError:
155 pass
157 if (
158 '*' in value
159 or '/' in value
160 and not any(char in value for char in '()+')
161 ):
162 floatvalue = 1.0
163 op = '*'
164 for token in re.split(r'([\*/])', value):
165 if token in '*/':
166 op = token
167 continue
169 v = self.evaluate(token)
171 try:
172 v = float(v)
173 except TypeError:
174 try:
175 v = complex(v)
176 except ValueError:
177 break
178 except ValueError:
179 break # Cannot evaluate expression
180 else:
181 if op == '*':
182 floatvalue *= v
183 else:
184 assert op == '/', op
185 floatvalue /= v
186 else: # Loop completed successfully
187 return floatvalue
188 return value # unknown name, or complex arithmetic expression
190 def add(self, name, value):
191 value = self.evaluate(value)
192 self.names[name.lower().strip()] = value
195def parse_input_file(fd):
196 namespace = OctNamespace()
197 lines = input_line_iter(fd)
198 blocks = {}
199 while True:
200 try:
201 line = next(lines)
202 except StopIteration:
203 break
204 else:
205 if line.startswith('%'):
206 name, value = block2list(namespace, lines, header=line)
207 blocks[name] = value
208 else:
209 tokens = line.split('=', 1)
210 assert len(tokens) == 2, tokens
211 name, value = tokens
212 namespace.add(name, value)
214 namespace.names.update(blocks)
215 return namespace.names
218def kwargs2cell(kwargs):
219 # kwargs -> cell + remaining kwargs
220 # cell will be None if not ASE-compatible.
221 #
222 # Returns numbers verbatim; caller must convert units.
223 kwargs = normalize_keywords(kwargs)
225 if boxshape_is_ase_compatible(kwargs):
226 kwargs.pop('boxshape', None)
227 if 'lsize' in kwargs:
228 Lsize = kwargs.pop('lsize')
229 if not isinstance(Lsize, list):
230 Lsize = [[Lsize] * 3]
231 assert len(Lsize) == 1
232 cell = np.array([2 * float(x) for x in Lsize[0]])
233 elif 'latticeparameters' in kwargs:
234 # Eval latparam and latvec
235 latparam = np.array(kwargs.pop('latticeparameters'), float).T
236 cell = np.array(kwargs.pop('latticevectors', np.eye(3)), float)
237 for a, vec in zip(latparam, cell):
238 vec *= a
239 assert cell.shape == (3, 3)
240 else:
241 cell = None
242 return cell, kwargs
245def boxshape_is_ase_compatible(kwargs):
246 pdims = int(kwargs.get('periodicdimensions', 0))
247 default_boxshape = 'parallelepiped' if pdims > 0 else 'minimum'
248 boxshape = kwargs.get('boxshape', default_boxshape).lower()
249 # XXX add support for experimental keyword 'latticevectors'
250 return boxshape == 'parallelepiped'
253def kwargs2atoms(kwargs, directory=None):
254 """Extract atoms object from keywords and return remaining keywords.
256 Some keyword arguments may refer to files. The directory keyword
257 may be necessary to resolve the paths correctly, and is used for
258 example when running 'ase gui somedir/inp'."""
259 kwargs = normalize_keywords(kwargs)
261 # Only input units accepted nowadays are 'atomic'.
262 # But if we are loading an old file, and it specifies something else,
263 # we can be sure that the user wanted that back then.
265 coord_keywords = [
266 'coordinates',
267 'xyzcoordinates',
268 'pdbcoordinates',
269 'reducedcoordinates',
270 'xsfcoordinates',
271 'xsfcoordinatesanimstep',
272 ]
274 nkeywords = 0
275 for keyword in coord_keywords:
276 if keyword in kwargs:
277 nkeywords += 1
278 if nkeywords == 0:
279 raise OctopusParseError('No coordinates')
280 elif nkeywords > 1:
281 raise OctopusParseError(
282 'Multiple coordinate specifications present. '
283 'This may be okay in Octopus, but we do not '
284 'implement it.'
285 )
287 def get_positions_from_block(keyword):
288 # %Coordinates or %ReducedCoordinates -> atomic numbers, positions.
289 block = kwargs.pop(keyword)
290 positions = []
291 numbers = []
292 tags = []
293 types = {}
294 for row in block:
295 assert len(row) in [ndims + 1, ndims + 2]
296 row = row[: ndims + 1]
297 sym = row[0]
298 assert sym.startswith('"') or sym.startswith("'")
299 assert sym[0] == sym[-1]
300 sym = sym[1:-1]
301 pos0 = np.zeros(3)
302 ndim = int(kwargs.get('dimensions', 3))
303 pos0[:ndim] = [float(element) for element in row[1:]]
304 number = atomic_numbers.get(sym) # Use 0 ~ 'X' for unknown?
305 tag = 0
306 if number is None:
307 if sym not in types:
308 tag = len(types) + 1
309 types[sym] = tag
310 number = 0
311 tag = types[sym]
312 tags.append(tag)
313 numbers.append(number)
314 positions.append(pos0)
315 positions = np.array(positions)
316 tags = np.array(tags, int)
317 if types:
318 ase_types = {}
319 for sym, tag in types.items():
320 ase_types[('X', tag)] = sym
321 info = {'types': ase_types} # 'info' dict for Atoms object
322 else:
323 tags = None
324 info = None
325 return numbers, positions, tags, info
327 def read_atoms_from_file(fname, fmt):
328 assert fname.startswith('"') or fname.startswith("'")
329 assert fname[0] == fname[-1]
330 fname = fname[1:-1]
331 if directory is not None:
332 fname = os.path.join(directory, fname)
333 # XXX test xyz, pbd and xsf
334 if fmt == 'xsf' and 'xsfcoordinatesanimstep' in kwargs:
335 anim_step = kwargs.pop('xsfcoordinatesanimstep')
336 theslice = slice(anim_step, anim_step + 1, 1)
337 # XXX test animstep
338 else:
339 theslice = slice(None, None, 1)
340 images = read(fname, theslice, fmt)
341 if len(images) != 1:
342 raise OctopusParseError(
343 "Expected only one image. Don't know "
344 'what to do with %d images.' % len(images)
345 )
346 return images[0]
348 # We will attempt to extract cell and pbc from kwargs if 'lacking'.
349 # But they might have been left unspecified on purpose.
350 #
351 # We need to keep track of these two variables "externally"
352 # because the Atoms object assigns values when they are not given.
353 cell = None
354 pbc = None
355 adjust_positions_by_half_cell = False
357 atoms = None
358 xsfcoords = kwargs.pop('xsfcoordinates', None)
359 if xsfcoords is not None:
360 atoms = read_atoms_from_file(xsfcoords, 'xsf')
361 atoms.positions *= Bohr
362 atoms.cell *= Bohr
363 # As it turns out, non-periodic xsf is not supported by octopus.
364 # Also, it only supports fully periodic or fully non-periodic....
365 # So the only thing that we can test here is 3D fully periodic.
366 if sum(atoms.pbc) != 3:
367 raise NotImplementedError('XSF not fully periodic with Octopus')
368 cell = atoms.cell
369 pbc = atoms.pbc
370 # Position adjustment doesn't actually matter but this should work
371 # most 'nicely':
372 adjust_positions_by_half_cell = False
373 xyzcoords = kwargs.pop('xyzcoordinates', None)
374 if xyzcoords is not None:
375 atoms = read_atoms_from_file(xyzcoords, 'xyz')
376 atoms.positions *= Bohr
377 adjust_positions_by_half_cell = True
378 pdbcoords = kwargs.pop('pdbcoordinates', None)
379 if pdbcoords is not None:
380 atoms = read_atoms_from_file(pdbcoords, 'pdb')
381 pbc = atoms.pbc
382 adjust_positions_by_half_cell = True
383 # Due to an error in ASE pdb, we can only test the nonperiodic case.
384 # atoms.cell *= Bohr # XXX cell? Not in nonperiodic case...
385 atoms.positions *= Bohr
386 if sum(atoms.pbc) != 0:
387 raise NotImplementedError('Periodic pdb not supported by ASE.')
389 if cell is None:
390 # cell could not be established from the file, so we set it on the
391 # Atoms now if possible:
392 cell, kwargs = kwargs2cell(kwargs)
393 if cell is not None:
394 cell *= Bohr
395 if cell is not None and atoms is not None:
396 atoms.cell = cell
397 # In case of boxshape = sphere and similar, we still do not have
398 # a cell.
400 ndims = int(kwargs.get('dimensions', 3))
401 if ndims != 3:
402 raise NotImplementedError('Only 3D calculations supported.')
404 coords = kwargs.get('coordinates')
405 if coords is not None:
406 numbers, pos, tags, info = get_positions_from_block('coordinates')
407 pos *= Bohr
408 adjust_positions_by_half_cell = True
409 atoms = Atoms(
410 cell=cell, numbers=numbers, positions=pos, tags=tags, info=info
411 )
412 rcoords = kwargs.get('reducedcoordinates')
413 if rcoords is not None:
414 numbers, spos, tags, info = get_positions_from_block(
415 'reducedcoordinates'
416 )
417 if cell is None:
418 raise ValueError(
419 'Cannot figure out what the cell is, '
420 'and thus cannot interpret reduced coordinates.'
421 )
422 atoms = Atoms(
423 cell=cell,
424 numbers=numbers,
425 scaled_positions=spos,
426 tags=tags,
427 info=info,
428 )
429 if atoms is None:
430 raise OctopusParseError('Apparently there are no atoms.')
432 # Either we have non-periodic BCs or the atoms object already
433 # got its BCs from reading the file. In the latter case
434 # we shall override only if PeriodicDimensions was given specifically:
436 if pbc is None:
437 pdims = int(kwargs.pop('periodicdimensions', 0))
438 pbc = np.zeros(3, dtype=bool)
439 pbc[:pdims] = True
440 atoms.pbc = pbc
442 if (
443 cell is not None
444 and cell.shape == (3,)
445 and adjust_positions_by_half_cell
446 ):
447 nonpbc = atoms.pbc == 0
448 atoms.positions[:, nonpbc] += np.array(cell)[None, nonpbc] / 2.0
450 return atoms, kwargs
453def generate_input(atoms, kwargs):
454 """Convert atoms and keyword arguments to Octopus input file."""
455 _lines = []
457 kwargs = {key.lower(): value for key, value in kwargs.items()}
459 def append(line):
460 _lines.append(line)
462 def extend(lines):
463 _lines.extend(lines)
464 append('')
466 def setvar(key, var):
467 append(f'{key} = {var}')
469 for kw in ['lsize', 'latticevectors', 'latticeparameters']:
470 assert kw not in kwargs
472 defaultboxshape = 'parallelepiped' if atoms.cell.rank > 0 else 'minimum'
473 boxshape = kwargs.pop('boxshape', defaultboxshape).lower()
474 use_ase_cell = boxshape == 'parallelepiped'
475 atomskwargs = atoms2kwargs(atoms, use_ase_cell)
477 setvar('boxshape', boxshape)
479 if use_ase_cell:
480 if 'latticevectors' in atomskwargs:
481 extend(list2block('LatticeParameters', [[1.0, 1.0, 1.0]]))
482 block = list2block('LatticeVectors', atomskwargs['latticevectors'])
483 else:
484 assert 'lsize' in atomskwargs
485 block = list2block('LSize', atomskwargs['lsize'])
487 extend(block)
489 # Allow override or issue errors?
490 pdim = 'periodicdimensions'
491 if pdim in kwargs:
492 if int(kwargs[pdim]) != int(atomskwargs[pdim]):
493 raise ValueError(
494 'Cannot reconcile periodicity in input '
495 'with that of Atoms object'
496 )
497 setvar('periodicdimensions', atomskwargs[pdim])
499 # We should say that we want the forces if the user requests forces.
500 # However the Output keyword has changed between Octopus 10.1 and 11.4.
501 # We'll leave it to the user to manually set keywords correctly.
502 # The most complicated part is that the user probably needs to tell
503 # Octopus to save the forces in xcrysden format in order for us to
504 # parse them.
506 for key, val in kwargs.items():
507 # Most datatypes are straightforward but blocks require some attention.
508 if isinstance(val, list):
509 append('')
510 dict_data = list2block(key, val)
511 extend(dict_data)
512 else:
513 setvar(key, str(val))
514 append('')
516 if 'reducedcoordinates' in atomskwargs:
517 coord_block = list2block(
518 'ReducedCoordinates', atomskwargs['reducedcoordinates']
519 )
520 else:
521 coord_block = list2block('Coordinates', atomskwargs['coordinates'])
522 extend(coord_block)
523 return '\n'.join(_lines)
526def atoms2kwargs(atoms, use_ase_cell):
527 kwargs = {}
529 if any(atoms.pbc):
530 coordkeyword = 'reducedcoordinates'
531 coords = atoms.get_scaled_positions(wrap=False)
532 cellkeyword = 'latticevectors'
533 else:
534 coordkeyword = 'coordinates'
535 coords = atoms.positions / Bohr
536 cellkeyword = 'lsize'
538 if use_ase_cell:
539 cell = atoms.cell / Bohr
540 if coordkeyword == 'coordinates':
541 cell_offset = 0.5 * cell.sum(axis=0)
542 coords -= cell_offset
543 else:
544 assert coordkeyword == 'reducedcoordinates'
546 if cellkeyword == 'lsize':
547 Lsize = 0.5 * np.diag(cell)
548 kwargs['lsize'] = [[str(size) for size in Lsize]]
549 # ASE uses (0...cell) while Octopus uses -L/2...L/2.
550 # Lsize is really cell / 2, and we have to adjust our
551 # positions by subtracting Lsize (see construction of the coords
552 # block) in non-periodic directions.
553 else:
554 assert cellkeyword == 'latticevectors'
555 kwargs['latticevectors'] = cell.tolist()
557 types = atoms.info.get('types', {})
559 coord_block = []
560 for sym, pos, tag in zip(atoms.symbols, coords, atoms.get_tags()):
561 if sym == 'X':
562 sym = types.get((sym, tag))
563 if sym is None:
564 raise ValueError(
565 'Cannot represent atom X without tags and '
566 'species info in atoms.info'
567 )
568 coord_block.append([repr(sym)] + [str(x) for x in pos])
570 kwargs[coordkeyword] = coord_block
571 npbc = sum(atoms.pbc)
572 for c in range(npbc):
573 if not atoms.pbc[c]:
574 msg = (
575 'Boundary conditions of Atoms object inconsistent '
576 'with requirements of Octopus. pbc must be either '
577 '000, 100, 110, or 111.'
578 )
579 raise ValueError(msg)
580 kwargs['periodicdimensions'] = npbc
582 # TODO InitialSpins
583 #
584 # TODO can use maximumiterations + output/outputformat to extract
585 # things from restart file into output files without trouble.
586 #
587 # Velocities etc.?
588 return kwargs
591@reader
592def read_octopus_in(fd, get_kwargs=False):
593 kwargs = parse_input_file(fd)
595 # input files may contain internal references to other files such
596 # as xyz or xsf. We need to know the directory where the file
597 # resides in order to locate those. If fd is a real file
598 # object, it contains the path and we can use it. Else assume
599 # pwd.
600 #
601 # Maybe this is ugly; maybe it can lead to strange bugs if someone
602 # wants a non-standard file-like type. But it's probably better than
603 # failing 'ase gui somedir/inp'
604 try:
605 fname = fd.name
606 except AttributeError:
607 directory = None
608 else:
609 directory = os.path.split(fname)[0]
611 atoms, remaining_kwargs = kwargs2atoms(kwargs, directory=directory)
612 if get_kwargs:
613 return atoms, remaining_kwargs
614 else:
615 return atoms