Coverage for /builds/ase/ase/ase/io/octopus/input.py: 26.29%

369 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-08-02 00:12 +0000

1# fmt: off 

2 

3import os 

4import re 

5 

6import numpy as np 

7 

8from ase import Atoms 

9from ase.calculators.calculator import kpts2ndarray 

10from ase.data import atomic_numbers 

11from ase.io import read 

12from ase.units import Bohr, Hartree 

13from ase.utils import reader 

14 

15special_ase_keywords = {'kpts'} 

16 

17 

18def process_special_kwargs(atoms, kwargs): 

19 kwargs = kwargs.copy() 

20 kpts = kwargs.pop('kpts', None) 

21 if kpts is not None: 

22 for kw in ['kpoints', 'reducedkpoints', 'kpointsgrid']: 

23 if kw in kwargs: 

24 raise ValueError('k-points specified multiple times') 

25 

26 kptsarray = kpts2ndarray(kpts, atoms) 

27 nkpts = len(kptsarray) 

28 fullarray = np.empty((nkpts, 4)) 

29 fullarray[:, 0] = 1.0 / nkpts # weights 

30 fullarray[:, 1:4] = kptsarray 

31 kwargs['kpointsreduced'] = fullarray.tolist() 

32 

33 # TODO xc=LDA/PBE etc. 

34 

35 # The idea is to get rid of the special keywords, since the rest 

36 # will be passed to Octopus 

37 # XXX do a better check of this 

38 for kw in special_ase_keywords: 

39 assert kw not in kwargs, kw 

40 return kwargs 

41 

42 

43class OctopusParseError(Exception): 

44 pass # Cannot parse input file 

45 

46 

47# Parse value as written in input file *or* something that one would be 

48# passing to the ASE interface, i.e., this might already be a boolean 

49def octbool2bool(value): 

50 value = value.lower() 

51 if isinstance(value, int): 

52 return bool(value) 

53 if value in ['true', 't', 'yes', '1']: 

54 return True 

55 elif value in ['no', 'f', 'false', '0']: 

56 return False 

57 else: 

58 raise ValueError(f'Failed to interpret "{value}" as a boolean.') 

59 

60 

61def list2block(name, rows): 

62 """Construct 'block' of Octopus input. 

63 

64 convert a list of rows to a string with the format x | x | .... 

65 for the octopus input file""" 

66 lines = [] 

67 lines.append('%' + name) 

68 for row in rows: 

69 lines.append(' ' + ' | '.join(str(obj) for obj in row)) 

70 lines.append('%') 

71 return lines 

72 

73 

74def normalize_keywords(kwargs): 

75 """Reduce keywords to unambiguous form (lowercase).""" 

76 newkwargs = {} 

77 for arg, value in kwargs.items(): 

78 lkey = arg.lower() 

79 newkwargs[lkey] = value 

80 return newkwargs 

81 

82 

83def input_line_iter(lines): 

84 """Convenient iterator for parsing input files 'cleanly'. 

85 

86 Discards comments etc.""" 

87 for line in lines: 

88 line = line.split('#')[0].strip() 

89 if not line or line.isspace(): 

90 continue 

91 line = line.strip() 

92 yield line 

93 

94 

95def block2list(namespace, lines, header=None): 

96 """Parse lines of block and return list of lists of strings.""" 

97 lines = iter(lines) 

98 block = [] 

99 if header is None: 

100 header = next(lines) 

101 assert header.startswith('%'), header 

102 name = header[1:].strip().lower() 

103 for line in lines: 

104 if line.startswith('%'): # Could also say line == '%' most likely. 

105 break 

106 tokens = [namespace.evaluate(token) 

107 for token in line.strip().split('|')] 

108 # XXX will fail for string literals containing '|' 

109 block.append(tokens) 

110 return name, block 

111 

112 

113class OctNamespace: 

114 def __init__(self): 

115 self.names = {} 

116 self.consts = {'pi': np.pi, 

117 'angstrom': 1. / Bohr, 

118 'ev': 1. / Hartree, 

119 'yes': True, 

120 'no': False, 

121 't': True, 

122 'f': False, 

123 'i': 1j, # This will probably cause trouble 

124 'true': True, 

125 'false': False} 

126 

127 def evaluate(self, value): 

128 value = value.strip() 

129 

130 for char in '"', "'": # String literal 

131 if value.startswith(char): 

132 assert value.endswith(char) 

133 return value 

134 

135 value = value.lower() 

136 

137 if value in self.consts: # boolean or other constant 

138 return self.consts[value] 

139 

140 if value in self.names: # existing variable 

141 return self.names[value] 

142 

143 try: # literal integer 

144 v = int(value) 

145 except ValueError: 

146 pass 

147 else: 

148 if v == float(v): 

149 return v 

150 

151 try: # literal float 

152 return float(value) 

153 except ValueError: 

154 pass 

155 

156 if ('*' in value or '/' in value 

157 and not any(char in value for char in '()+')): 

158 floatvalue = 1.0 

159 op = '*' 

160 for token in re.split(r'([\*/])', value): 

161 if token in '*/': 

162 op = token 

163 continue 

164 

165 v = self.evaluate(token) 

166 

167 try: 

168 v = float(v) 

169 except TypeError: 

170 try: 

171 v = complex(v) 

172 except ValueError: 

173 break 

174 except ValueError: 

175 break # Cannot evaluate expression 

176 else: 

177 if op == '*': 

178 floatvalue *= v 

179 else: 

180 assert op == '/', op 

181 floatvalue /= v 

182 else: # Loop completed successfully 

183 return floatvalue 

184 return value # unknown name, or complex arithmetic expression 

185 

186 def add(self, name, value): 

187 value = self.evaluate(value) 

188 self.names[name.lower().strip()] = value 

189 

190 

191def parse_input_file(fd): 

192 namespace = OctNamespace() 

193 lines = input_line_iter(fd) 

194 blocks = {} 

195 while True: 

196 try: 

197 line = next(lines) 

198 except StopIteration: 

199 break 

200 else: 

201 if line.startswith('%'): 

202 name, value = block2list(namespace, lines, header=line) 

203 blocks[name] = value 

204 else: 

205 tokens = line.split('=', 1) 

206 assert len(tokens) == 2, tokens 

207 name, value = tokens 

208 namespace.add(name, value) 

209 

210 namespace.names.update(blocks) 

211 return namespace.names 

212 

213 

214def kwargs2cell(kwargs): 

215 # kwargs -> cell + remaining kwargs 

216 # cell will be None if not ASE-compatible. 

217 # 

218 # Returns numbers verbatim; caller must convert units. 

219 kwargs = normalize_keywords(kwargs) 

220 

221 if boxshape_is_ase_compatible(kwargs): 

222 kwargs.pop('boxshape', None) 

223 if 'lsize' in kwargs: 

224 Lsize = kwargs.pop('lsize') 

225 if not isinstance(Lsize, list): 

226 Lsize = [[Lsize] * 3] 

227 assert len(Lsize) == 1 

228 cell = np.array([2 * float(x) for x in Lsize[0]]) 

229 elif 'latticeparameters' in kwargs: 

230 # Eval latparam and latvec 

231 latparam = np.array(kwargs.pop('latticeparameters'), float).T 

232 cell = np.array(kwargs.pop('latticevectors', np.eye(3)), float) 

233 for a, vec in zip(latparam, cell): 

234 vec *= a 

235 assert cell.shape == (3, 3) 

236 else: 

237 cell = None 

238 return cell, kwargs 

239 

240 

241def boxshape_is_ase_compatible(kwargs): 

242 pdims = int(kwargs.get('periodicdimensions', 0)) 

243 default_boxshape = 'parallelepiped' if pdims > 0 else 'minimum' 

244 boxshape = kwargs.get('boxshape', default_boxshape).lower() 

245 # XXX add support for experimental keyword 'latticevectors' 

246 return boxshape == 'parallelepiped' 

247 

248 

249def kwargs2atoms(kwargs, directory=None): 

250 """Extract atoms object from keywords and return remaining keywords. 

251 

252 Some keyword arguments may refer to files. The directory keyword 

253 may be necessary to resolve the paths correctly, and is used for 

254 example when running 'ase gui somedir/inp'.""" 

255 kwargs = normalize_keywords(kwargs) 

256 

257 # Only input units accepted nowadays are 'atomic'. 

258 # But if we are loading an old file, and it specifies something else, 

259 # we can be sure that the user wanted that back then. 

260 

261 coord_keywords = ['coordinates', 

262 'xyzcoordinates', 

263 'pdbcoordinates', 

264 'reducedcoordinates', 

265 'xsfcoordinates', 

266 'xsfcoordinatesanimstep'] 

267 

268 nkeywords = 0 

269 for keyword in coord_keywords: 

270 if keyword in kwargs: 

271 nkeywords += 1 

272 if nkeywords == 0: 

273 raise OctopusParseError('No coordinates') 

274 elif nkeywords > 1: 

275 raise OctopusParseError('Multiple coordinate specifications present. ' 

276 'This may be okay in Octopus, but we do not ' 

277 'implement it.') 

278 

279 def get_positions_from_block(keyword): 

280 # %Coordinates or %ReducedCoordinates -> atomic numbers, positions. 

281 block = kwargs.pop(keyword) 

282 positions = [] 

283 numbers = [] 

284 tags = [] 

285 types = {} 

286 for row in block: 

287 assert len(row) in [ndims + 1, ndims + 2] 

288 row = row[:ndims + 1] 

289 sym = row[0] 

290 assert sym.startswith('"') or sym.startswith("'") 

291 assert sym[0] == sym[-1] 

292 sym = sym[1:-1] 

293 pos0 = np.zeros(3) 

294 ndim = int(kwargs.get('dimensions', 3)) 

295 pos0[:ndim] = [float(element) for element in row[1:]] 

296 number = atomic_numbers.get(sym) # Use 0 ~ 'X' for unknown? 

297 tag = 0 

298 if number is None: 

299 if sym not in types: 

300 tag = len(types) + 1 

301 types[sym] = tag 

302 number = 0 

303 tag = types[sym] 

304 tags.append(tag) 

305 numbers.append(number) 

306 positions.append(pos0) 

307 positions = np.array(positions) 

308 tags = np.array(tags, int) 

309 if types: 

310 ase_types = {} 

311 for sym, tag in types.items(): 

312 ase_types[('X', tag)] = sym 

313 info = {'types': ase_types} # 'info' dict for Atoms object 

314 else: 

315 tags = None 

316 info = None 

317 return numbers, positions, tags, info 

318 

319 def read_atoms_from_file(fname, fmt): 

320 assert fname.startswith('"') or fname.startswith("'") 

321 assert fname[0] == fname[-1] 

322 fname = fname[1:-1] 

323 if directory is not None: 

324 fname = os.path.join(directory, fname) 

325 # XXX test xyz, pbd and xsf 

326 if fmt == 'xsf' and 'xsfcoordinatesanimstep' in kwargs: 

327 anim_step = kwargs.pop('xsfcoordinatesanimstep') 

328 theslice = slice(anim_step, anim_step + 1, 1) 

329 # XXX test animstep 

330 else: 

331 theslice = slice(None, None, 1) 

332 images = read(fname, theslice, fmt) 

333 if len(images) != 1: 

334 raise OctopusParseError('Expected only one image. Don\'t know ' 

335 'what to do with %d images.' % len(images)) 

336 return images[0] 

337 

338 # We will attempt to extract cell and pbc from kwargs if 'lacking'. 

339 # But they might have been left unspecified on purpose. 

340 # 

341 # We need to keep track of these two variables "externally" 

342 # because the Atoms object assigns values when they are not given. 

343 cell = None 

344 pbc = None 

345 adjust_positions_by_half_cell = False 

346 

347 atoms = None 

348 xsfcoords = kwargs.pop('xsfcoordinates', None) 

349 if xsfcoords is not None: 

350 atoms = read_atoms_from_file(xsfcoords, 'xsf') 

351 atoms.positions *= Bohr 

352 atoms.cell *= Bohr 

353 # As it turns out, non-periodic xsf is not supported by octopus. 

354 # Also, it only supports fully periodic or fully non-periodic.... 

355 # So the only thing that we can test here is 3D fully periodic. 

356 if sum(atoms.pbc) != 3: 

357 raise NotImplementedError('XSF not fully periodic with Octopus') 

358 cell = atoms.cell 

359 pbc = atoms.pbc 

360 # Position adjustment doesn't actually matter but this should work 

361 # most 'nicely': 

362 adjust_positions_by_half_cell = False 

363 xyzcoords = kwargs.pop('xyzcoordinates', None) 

364 if xyzcoords is not None: 

365 atoms = read_atoms_from_file(xyzcoords, 'xyz') 

366 atoms.positions *= Bohr 

367 adjust_positions_by_half_cell = True 

368 pdbcoords = kwargs.pop('pdbcoordinates', None) 

369 if pdbcoords is not None: 

370 atoms = read_atoms_from_file(pdbcoords, 'pdb') 

371 pbc = atoms.pbc 

372 adjust_positions_by_half_cell = True 

373 # Due to an error in ASE pdb, we can only test the nonperiodic case. 

374 # atoms.cell *= Bohr # XXX cell? Not in nonperiodic case... 

375 atoms.positions *= Bohr 

376 if sum(atoms.pbc) != 0: 

377 raise NotImplementedError('Periodic pdb not supported by ASE.') 

378 

379 if cell is None: 

380 # cell could not be established from the file, so we set it on the 

381 # Atoms now if possible: 

382 cell, kwargs = kwargs2cell(kwargs) 

383 if cell is not None: 

384 cell *= Bohr 

385 if cell is not None and atoms is not None: 

386 atoms.cell = cell 

387 # In case of boxshape = sphere and similar, we still do not have 

388 # a cell. 

389 

390 ndims = int(kwargs.get('dimensions', 3)) 

391 if ndims != 3: 

392 raise NotImplementedError('Only 3D calculations supported.') 

393 

394 coords = kwargs.get('coordinates') 

395 if coords is not None: 

396 numbers, pos, tags, info = get_positions_from_block('coordinates') 

397 pos *= Bohr 

398 adjust_positions_by_half_cell = True 

399 atoms = Atoms(cell=cell, numbers=numbers, positions=pos, 

400 tags=tags, info=info) 

401 rcoords = kwargs.get('reducedcoordinates') 

402 if rcoords is not None: 

403 numbers, spos, tags, info = get_positions_from_block( 

404 'reducedcoordinates') 

405 if cell is None: 

406 raise ValueError('Cannot figure out what the cell is, ' 

407 'and thus cannot interpret reduced coordinates.') 

408 atoms = Atoms(cell=cell, numbers=numbers, scaled_positions=spos, 

409 tags=tags, info=info) 

410 if atoms is None: 

411 raise OctopusParseError('Apparently there are no atoms.') 

412 

413 # Either we have non-periodic BCs or the atoms object already 

414 # got its BCs from reading the file. In the latter case 

415 # we shall override only if PeriodicDimensions was given specifically: 

416 

417 if pbc is None: 

418 pdims = int(kwargs.pop('periodicdimensions', 0)) 

419 pbc = np.zeros(3, dtype=bool) 

420 pbc[:pdims] = True 

421 atoms.pbc = pbc 

422 

423 if (cell is not None and cell.shape == (3,) 

424 and adjust_positions_by_half_cell): 

425 nonpbc = (atoms.pbc == 0) 

426 atoms.positions[:, nonpbc] += np.array(cell)[None, nonpbc] / 2.0 

427 

428 return atoms, kwargs 

429 

430 

431def generate_input(atoms, kwargs): 

432 """Convert atoms and keyword arguments to Octopus input file.""" 

433 _lines = [] 

434 

435 kwargs = {key.lower(): value for key, value in kwargs.items()} 

436 

437 def append(line): 

438 _lines.append(line) 

439 

440 def extend(lines): 

441 _lines.extend(lines) 

442 append('') 

443 

444 def setvar(key, var): 

445 append(f'{key} = {var}') 

446 

447 for kw in ['lsize', 'latticevectors', 'latticeparameters']: 

448 assert kw not in kwargs 

449 

450 defaultboxshape = 'parallelepiped' if atoms.cell.rank > 0 else 'minimum' 

451 boxshape = kwargs.pop('boxshape', defaultboxshape).lower() 

452 use_ase_cell = (boxshape == 'parallelepiped') 

453 atomskwargs = atoms2kwargs(atoms, use_ase_cell) 

454 

455 setvar('boxshape', boxshape) 

456 

457 if use_ase_cell: 

458 if 'reducedcoordinates' in atomskwargs: 

459 extend(list2block('LatticeParameters', [[1., 1., 1.]])) 

460 block = list2block('LatticeVectors', atomskwargs['latticevectors']) 

461 else: 

462 assert 'lsize' in atomskwargs 

463 block = list2block('LSize', atomskwargs['lsize']) 

464 

465 extend(block) 

466 

467 # Allow override or issue errors? 

468 pdim = 'periodicdimensions' 

469 if pdim in kwargs: 

470 if int(kwargs[pdim]) != int(atomskwargs[pdim]): 

471 raise ValueError('Cannot reconcile periodicity in input ' 

472 'with that of Atoms object') 

473 setvar('periodicdimensions', atomskwargs[pdim]) 

474 

475 # We should say that we want the forces if the user requests forces. 

476 # However the Output keyword has changed between Octopus 10.1 and 11.4. 

477 # We'll leave it to the user to manually set keywords correctly. 

478 # The most complicated part is that the user probably needs to tell 

479 # Octopus to save the forces in xcrysden format in order for us to 

480 # parse them. 

481 

482 for key, val in kwargs.items(): 

483 # Most datatypes are straightforward but blocks require some attention. 

484 if isinstance(val, list): 

485 append('') 

486 dict_data = list2block(key, val) 

487 extend(dict_data) 

488 else: 

489 setvar(key, str(val)) 

490 append('') 

491 

492 if 'reducedcoordinates' in atomskwargs: 

493 coord_block = list2block('ReducedCoordinates', 

494 atomskwargs['reducedcoordinates']) 

495 else: 

496 coord_block = list2block('Coordinates', atomskwargs['coordinates']) 

497 extend(coord_block) 

498 return '\n'.join(_lines) 

499 

500 

501def atoms2kwargs(atoms, use_ase_cell): 

502 kwargs = {} 

503 

504 if any(atoms.pbc): 

505 coordtype = 'reducedcoordinates' 

506 coords = atoms.get_scaled_positions(wrap=False) 

507 else: 

508 coordtype = 'coordinates' 

509 coords = atoms.positions / Bohr 

510 

511 if use_ase_cell: 

512 cell = atoms.cell / Bohr 

513 if coordtype == 'coordinates': 

514 cell_offset = 0.5 * cell.sum(axis=0) 

515 coords -= cell_offset 

516 else: 

517 assert coordtype == 'reducedcoordinates' 

518 

519 if atoms.cell.orthorhombic: 

520 Lsize = 0.5 * np.diag(cell) 

521 kwargs['lsize'] = [[str(size) for size in Lsize]] 

522 # ASE uses (0...cell) while Octopus uses -L/2...L/2. 

523 # Lsize is really cell / 2, and we have to adjust our 

524 # positions by subtracting Lsize (see construction of the coords 

525 # block) in non-periodic directions. 

526 else: 

527 kwargs['latticevectors'] = cell.tolist() 

528 

529 types = atoms.info.get('types', {}) 

530 

531 coord_block = [] 

532 for sym, pos, tag in zip(atoms.symbols, coords, atoms.get_tags()): 

533 if sym == 'X': 

534 sym = types.get((sym, tag)) 

535 if sym is None: 

536 raise ValueError('Cannot represent atom X without tags and ' 

537 'species info in atoms.info') 

538 coord_block.append([repr(sym)] + [str(x) for x in pos]) 

539 

540 kwargs[coordtype] = coord_block 

541 npbc = sum(atoms.pbc) 

542 for c in range(npbc): 

543 if not atoms.pbc[c]: 

544 msg = ('Boundary conditions of Atoms object inconsistent ' 

545 'with requirements of Octopus. pbc must be either ' 

546 '000, 100, 110, or 111.') 

547 raise ValueError(msg) 

548 kwargs['periodicdimensions'] = npbc 

549 

550 # TODO InitialSpins 

551 # 

552 # TODO can use maximumiterations + output/outputformat to extract 

553 # things from restart file into output files without trouble. 

554 # 

555 # Velocities etc.? 

556 return kwargs 

557 

558 

559@reader 

560def read_octopus_in(fd, get_kwargs=False): 

561 kwargs = parse_input_file(fd) 

562 

563 # input files may contain internal references to other files such 

564 # as xyz or xsf. We need to know the directory where the file 

565 # resides in order to locate those. If fd is a real file 

566 # object, it contains the path and we can use it. Else assume 

567 # pwd. 

568 # 

569 # Maybe this is ugly; maybe it can lead to strange bugs if someone 

570 # wants a non-standard file-like type. But it's probably better than 

571 # failing 'ase gui somedir/inp' 

572 try: 

573 fname = fd.name 

574 except AttributeError: 

575 directory = None 

576 else: 

577 directory = os.path.split(fname)[0] 

578 

579 atoms, remaining_kwargs = kwargs2atoms(kwargs, directory=directory) 

580 if get_kwargs: 

581 return atoms, remaining_kwargs 

582 else: 

583 return atoms