Coverage for ase / io / octopus / input.py: 26.88%

372 statements  

« prev     ^ index     » next       coverage.py v7.13.3, created at 2026-02-04 10:20 +0000

1import os 

2import re 

3 

4import numpy as np 

5 

6from ase import Atoms 

7from ase.calculators.calculator import kpts2ndarray 

8from ase.data import atomic_numbers 

9from ase.io import read 

10from ase.units import Bohr, Hartree 

11from ase.utils import reader 

12 

13special_ase_keywords = {'kpts'} 

14 

15 

16def process_special_kwargs(atoms, kwargs): 

17 kwargs = kwargs.copy() 

18 kpts = kwargs.pop('kpts', None) 

19 if kpts is not None: 

20 for kw in ['kpoints', 'reducedkpoints', 'kpointsgrid']: 

21 if kw in kwargs: 

22 raise ValueError('k-points specified multiple times') 

23 

24 kptsarray = kpts2ndarray(kpts, atoms) 

25 nkpts = len(kptsarray) 

26 fullarray = np.empty((nkpts, 4)) 

27 fullarray[:, 0] = 1.0 / nkpts # weights 

28 fullarray[:, 1:4] = kptsarray 

29 kwargs['kpointsreduced'] = fullarray.tolist() 

30 

31 # TODO xc=LDA/PBE etc. 

32 

33 # The idea is to get rid of the special keywords, since the rest 

34 # will be passed to Octopus 

35 # XXX do a better check of this 

36 for kw in special_ase_keywords: 

37 assert kw not in kwargs, kw 

38 return kwargs 

39 

40 

41class OctopusParseError(Exception): 

42 pass # Cannot parse input file 

43 

44 

45# Parse value as written in input file *or* something that one would be 

46# passing to the ASE interface, i.e., this might already be a boolean 

47def octbool2bool(value): 

48 value = value.lower() 

49 if isinstance(value, int): 

50 return bool(value) 

51 if value in ['true', 't', 'yes', '1']: 

52 return True 

53 elif value in ['no', 'f', 'false', '0']: 

54 return False 

55 else: 

56 raise ValueError(f'Failed to interpret "{value}" as a boolean.') 

57 

58 

59def list2block(name, rows): 

60 """Construct 'block' of Octopus input. 

61 

62 convert a list of rows to a string with the format x | x | .... 

63 for the octopus input file""" 

64 lines = [] 

65 lines.append('%' + name) 

66 for row in rows: 

67 lines.append(' ' + ' | '.join(str(obj) for obj in row)) 

68 lines.append('%') 

69 return lines 

70 

71 

72def normalize_keywords(kwargs): 

73 """Reduce keywords to unambiguous form (lowercase).""" 

74 newkwargs = {} 

75 for arg, value in kwargs.items(): 

76 lkey = arg.lower() 

77 newkwargs[lkey] = value 

78 return newkwargs 

79 

80 

81def input_line_iter(lines): 

82 """Convenient iterator for parsing input files 'cleanly'. 

83 

84 Discards comments etc.""" 

85 for line in lines: 

86 line = line.split('#')[0].strip() 

87 if not line or line.isspace(): 

88 continue 

89 line = line.strip() 

90 yield line 

91 

92 

93def block2list(namespace, lines, header=None): 

94 """Parse lines of block and return list of lists of strings.""" 

95 lines = iter(lines) 

96 block = [] 

97 if header is None: 

98 header = next(lines) 

99 assert header.startswith('%'), header 

100 name = header[1:].strip().lower() 

101 for line in lines: 

102 if line.startswith('%'): # Could also say line == '%' most likely. 

103 break 

104 tokens = [ 

105 namespace.evaluate(token) for token in line.strip().split('|') 

106 ] 

107 # XXX will fail for string literals containing '|' 

108 block.append(tokens) 

109 return name, block 

110 

111 

112class OctNamespace: 

113 def __init__(self): 

114 self.names = {} 

115 self.consts = { 

116 'pi': np.pi, 

117 'angstrom': 1.0 / Bohr, 

118 'ev': 1.0 / Hartree, 

119 'yes': True, 

120 'no': False, 

121 't': True, 

122 'f': False, 

123 'i': 1j, # This will probably cause trouble 

124 'true': True, 

125 'false': False, 

126 } 

127 

128 def evaluate(self, value): 

129 value = value.strip() 

130 

131 for char in '"', "'": # String literal 

132 if value.startswith(char): 

133 assert value.endswith(char) 

134 return value 

135 

136 value = value.lower() 

137 

138 if value in self.consts: # boolean or other constant 

139 return self.consts[value] 

140 

141 if value in self.names: # existing variable 

142 return self.names[value] 

143 

144 try: # literal integer 

145 v = int(value) 

146 except ValueError: 

147 pass 

148 else: 

149 if v == float(v): 

150 return v 

151 

152 try: # literal float 

153 return float(value) 

154 except ValueError: 

155 pass 

156 

157 if ( 

158 '*' in value 

159 or '/' in value 

160 and not any(char in value for char in '()+') 

161 ): 

162 floatvalue = 1.0 

163 op = '*' 

164 for token in re.split(r'([\*/])', value): 

165 if token in '*/': 

166 op = token 

167 continue 

168 

169 v = self.evaluate(token) 

170 

171 try: 

172 v = float(v) 

173 except TypeError: 

174 try: 

175 v = complex(v) 

176 except ValueError: 

177 break 

178 except ValueError: 

179 break # Cannot evaluate expression 

180 else: 

181 if op == '*': 

182 floatvalue *= v 

183 else: 

184 assert op == '/', op 

185 floatvalue /= v 

186 else: # Loop completed successfully 

187 return floatvalue 

188 return value # unknown name, or complex arithmetic expression 

189 

190 def add(self, name, value): 

191 value = self.evaluate(value) 

192 self.names[name.lower().strip()] = value 

193 

194 

195def parse_input_file(fd): 

196 namespace = OctNamespace() 

197 lines = input_line_iter(fd) 

198 blocks = {} 

199 while True: 

200 try: 

201 line = next(lines) 

202 except StopIteration: 

203 break 

204 else: 

205 if line.startswith('%'): 

206 name, value = block2list(namespace, lines, header=line) 

207 blocks[name] = value 

208 else: 

209 tokens = line.split('=', 1) 

210 assert len(tokens) == 2, tokens 

211 name, value = tokens 

212 namespace.add(name, value) 

213 

214 namespace.names.update(blocks) 

215 return namespace.names 

216 

217 

218def kwargs2cell(kwargs): 

219 # kwargs -> cell + remaining kwargs 

220 # cell will be None if not ASE-compatible. 

221 # 

222 # Returns numbers verbatim; caller must convert units. 

223 kwargs = normalize_keywords(kwargs) 

224 

225 if boxshape_is_ase_compatible(kwargs): 

226 kwargs.pop('boxshape', None) 

227 if 'lsize' in kwargs: 

228 Lsize = kwargs.pop('lsize') 

229 if not isinstance(Lsize, list): 

230 Lsize = [[Lsize] * 3] 

231 assert len(Lsize) == 1 

232 cell = np.array([2 * float(x) for x in Lsize[0]]) 

233 elif 'latticeparameters' in kwargs: 

234 # Eval latparam and latvec 

235 latparam = np.array(kwargs.pop('latticeparameters'), float).T 

236 cell = np.array(kwargs.pop('latticevectors', np.eye(3)), float) 

237 for a, vec in zip(latparam, cell): 

238 vec *= a 

239 assert cell.shape == (3, 3) 

240 else: 

241 cell = None 

242 return cell, kwargs 

243 

244 

245def boxshape_is_ase_compatible(kwargs): 

246 pdims = int(kwargs.get('periodicdimensions', 0)) 

247 default_boxshape = 'parallelepiped' if pdims > 0 else 'minimum' 

248 boxshape = kwargs.get('boxshape', default_boxshape).lower() 

249 # XXX add support for experimental keyword 'latticevectors' 

250 return boxshape == 'parallelepiped' 

251 

252 

253def kwargs2atoms(kwargs, directory=None): 

254 """Extract atoms object from keywords and return remaining keywords. 

255 

256 Some keyword arguments may refer to files. The directory keyword 

257 may be necessary to resolve the paths correctly, and is used for 

258 example when running 'ase gui somedir/inp'.""" 

259 kwargs = normalize_keywords(kwargs) 

260 

261 # Only input units accepted nowadays are 'atomic'. 

262 # But if we are loading an old file, and it specifies something else, 

263 # we can be sure that the user wanted that back then. 

264 

265 coord_keywords = [ 

266 'coordinates', 

267 'xyzcoordinates', 

268 'pdbcoordinates', 

269 'reducedcoordinates', 

270 'xsfcoordinates', 

271 'xsfcoordinatesanimstep', 

272 ] 

273 

274 nkeywords = 0 

275 for keyword in coord_keywords: 

276 if keyword in kwargs: 

277 nkeywords += 1 

278 if nkeywords == 0: 

279 raise OctopusParseError('No coordinates') 

280 elif nkeywords > 1: 

281 raise OctopusParseError( 

282 'Multiple coordinate specifications present. ' 

283 'This may be okay in Octopus, but we do not ' 

284 'implement it.' 

285 ) 

286 

287 def get_positions_from_block(keyword): 

288 # %Coordinates or %ReducedCoordinates -> atomic numbers, positions. 

289 block = kwargs.pop(keyword) 

290 positions = [] 

291 numbers = [] 

292 tags = [] 

293 types = {} 

294 for row in block: 

295 assert len(row) in [ndims + 1, ndims + 2] 

296 row = row[: ndims + 1] 

297 sym = row[0] 

298 assert sym.startswith('"') or sym.startswith("'") 

299 assert sym[0] == sym[-1] 

300 sym = sym[1:-1] 

301 pos0 = np.zeros(3) 

302 ndim = int(kwargs.get('dimensions', 3)) 

303 pos0[:ndim] = [float(element) for element in row[1:]] 

304 number = atomic_numbers.get(sym) # Use 0 ~ 'X' for unknown? 

305 tag = 0 

306 if number is None: 

307 if sym not in types: 

308 tag = len(types) + 1 

309 types[sym] = tag 

310 number = 0 

311 tag = types[sym] 

312 tags.append(tag) 

313 numbers.append(number) 

314 positions.append(pos0) 

315 positions = np.array(positions) 

316 tags = np.array(tags, int) 

317 if types: 

318 ase_types = {} 

319 for sym, tag in types.items(): 

320 ase_types[('X', tag)] = sym 

321 info = {'types': ase_types} # 'info' dict for Atoms object 

322 else: 

323 tags = None 

324 info = None 

325 return numbers, positions, tags, info 

326 

327 def read_atoms_from_file(fname, fmt): 

328 assert fname.startswith('"') or fname.startswith("'") 

329 assert fname[0] == fname[-1] 

330 fname = fname[1:-1] 

331 if directory is not None: 

332 fname = os.path.join(directory, fname) 

333 # XXX test xyz, pbd and xsf 

334 if fmt == 'xsf' and 'xsfcoordinatesanimstep' in kwargs: 

335 anim_step = kwargs.pop('xsfcoordinatesanimstep') 

336 theslice = slice(anim_step, anim_step + 1, 1) 

337 # XXX test animstep 

338 else: 

339 theslice = slice(None, None, 1) 

340 images = read(fname, theslice, fmt) 

341 if len(images) != 1: 

342 raise OctopusParseError( 

343 "Expected only one image. Don't know " 

344 'what to do with %d images.' % len(images) 

345 ) 

346 return images[0] 

347 

348 # We will attempt to extract cell and pbc from kwargs if 'lacking'. 

349 # But they might have been left unspecified on purpose. 

350 # 

351 # We need to keep track of these two variables "externally" 

352 # because the Atoms object assigns values when they are not given. 

353 cell = None 

354 pbc = None 

355 adjust_positions_by_half_cell = False 

356 

357 atoms = None 

358 xsfcoords = kwargs.pop('xsfcoordinates', None) 

359 if xsfcoords is not None: 

360 atoms = read_atoms_from_file(xsfcoords, 'xsf') 

361 atoms.positions *= Bohr 

362 atoms.cell *= Bohr 

363 # As it turns out, non-periodic xsf is not supported by octopus. 

364 # Also, it only supports fully periodic or fully non-periodic.... 

365 # So the only thing that we can test here is 3D fully periodic. 

366 if sum(atoms.pbc) != 3: 

367 raise NotImplementedError('XSF not fully periodic with Octopus') 

368 cell = atoms.cell 

369 pbc = atoms.pbc 

370 # Position adjustment doesn't actually matter but this should work 

371 # most 'nicely': 

372 adjust_positions_by_half_cell = False 

373 xyzcoords = kwargs.pop('xyzcoordinates', None) 

374 if xyzcoords is not None: 

375 atoms = read_atoms_from_file(xyzcoords, 'xyz') 

376 atoms.positions *= Bohr 

377 adjust_positions_by_half_cell = True 

378 pdbcoords = kwargs.pop('pdbcoordinates', None) 

379 if pdbcoords is not None: 

380 atoms = read_atoms_from_file(pdbcoords, 'pdb') 

381 pbc = atoms.pbc 

382 adjust_positions_by_half_cell = True 

383 # Due to an error in ASE pdb, we can only test the nonperiodic case. 

384 # atoms.cell *= Bohr # XXX cell? Not in nonperiodic case... 

385 atoms.positions *= Bohr 

386 if sum(atoms.pbc) != 0: 

387 raise NotImplementedError('Periodic pdb not supported by ASE.') 

388 

389 if cell is None: 

390 # cell could not be established from the file, so we set it on the 

391 # Atoms now if possible: 

392 cell, kwargs = kwargs2cell(kwargs) 

393 if cell is not None: 

394 cell *= Bohr 

395 if cell is not None and atoms is not None: 

396 atoms.cell = cell 

397 # In case of boxshape = sphere and similar, we still do not have 

398 # a cell. 

399 

400 ndims = int(kwargs.get('dimensions', 3)) 

401 if ndims != 3: 

402 raise NotImplementedError('Only 3D calculations supported.') 

403 

404 coords = kwargs.get('coordinates') 

405 if coords is not None: 

406 numbers, pos, tags, info = get_positions_from_block('coordinates') 

407 pos *= Bohr 

408 adjust_positions_by_half_cell = True 

409 atoms = Atoms( 

410 cell=cell, numbers=numbers, positions=pos, tags=tags, info=info 

411 ) 

412 rcoords = kwargs.get('reducedcoordinates') 

413 if rcoords is not None: 

414 numbers, spos, tags, info = get_positions_from_block( 

415 'reducedcoordinates' 

416 ) 

417 if cell is None: 

418 raise ValueError( 

419 'Cannot figure out what the cell is, ' 

420 'and thus cannot interpret reduced coordinates.' 

421 ) 

422 atoms = Atoms( 

423 cell=cell, 

424 numbers=numbers, 

425 scaled_positions=spos, 

426 tags=tags, 

427 info=info, 

428 ) 

429 if atoms is None: 

430 raise OctopusParseError('Apparently there are no atoms.') 

431 

432 # Either we have non-periodic BCs or the atoms object already 

433 # got its BCs from reading the file. In the latter case 

434 # we shall override only if PeriodicDimensions was given specifically: 

435 

436 if pbc is None: 

437 pdims = int(kwargs.pop('periodicdimensions', 0)) 

438 pbc = np.zeros(3, dtype=bool) 

439 pbc[:pdims] = True 

440 atoms.pbc = pbc 

441 

442 if ( 

443 cell is not None 

444 and cell.shape == (3,) 

445 and adjust_positions_by_half_cell 

446 ): 

447 nonpbc = atoms.pbc == 0 

448 atoms.positions[:, nonpbc] += np.array(cell)[None, nonpbc] / 2.0 

449 

450 return atoms, kwargs 

451 

452 

453def generate_input(atoms, kwargs): 

454 """Convert atoms and keyword arguments to Octopus input file.""" 

455 _lines = [] 

456 

457 kwargs = {key.lower(): value for key, value in kwargs.items()} 

458 

459 def append(line): 

460 _lines.append(line) 

461 

462 def extend(lines): 

463 _lines.extend(lines) 

464 append('') 

465 

466 def setvar(key, var): 

467 append(f'{key} = {var}') 

468 

469 for kw in ['lsize', 'latticevectors', 'latticeparameters']: 

470 assert kw not in kwargs 

471 

472 defaultboxshape = 'parallelepiped' if atoms.cell.rank > 0 else 'minimum' 

473 boxshape = kwargs.pop('boxshape', defaultboxshape).lower() 

474 use_ase_cell = boxshape == 'parallelepiped' 

475 atomskwargs = atoms2kwargs(atoms, use_ase_cell) 

476 

477 setvar('boxshape', boxshape) 

478 

479 if use_ase_cell: 

480 if 'latticevectors' in atomskwargs: 

481 extend(list2block('LatticeParameters', [[1.0, 1.0, 1.0]])) 

482 block = list2block('LatticeVectors', atomskwargs['latticevectors']) 

483 else: 

484 assert 'lsize' in atomskwargs 

485 block = list2block('LSize', atomskwargs['lsize']) 

486 

487 extend(block) 

488 

489 # Allow override or issue errors? 

490 pdim = 'periodicdimensions' 

491 if pdim in kwargs: 

492 if int(kwargs[pdim]) != int(atomskwargs[pdim]): 

493 raise ValueError( 

494 'Cannot reconcile periodicity in input ' 

495 'with that of Atoms object' 

496 ) 

497 setvar('periodicdimensions', atomskwargs[pdim]) 

498 

499 # We should say that we want the forces if the user requests forces. 

500 # However the Output keyword has changed between Octopus 10.1 and 11.4. 

501 # We'll leave it to the user to manually set keywords correctly. 

502 # The most complicated part is that the user probably needs to tell 

503 # Octopus to save the forces in xcrysden format in order for us to 

504 # parse them. 

505 

506 for key, val in kwargs.items(): 

507 # Most datatypes are straightforward but blocks require some attention. 

508 if isinstance(val, list): 

509 append('') 

510 dict_data = list2block(key, val) 

511 extend(dict_data) 

512 else: 

513 setvar(key, str(val)) 

514 append('') 

515 

516 if 'reducedcoordinates' in atomskwargs: 

517 coord_block = list2block( 

518 'ReducedCoordinates', atomskwargs['reducedcoordinates'] 

519 ) 

520 else: 

521 coord_block = list2block('Coordinates', atomskwargs['coordinates']) 

522 extend(coord_block) 

523 return '\n'.join(_lines) 

524 

525 

526def atoms2kwargs(atoms, use_ase_cell): 

527 kwargs = {} 

528 

529 if any(atoms.pbc): 

530 coordkeyword = 'reducedcoordinates' 

531 coords = atoms.get_scaled_positions(wrap=False) 

532 cellkeyword = 'latticevectors' 

533 else: 

534 coordkeyword = 'coordinates' 

535 coords = atoms.positions / Bohr 

536 cellkeyword = 'lsize' 

537 

538 if use_ase_cell: 

539 cell = atoms.cell / Bohr 

540 if coordkeyword == 'coordinates': 

541 cell_offset = 0.5 * cell.sum(axis=0) 

542 coords -= cell_offset 

543 else: 

544 assert coordkeyword == 'reducedcoordinates' 

545 

546 if cellkeyword == 'lsize': 

547 Lsize = 0.5 * np.diag(cell) 

548 kwargs['lsize'] = [[str(size) for size in Lsize]] 

549 # ASE uses (0...cell) while Octopus uses -L/2...L/2. 

550 # Lsize is really cell / 2, and we have to adjust our 

551 # positions by subtracting Lsize (see construction of the coords 

552 # block) in non-periodic directions. 

553 else: 

554 assert cellkeyword == 'latticevectors' 

555 kwargs['latticevectors'] = cell.tolist() 

556 

557 types = atoms.info.get('types', {}) 

558 

559 coord_block = [] 

560 for sym, pos, tag in zip(atoms.symbols, coords, atoms.get_tags()): 

561 if sym == 'X': 

562 sym = types.get((sym, tag)) 

563 if sym is None: 

564 raise ValueError( 

565 'Cannot represent atom X without tags and ' 

566 'species info in atoms.info' 

567 ) 

568 coord_block.append([repr(sym)] + [str(x) for x in pos]) 

569 

570 kwargs[coordkeyword] = coord_block 

571 npbc = sum(atoms.pbc) 

572 for c in range(npbc): 

573 if not atoms.pbc[c]: 

574 msg = ( 

575 'Boundary conditions of Atoms object inconsistent ' 

576 'with requirements of Octopus. pbc must be either ' 

577 '000, 100, 110, or 111.' 

578 ) 

579 raise ValueError(msg) 

580 kwargs['periodicdimensions'] = npbc 

581 

582 # TODO InitialSpins 

583 # 

584 # TODO can use maximumiterations + output/outputformat to extract 

585 # things from restart file into output files without trouble. 

586 # 

587 # Velocities etc.? 

588 return kwargs 

589 

590 

591@reader 

592def read_octopus_in(fd, get_kwargs=False): 

593 kwargs = parse_input_file(fd) 

594 

595 # input files may contain internal references to other files such 

596 # as xyz or xsf. We need to know the directory where the file 

597 # resides in order to locate those. If fd is a real file 

598 # object, it contains the path and we can use it. Else assume 

599 # pwd. 

600 # 

601 # Maybe this is ugly; maybe it can lead to strange bugs if someone 

602 # wants a non-standard file-like type. But it's probably better than 

603 # failing 'ase gui somedir/inp' 

604 try: 

605 fname = fd.name 

606 except AttributeError: 

607 directory = None 

608 else: 

609 directory = os.path.split(fname)[0] 

610 

611 atoms, remaining_kwargs = kwargs2atoms(kwargs, directory=directory) 

612 if get_kwargs: 

613 return atoms, remaining_kwargs 

614 else: 

615 return atoms