Coverage for ase/io/onetep.py: 82.94%

1import re

2import warnings

3from copy import deepcopy

4from os.path import dirname, isfile

5from pathlib import Path

7import numpy as np

9from ase.atoms import Atoms

10from ase.calculators.singlepoint import SinglePointDFTCalculator

11from ase.cell import Cell

12from ase.units import Bohr

14no_positions_error = (

15 'no positions can be read from this onetep output '

16 'if you wish to use ASE to read onetep outputs '

17 'please use uppercase block positions in your calculations'

18)

20unable_to_read = 'unable to read this onetep output file, ending'

22# taken from onetep source code,

23# does not seem to be from any known NIST data

24units = {'Hartree': 27.2116529, 'Bohr': 1 / 1.889726134583548707935}

26# Want to add a functionality? add a global constant below

27ONETEP_START = re.compile(

28 r'(?i)^\s*\|\s*Linear-Scaling\s*Ab\s*'

29 r'Initio\s*Total\s*Energy\s*Program\s*\|\s*$'

30)

31ONETEP_STOP = re.compile(r'(?i)^\s*-+\s*TIMING\s*INFORMATION\s*-+\s*$')

32ONETEP_TOTAL_ENERGY = re.compile(

33 r'(?i)^\s*\|\s*\*{3}\s*NGWF\s*' r'optimisation\s*converged\s*\*{3}\s*\|\s*$'

34)

35ONETEP_FORCE = re.compile(r'(?i)^\s*\*+\s*Forces\s*\*+\s*$')

36ONETEP_MULLIKEN = re.compile(r'(?i)^\s*Mulliken\s*Atomic\s*Populations\s*$')

37ONETEP_SPIN = re.compile(r'(?i)^\s*Down\s*spin\s*density')

38ONETEP_POSITION = re.compile(r'(?i)^\s*Cell\s*Contents\s*$')

39ONETEP_FIRST_POSITION = re.compile(

40 r'^\s*%BLOCK\s*POSITIONS\s*_?\s*(ABS|FRAC)\s*:?\s*([*#!].*)?$'

41)

42ONETEP_WRONG_FIRST_POSITION = re.compile(

43 r'^\s*%block\s*positions\s*_?\s*(abs|frac)\s*:?\s*([*#!].*)?$'

44)

45ONETEP_RESUMING_GEOM = re.compile(

46 r'(?i)^\s*<{16}\s*Resuming\s*previous'

47 r'\s*ONETEP\s*Geometry\s*Optimisation\s*>{16}\s*$'

48)

50ONETEP_ATOM_COUNT = re.compile(r'(?i)^\s*Totals\s*:\s*(\d+\s*)*$')

51ONETEP_IBFGS_ITER = re.compile(r'(?i)^\s*BFGS\s*:\s*starting\s*iteration')

52ONETEP_IBFGS_IMPROVE = re.compile(r'(?i)^\s*BFGS\s*:\s*improving\s*iteration')

53ONETEP_START_GEOM = re.compile(

54 r'(?i)^<+\s*Starting\s*ONETEP\s*Geometry\s*Optimisation\s*>+$'

55)

56ONETEP_END_GEOM = re.compile(r'(?i)^\s*BFGS\s*:\s*Final\s*Configuration:\s*$')

58ONETEP_SPECIES = re.compile(r'(?i)^\s*%BLOCK\s*SPECIES\s*:?\s*([*#!].*)?$')

60ONETEP_FIRST_CELL = re.compile(

61 r'(?i)^\s*%BLOCK\s*LATTICE\s*_?\s*CART\s*:?\s*([*#!].*)?$'

62)

63ONETEP_STRESS_CELL = re.compile(

64 r'(?i)^\s*stress_calculation:\s*cell\s*geometry\s*$'

65)

68def get_onetep_keywords(path):

69 if isinstance(path, str):

70 with open(path) as fd:

71 results = read_onetep_in(fd, only_keywords=True)

72 else:

73 results = read_onetep_in(path, only_keywords=True)

75 # If there is an include file, the entire

76 # file keyword's will be included in the dict

77 # and the include_file keyword will be deleted

78 if 'include_file' in results['keywords']:

79 warnings.warn('include_file will be deleted from the dict')

80 del results['keywords']['include_file']

81 return results['keywords']

84def read_onetep_in(fd, **kwargs):

85 """

86 Read a single ONETEP input.

88 This function can be used to visually check ONETEP inputs,

89 using the ase gui. It can also be used to get access to

90 the input parameters attached to the ONETEP calculator

91 returned

93 The function should work on inputs which contain

94 'include_file' command(s), (possibly recursively

95 but untested)

97 The function should work on input which contains

98 exotic element(s) name(s) if the specie block is

99 present to map them back to real element(s)

100

101 Parameters

102 ----------

103 fd : io-object

104 File to read.

105

106 Return

107 ------

108 structure: Atoms

109 Atoms object with cell and a Onetep calculator

110 attached which contains the keywords dictionary

111 """

112

113 fdi_lines = fd.readlines()

114

115 try:

116 fd_path = Path(fd.name).resolve()

117 fd_parent = fd_path.parent

118 include_files = [fd_path]

119 except AttributeError:

120 # We are in a StringIO or something similar

121 fd_path = Path().cwd()

122 fd_parent = fd_path

123 include_files = [Path().cwd()]

124

125 def clean_lines(lines):

126 """

127 Remove indesirable line from the input

128 """

129 new_lines = []

130 for line in lines:

131 sep = re.split(r'[!#]', line.strip())[0]

132 if sep:

133 new_lines.append(sep)

134 return new_lines

135

136 # Skip comments and empty lines

137 fdi_lines = clean_lines(fdi_lines)

138

139 # Are we in a block?

140 block_start = 0

141

142 keywords = {}

143 atoms = Atoms()

144 cell = np.zeros((3, 3))

145 fractional = False

146 positions = False

147 symbols = False

148

149 # Main loop reading the input

150 for n, line in enumerate(fdi_lines):

151 line_lower = line.lower()

152 if re.search(r'^\s*%block', line_lower):

153 block_start = n + 1

154 if re.search(r'lattice_cart$', line_lower):

155 if re.search(r'^\s*ang\s*$', fdi_lines[block_start]):

156 cell = np.loadtxt(fdi_lines[n + 2 : n + 5])

157 else:

158 cell = np.loadtxt(fdi_lines[n + 1 : n + 4])

159 cell *= Bohr

160

161 if not block_start:

162 if 'devel_code' in line_lower:

163 warnings.warn('devel_code is not supported')

164 continue

165 # Splits line on any valid onetep separator

166 sep = re.split(r'[:=\s]+', line)

167 keywords[sep[0]] = ' '.join(sep[1:])

168 # If include_file is used, we open the included file

169 # and insert it in the current fdi_lines...

170 # ONETEP does not work with cascade

171 # and this SHOULD NOT work with cascade

172 if re.search(r'^\s*include_file$', sep[0]):

173 name = sep[1].replace("'", '')

174 name = name.replace('"', '')

175 new_path = fd_parent / name

176 for path in include_files:

177 if new_path.samefile(path):

178 raise ValueError('invalid/recursive include_file')

179 new_fd = open(new_path)

180 new_lines = new_fd.readlines()

181 new_lines = clean_lines(new_lines)

182 for include_line in new_lines:

183 sep = re.split(r'[:=\s]+', include_line)

184 if re.search(r'^\s*include_file$', sep[0]):

185 raise ValueError('nested include_file')

186 fdi_lines[:] = (

187 fdi_lines[: n + 1] + new_lines + fdi_lines[n + 1 :]

188 )

189 include_files.append(new_path)

190 continue

191

192 if re.search(r'^\s*%endblock', line_lower):

193 if re.search(r'\s*positions_', line_lower):

194 head = re.search(r'(?i)^\s*(\S*)\s*$', fdi_lines[block_start])

195 head = head.group(1).lower() if head else ''

196 conv = 1 if head == 'ang' else units['Bohr']

197 # Skip one line if head is True

198 to_read = fdi_lines[block_start + int(bool(head)) : n]

199 positions = np.loadtxt(to_read, usecols=(1, 2, 3))

200 positions *= conv

201 symbols = np.loadtxt(to_read, usecols=(0), dtype='str')

202 if re.search(r'.*frac$', line_lower):

203 fractional = True

204 elif re.search(r'^\s*%endblock\s*species$', line_lower):

205 els = fdi_lines[block_start:n]

206 species = {}

207 for el in els:

208 sep = el.split()

209 species[sep[0]] = sep[1]

210 to_read = [i.strip() for i in fdi_lines[block_start:n]]

211 keywords['species'] = to_read

212 elif re.search(r'lattice_cart$', line_lower):

213 pass

214 else:

215 to_read = [i.strip() for i in fdi_lines[block_start:n]]

216 block_title = line_lower.replace('%endblock', '').strip()

217 keywords[block_title] = to_read

218 block_start = 0

219

220 # We don't need a fully valid onetep

221 # input to read the keywords, just

222 # the keywords

223 if kwargs.get('only_keywords', False):

224 return {'keywords': keywords}

225 # Necessary if we have only one atom

226 # Check if the cell is valid (3D)

227 if not cell.any(axis=1).all():

228 raise ValueError('invalid cell specified')

229

230 if positions is False:

231 raise ValueError('invalid position specified')

232

233 if symbols is False:

234 raise ValueError('no symbols found')

235

236 positions = positions.reshape(-1, 3)

237 symbols = symbols.reshape(-1)

238 tags = []

239 info = {'onetep_species': []}

240 for symbol in symbols:

241 label = symbol.replace(species[symbol], '')

242 if label.isdigit():

243 tags.append(int(label))

244 else:

245 tags.append(0)

246 info['onetep_species'].append(symbol)

247 atoms = Atoms(

248 [species[i] for i in symbols], cell=cell, pbc=True, tags=tags, info=info

249 )

250 if fractional:

251 atoms.set_scaled_positions(positions / units['Bohr'])

252 else:

253 atoms.set_positions(positions)

254 results = {'atoms': atoms, 'keywords': keywords}

255 return results

256

257

258def write_onetep_in(

259 fd,

260 atoms,

261 edft=False,

262 xc='PBE',

263 ngwf_count=-1,

264 ngwf_radius=9.0,

265 keywords={},

266 pseudopotentials={},

267 pseudo_path='.',

268 pseudo_suffix=None,

269 **kwargs,

270):

271 """

272 Write a single ONETEP input.

273

274 This function will be used by ASE to perform

275 various workflows (Opt, NEB...) or can be used

276 manually to quickly create ONETEP input file(s).

277

278 The function will first write keywords in

279 alphabetic order in lowercase. Secondly, blocks

280 will be written in alphabetic order in uppercase.

281

282 Two ways to work with the function:

283

284 - By providing only (simple) keywords present in

285 the parameters. ngwf_count and ngwf_radius

286 accept multiple types as described in the Parameters

287 section.

288

289 - If the keywords parameters is provided as a dictionary

290 these keywords will be used to write the input file and

291 will take priority.

292

293 If no pseudopotentials are provided in the parameters and

294 the function will try to look for suitable pseudopotential

295 in the pseudo_path.

296

297 Parameters

298 ----------

299 fd : file

300 File to write.

301 atoms: Atoms

302 Atoms including Cell object to write.

303 edft: Bool

304 Activate EDFT.

305 xc: str

306 DFT xc to use e.g (PBE, RPBE, ...)

307 ngwf_count: int|list|dict

308 Behaviour depends on the type:

309 int: every species will have this amount

310 of ngwfs.

311 list: list of int, will be attributed

312 alphabetically to species:

313 dict: keys are species name(s),

314 value are their number:

315 ngwf_radius: int|list|dict

316 Behaviour depends on the type:

317 float: every species will have this radius.

318 list: list of float, will be attributed

319 alphabetically to species:

320 [10.0, 9.0]

321 dict: keys are species name(s),

322 value are their radius:

323 {'Na': 9.0, 'Cl': 10.0}

324 keywords: dict

325 Dictionary with ONETEP keywords to write,

326 keywords with lists as values will be

327 treated like blocks, with each element

328 of list being a different line.

329 pseudopotentials: dict

330 Behaviour depends on the type:

331 keys are species name(s) their

332 value are the pseudopotential file to use:

333 {'Na': 'Na.usp', 'Cl': 'Cl.usp'}

334 pseudo_path: str

335 Where to look for pseudopotential, correspond

336 to the pseudo_path keyword of ONETEP.

337 pseudo_suffix: str

338 Suffix for the pseudopotential filename

339 to look for, useful if you have multiple sets of

340 pseudopotentials in pseudo_path.

341 """

342

343 label = kwargs.get('label', 'onetep')

344 try:

345 directory = kwargs.get('directory', Path(dirname(fd.name)))

346 except AttributeError:

347 directory = '.'

348 autorestart = kwargs.get('autorestart', False)

349 elements = np.array(atoms.symbols)

350 tags = np.array(atoms.get_tags())

351 species_maybe = atoms.info.get('onetep_species', False)

352 # We look if the atom.info contains onetep species information

353 # If it does, we use it, as it might contains character

354 # which are not allowed in ase tags, if not we fall back

355 # to tags and use them instead.

356 if species_maybe:

357 if set(species_maybe) != set(elements):

358 species = np.array(species_maybe)

359 else:

360 species = elements

361 else:

362 formatted_tags = np.array(['' if i == 0 else str(i) for i in tags])

363 species = np.char.add(elements, formatted_tags)

364 numbers = np.array(atoms.numbers)

365 tmp = np.argsort(species)

366 # We sort both Z and name the same

367 numbers = np.take_along_axis(numbers, tmp, axis=0)

368 # u_elements = np.take_along_axis(elements, tmp, axis=0)

369 u_species = np.take_along_axis(species, tmp, axis=0)

370 elements = np.take_along_axis(elements, tmp, axis=0)

371 # We want to keep unique but without sort: small trick with index

372 idx = np.unique(u_species, return_index=True)[1]

373 elements = elements[idx]

374 # Unique species

375 u_species = u_species[idx]

376 numbers = numbers[idx]

377 n_sp = len(u_species)

378

379 if isinstance(ngwf_count, int):

380 ngwf_count = dict(zip(u_species, [ngwf_count] * n_sp))

381 elif isinstance(ngwf_count, list):

382 ngwf_count = dict(zip(u_species, ngwf_count))

383 elif isinstance(ngwf_count, dict):

384 pass

385 else:

386 raise TypeError('ngwf_count can only be int|list|dict')

387

388 if isinstance(ngwf_radius, float):

389 ngwf_radius = dict(zip(u_species, [ngwf_radius] * n_sp))

390 elif isinstance(ngwf_radius, list):

391 ngwf_radius = dict(zip(u_species, ngwf_radius))

392 elif isinstance(ngwf_radius, dict):

393 pass

394 else:

395 raise TypeError('ngwf_radius can only be float|list|dict')

396

397 pp_files = re.sub('\'|"', '', keywords.get('pseudo_path', pseudo_path))

398 pp_files = Path(pp_files).glob('*')

399 pp_files = [i for i in pp_files if i.is_file()]

400

401 if pseudo_suffix:

402 common_suffix = [pseudo_suffix]

403 else:

404 common_suffix = ['.usp', '.recpot', '.upf', '.paw', '.psp', '.pspnc']

405

406 if keywords.get('species_pot', False):

407 pp_list = keywords['species_pot']

408 elif isinstance(pseudopotentials, dict):

409 pp_list = []

410 for idx, el in enumerate(u_species):

411 if el in pseudopotentials:

412 pp_list.append(f'{el} {pseudopotentials[el]}')

413 else:

414 for i in pp_files:

415 reg_el_candidate = re.split(r'[-_.:= ]+', i.stem)[0]

416 if (

417 elements[idx] == reg_el_candidate.title()

418 and i.suffix.lower() in common_suffix

419 ):

420 pp_list.append(f'{el} {i.name}')

421 else:

422 raise TypeError('pseudopotentials object can only be dict')

423

424 default_species = []

425 for idx, el in enumerate(u_species):

426 tmp = ''

427 tmp += u_species[idx] + ' ' + elements[idx] + ' '

428 tmp += str(numbers[idx]) + ' '

429 try:

430 tmp += str(ngwf_count[el]) + ' '

431 except KeyError:

432 tmp += str(ngwf_count[elements[idx]]) + ' '

433 try:

434 tmp += str(ngwf_radius[el])

435 except KeyError:

436 tmp += str(ngwf_radius[elements[idx]])

437 default_species.append(tmp)

438

439 positions_abs = ['ang']

440 for s, p in zip(species, atoms.get_positions()):

441 line = '{s:>5} {0:>12.6f} {1:>12.6f} {2:>12.6f}'.format(s=s, *p)

442 positions_abs.append(line)

443

444 lattice_cart = ['ang']

445 for axis in atoms.get_cell():

446 line = '{:>16.8f} {:>16.8f} {:>16.8f}'.format(*axis)

447 lattice_cart.append(line)

448

449 # Default keywords if not provided by the user,

450 # most of them are ONETEP default, except write_forces

451 # which is always turned on.

452 default_keywords = {

453 'xc_functional': xc,

454 'edft': edft,

455 'cutoff_energy': 20,

456 'paw': False,

457 'task': 'singlepoint',

458 'output_detail': 'normal',

459 'species': default_species,

460 'pseudo_path': pseudo_path,

461 'species_pot': pp_list,

462 'positions_abs': positions_abs,

463 'lattice_cart': lattice_cart,

464 'write_forces': True,

465 'forces_output_detail': 'verbose',

466 }

467

468 # Main loop, fill the keyword dictionary

469 keywords = {key.lower(): value for key, value in keywords.items()}

470 for value in default_keywords:

471 if not keywords.get(value, None):

472 keywords[value] = default_keywords[value]

473

474 # No pseudopotential provided, we look for them in pseudo_path

475 # If autorestart is True, we look for restart files,

476 # and turn on relevant keywords...

477 if autorestart:

478 keywords['read_denskern'] = isfile(directory / (label + '.dkn'))

479 keywords['read_tightbox_ngwfs'] = isfile(

480 directory / (label + '.tightbox_ngwfs')

481 )

482 keywords['read_hamiltonian'] = isfile(directory / (label + '.ham'))

483

484 # If not EDFT, hamiltonian is irrelevant.

485 # print(keywords.get('edft', False))

486 # keywords['read_hamiltonian'] = \

487 # keywords.get('read_hamiltonian', False) & keywords.get('edft', False)

488

489 keywords = dict(sorted(keywords.items()))

490

491 lines = []

492 block_lines = []

493

494 for key, value in keywords.items():

495 if isinstance(value, (list, np.ndarray)):

496 if not all(isinstance(_, str) for _ in value):

497 raise TypeError('list values for blocks must be strings only')

498 block_lines.append(('\n%block ' + key).upper())

499 block_lines.extend(value)

500 block_lines.append(('%endblock ' + key).upper())

501 elif isinstance(value, bool):

502 lines.append(str(key) + ' : ' + str(value)[0])

503 elif isinstance(value, (str, int, float)):

504 lines.append(str(key) + ' : ' + str(value))

505 else:

506 raise TypeError('keyword values must be list|str|bool')

507 input_header = (

508 '!'

509 + '-' * 78

510 + '!\n'

511 + '!'

512 + '-' * 33

513 + ' INPUT FILE '

514 + '-' * 33

515 + '!\n'

516 + '!'

517 + '-' * 78

518 + '!\n\n'

519 )

520

521 input_footer = (

522 '\n!'

523 + '-' * 78

524 + '!\n'

525 + '!'

526 + '-' * 32

527 + ' END OF INPUT '

528 + '-' * 32

529 + '!\n'

530 + '!'

531 + '-' * 78

532 + '!'

533 )

534

535 fd.write(input_header)

536 fd.writelines(line + '\n' for line in lines)

537 fd.writelines(b_line + '\n' for b_line in block_lines)

538

539 if 'devel_code' in kwargs:

540 warnings.warn('writing devel code as it is, at the end of the file')

541 fd.writelines('\n' + line for line in kwargs['devel_code'])

542

543 fd.write(input_footer)

544

545

546def match_outputs(fdo_lines, output_keys):

547 # Find all matches, append them to the dictionary

548 breg = '|'.join([i.pattern.replace('(?i)', '') for i in output_keys])

549 prematch = {}

550

551 for idx, line in enumerate(fdo_lines):

552 matches = re.search(breg, line)

553 if matches:

554 prematch[idx] = matches.group(0)

555

556 output = {key: [] for key in output_keys}

557 for key, value in prematch.items():

558 for reg in output_keys:

559 if re.search(reg, value):

560 output[reg].append(key)

561 break

562

563 return {key: np.array(value) for key, value in output.items()}

564

565

566def read_onetep_out(fd, index=-1, improving=False, **kwargs):

567 """

568 Read ONETEP output(s).

569

570 !!!

571 This function will be used by ASE when performing

572 various workflows (opt, NEB...)

573 !!!

574

575 Parameters

576 ----------

577 fd : file

578 File to read.

579 index: slice

580 Which atomic configuration to read

581 improving: Bool

582 If the output is a geometry optimisation,

583 improving = True will keep line search

584 configuration from BFGS

585

586 Yields

587 ------

588 structure: Atoms|list of Atoms

589 """

590 # Put everything in memory

591 fdo_lines = fd.readlines()

592 n_lines = len(fdo_lines)

593

594 # Used to store index of important elements

595 output = {

596 ONETEP_START: [],

597 ONETEP_STOP: [],

598 ONETEP_TOTAL_ENERGY: [],

599 ONETEP_FORCE: [],

600 ONETEP_SPIN: [],

601 ONETEP_MULLIKEN: [],

602 ONETEP_POSITION: [],

603 ONETEP_FIRST_POSITION: [],

604 ONETEP_WRONG_FIRST_POSITION: [],

605 ONETEP_ATOM_COUNT: [],

606 ONETEP_IBFGS_IMPROVE: [],

607 ONETEP_IBFGS_ITER: [],

608 ONETEP_START_GEOM: [],

609 ONETEP_RESUMING_GEOM: [],

610 ONETEP_END_GEOM: [],

611 ONETEP_SPECIES: [],

612 ONETEP_FIRST_CELL: [],

613 ONETEP_STRESS_CELL: [],

614 }

615

616 # Index will be treated to get rid of duplicate or improving iterations

617 output_corr = deepcopy(output)

618

619 # Core properties that will be used in Yield

620 properties = [

621 ONETEP_TOTAL_ENERGY,

622 ONETEP_FORCE,

623 ONETEP_MULLIKEN,

624 ONETEP_FIRST_CELL,

625 ]

626

627 output = match_outputs(fdo_lines, [*output])

628

629 # Conveniance notation (pointers: no overhead, no additional memory)

630 ibfgs_iter = np.hstack((output[ONETEP_IBFGS_ITER], output[ONETEP_END_GEOM]))

631 ibfgs_start = output[ONETEP_START_GEOM]

632 ibfgs_improve = output[ONETEP_IBFGS_IMPROVE]

633 ibfgs_resume = output[ONETEP_RESUMING_GEOM]

634

635 onetep_start = output[ONETEP_START]

636 onetep_stop = output[ONETEP_STOP]

637

638 bfgs_keywords = np.hstack((ibfgs_improve, ibfgs_resume, ibfgs_iter))

639 bfgs_keywords = np.sort(bfgs_keywords)

640

641 core_keywords = np.hstack(

642 (

643 ibfgs_iter,

644 ibfgs_start,

645 ibfgs_improve,

646 ibfgs_resume,

647 ibfgs_iter,

648 onetep_start,

649 onetep_stop,

650 )

651 )

652 core_keywords = np.sort(core_keywords)

653

654 i_first_positions = output[ONETEP_FIRST_POSITION]

655 is_frac_positions = [i for i in i_first_positions if 'FRAC' in fdo_lines[i]]

656

657 # In onetep species can have arbritary names,

658 # We want to map them to real element names

659 # Via the species block

660 # species = np.concatenate((output[ONETEP_SPECIES],

661 # output[ONETEP_SPECIESL])).astype(np.int32)

662 species = output[ONETEP_SPECIES]

663

664 icells = np.hstack((output[ONETEP_FIRST_CELL], output[ONETEP_STRESS_CELL]))

665 icells = icells.astype(np.int32)

666 # Using the fact that 0 == False and > 0 == True

667 has_bfgs = (

668 len(ibfgs_iter)

669 + len(output[ONETEP_START_GEOM])

670 + len(output[ONETEP_RESUMING_GEOM])

671 )

672

673 # When the input block position is written in lowercase

674 # ONETEP does not print the initial position but a hash

675 # of it, might be needed

676 has_hash = len(output[ONETEP_WRONG_FIRST_POSITION])

677

678 def is_in_bfgs(idx):

679 """

680 Check if a given index is in a BFGS block

681 """

682 for past, future in zip(

683 output[ONETEP_START],

684 np.hstack((output[ONETEP_START][1:], [n_lines])),

685 ):

686 if past < idx < future:

687 if np.any(

688 (past < ibfgs_start) & (ibfgs_start < future)

689 ) or np.any((past < ibfgs_resume) & (ibfgs_resume < future)):

690 return True

691 return False

692

693 def where_in_bfgs(idx):

694 for past, future in zip(

695 core_keywords, np.hstack((core_keywords[1:], [n_lines]))

696 ):

697 if past < idx < future:

698 if past in onetep_start:

699 if future in ibfgs_start or future in ibfgs_resume:

700 return 'resume'

701 continue

702 # Are we in start or resume or improve

703 if past in ibfgs_start:

704 return 'start'

705 elif past in ibfgs_resume:

706 return 'resume'

707 elif past in ibfgs_improve:

708 return 'improve'

709

710 return False

711

712 ipositions = np.hstack((output[ONETEP_POSITION], i_first_positions)).astype(

713 np.int32

714 )

715 ipositions = np.sort(ipositions)

716

717 n_pos = len(ipositions)

718

719 # Some ONETEP files will not have any positions

720 # due to how the software is coded. As a last

721 # resort we look for a geom file with the same label.

722

723 if n_pos == 0:

724 if has_hash:

725 raise RuntimeError(no_positions_error)

726 raise RuntimeError(unable_to_read)

727

728 to_del = []

729

730 # Important loop which:

731 # - Get rid of improving BFGS iteration if improving == False

732 # - Append None to properties to make sure each properties will

733 # have the same length and each index correspond to the right

734 # atomic configuration (hopefully).

735 # Past is the index of the current atomic conf, future is the

736 # index of the next one.

737

738 for idx, (past, future) in enumerate(

739 zip(ipositions, np.hstack((ipositions[1:], [n_lines])))

740 ):

741 if has_bfgs:

742 which_bfgs = where_in_bfgs(past)

743

744 if which_bfgs == 'resume':

745 to_del.append(idx)

746 continue

747

748 if not improving:

749 if which_bfgs == 'improve':

750 to_del.append(idx)

751 continue

752

753 # We append None if no properties in contained for

754 # one specific atomic configurations.

755 for prop in properties:

756 (tmp,) = np.where((past < output[prop]) & (output[prop] <= future))

757 if len(tmp) == 0:

758 output_corr[prop].append(None)

759 else:

760 output_corr[prop].extend(output[prop][tmp[:1]])

761

762 if to_del and len(to_del) != n_pos:

763 new_indices = np.setdiff1d(np.arange(n_pos), to_del)

764 ipositions = ipositions[new_indices]

765

766 helper = _OnetepHelper(fdo_lines)

767

768 cells = []

769 for idx in icells:

770 if idx in output[ONETEP_STRESS_CELL]:

771 cell = helper.parse_cell(idx) if idx else None

772 else:

773 cell = helper.parse_first_cell(idx) if idx else None

774 cells.append(cell)

775

776 def parse_multiple(parsefunc, indices):

777 return [parsefunc(idx) if idx else None for idx in indices]

778

779 charges = parse_multiple(helper.parse_charge, output_corr[ONETEP_MULLIKEN])

780 energies = parse_multiple(

781 helper.parse_energy, output_corr[ONETEP_TOTAL_ENERGY]

782 )

783 magmoms = parse_multiple(helper.parse_spin, output_corr[ONETEP_MULLIKEN])

784

785 real_species = []

786 for idx in species:

787 real_specie = helper.parse_species(idx)

788 real_species.append(real_specie)

789

790 positions, forces = [], []

791 for idx in ipositions:

792 if idx in i_first_positions:

793 position = helper.parse_first_positions(idx, species, real_species)

794 else:

795 position = helper.parse_positions(idx, species, real_species)

796 if position:

797 positions.append(position)

798 else:

799 n_pos -= 1

800 break

801 for idx in output_corr[ONETEP_FORCE]:

802 force = helper.parse_force(idx) if idx else None

803 forces.append(force)

804

805 n_pos = len(positions)

806

807 # Numpy trick to get rid of configuration that are essentially the same

808 # in a regular geometry optimisation with internal BFGS, the first

809 # configuration is printed three time, we get rid of it

810 properties = [energies, forces, charges, magmoms]

811

812 if has_bfgs:

813 tmp = [i.positions for i in positions]

814 to_del = []

815 for i in range(len(tmp[:-1])):

816 if is_in_bfgs(ipositions[i]):

817 if np.array_equal(tmp[i], tmp[i + 1]):

818 # If the deleted configuration has a property

819 # we want to keep it

820 for prop in properties:

821 if prop[i + 1] is not None and prop[i] is None:

822 prop[i] = prop[i + 1]

823 to_del.append(i + 1)

824 c = np.full((len(tmp)), True)

825 c[to_del] = False

826 energies = [energies[i] for i in range(n_pos) if c[i]]

827 forces = [forces[i] for i in range(n_pos) if c[i]]

828 charges = [charges[i] for i in range(n_pos) if c[i]]

829 magmoms = [magmoms[i] for i in range(n_pos) if c[i]]

830 positions = [positions[i] for i in range(n_pos) if c[i]]

831 ipositions = [ipositions[i] for i in range(n_pos) if c[i]]

832

833 n_pos = len(positions)

834

835 # We take care of properties that only show up at

836 # the beginning of onetep calculation

837 whole = np.append(output[ONETEP_START], n_lines)

838

839 if n_pos == 0:

840 raise RuntimeError(unable_to_read)

841

842 spin = np.full((n_pos), 1)

843 for sp in output[ONETEP_SPIN]:

844 tmp = zip(whole, whole[1:])

845 for past, future in tmp:

846 if past < sp < future:

847 p = (past < ipositions) & (ipositions < future)

848 spin[p] = 2

849

850 cells_all = np.ones((n_pos, 3, 3))

851 for idx, cell in enumerate(output[ONETEP_FIRST_CELL]):

852 tmp = zip(whole, whole[1:])

853 for past, future in tmp:

854 if past < cell < future:

855 p = (past < ipositions) & (ipositions < future)

856 cells_all[p] = cells[idx]

857

858 # Prepare atom objects with all the properties

859 if isinstance(index, int):

860 indices = [range(n_pos)[index]]

861 else:

862 indices = range(n_pos)[index]

863

864 for idx in indices:

865 positions[idx].set_cell(cells_all[idx])

866 if ipositions[idx] in is_frac_positions:

867 positions[idx].set_scaled_positions(positions[idx].get_positions())

868 positions[idx].set_initial_charges(charges[idx])

869 calc = SinglePointDFTCalculator(

870 positions[idx],

871 energy=energies[idx] if energies else None,

872 free_energy=energies[idx] if energies else None,

873 forces=forces[idx] if forces else None,

874 charges=charges[idx] if charges else None,

875 magmoms=magmoms[idx] if magmoms else None,

876 )

877 # calc.kpts = [(0, 0, 0) for _ in range(spin[idx])]

878 positions[idx].calc = calc

879 yield positions[idx]

880

881

882class _OnetepHelper:

883 def __init__(self, fdo_lines):

884 self.fdo_lines = fdo_lines

885

886 # Bunch of methods to grep properties from output.

887 def parse_cell(self, idx):

888 a, b, c = np.loadtxt([self.fdo_lines[idx + 2]]) * units['Bohr']

889 al, be, ga = np.loadtxt([self.fdo_lines[idx + 4]])

890 cell = Cell.fromcellpar([a, b, c, al, be, ga])

891 return np.array(cell)

892

893 def parse_charge(self, idx):

894 n = 0

895 offset = 4

896 while idx + n < len(self.fdo_lines):

897 if not self.fdo_lines[idx + n].strip():

898 tmp_charges = np.loadtxt(

899 self.fdo_lines[idx + offset : idx + n - 1], usecols=3

900 )

901 return np.reshape(tmp_charges, -1)

902 n += 1

903 return None

904

905 # In ONETEP there is no way to differentiate electronic entropy

906 # and entropy due to solvent, therefore there is no way to

907 # extrapolate the energy at 0 K. We return the last energy

908 # instead.

909

910 def parse_energy(self, idx):

911 freg = re.compile(r'-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+\-]?\d+)?')

912

913 n = 0

914 while idx + n < len(self.fdo_lines):

915 if re.search(

916 r'^\s*\|\s*Total\s*:.*\|\s*$', self.fdo_lines[idx + n]

917 ):

918 energy_str = re.search(freg, self.fdo_lines[idx + n]).group(0)

919 return float(energy_str) * units['Hartree']

920 n += 1

921 return None

922

923 def parse_first_cell(self, idx):

924 n = 0

925 offset = 1

926 while idx + n < len(self.fdo_lines):

927 if re.search(

928 r'(?i)^\s*"?\s*ang\s*"?\s*([*#!].*)?$', self.fdo_lines[idx + n]

929 ):

930 offset += 1

931 if re.search(

932 r'(?i)^\s*%ENDBLOCK\s*LATTICE'

933 r'\s*_?\s*CART\s*:?\s*([*#!].*)?$',

934 self.fdo_lines[idx + n],

935 ):

936 cell = np.loadtxt(self.fdo_lines[idx + offset : idx + n])

937 return cell if offset == 2 else cell * units['Bohr']

938 n += 1

939 return None

940

941 def parse_first_positions(self, idx, species, real_species):

942 n = 0

943 offset = 1

944 while idx + n < len(self.fdo_lines):

945 if re.search(

946 r'(?i)^\s*"?\s*ang\s*"?\s*([*#!].*)?$', self.fdo_lines[idx + n]

947 ):

948 offset += 1

949 if re.search(

950 r'^\s*%ENDBLOCK\s*POSITIONS_', self.fdo_lines[idx + n]

951 ):

952 if 'FRAC' in self.fdo_lines[idx + n]:

953 conv_factor = 1

954 else:

955 conv_factor = units['Bohr']

956 tmp = np.loadtxt(

957 self.fdo_lines[idx + offset : idx + n], dtype='str'

958 ).reshape(-1, 4)

959 els = np.char.array(tmp[:, 0])

960 if offset == 2:

961 pos = tmp[:, 1:].astype(np.float64)

962 else:

963 pos = tmp[:, 1:].astype(np.float64) * conv_factor

964 try:

965 atoms = Atoms(els, pos)

966 # ASE doesn't recognize names used in ONETEP

967 # as chemical symbol: dig deeper

968 except KeyError:

969 tags, real_elements = self.find_correct_species(

970 els, idx, species, real_species, first=True

971 )

972 atoms = Atoms(real_elements, pos)

973 atoms.set_tags(tags)

974 atoms.info['onetep_species'] = list(els)

975 return atoms

976 n += 1

977 return None

978

979 def parse_force(self, idx):

980 n = 0

981 while idx + n < len(self.fdo_lines):

982 if re.search(

983 r'(?i)^\s*\*\s*TOTAL:.*\*\s*$', self.fdo_lines[idx + n]

984 ):

985 tmp = np.loadtxt(

986 self.fdo_lines[idx + 6 : idx + n - 2],

987 dtype=np.float64,

988 usecols=(3, 4, 5),

989 )

990 return tmp * units['Hartree'] / units['Bohr']

991 n += 1

992 return None

993

994 def parse_positions(self, idx, species, real_species):

995 n = 0

996 offset = 7

997 stop = 0

998 while idx + n < len(self.fdo_lines):

999 if re.search(r'^\s*x{60,}\s*$', self.fdo_lines[idx + n]):

1000 stop += 1

1001 if stop == 2:

1002 tmp = np.loadtxt(

1003 self.fdo_lines[idx + offset : idx + n],

1004 dtype='str',

1005 usecols=(1, 3, 4, 5),

1006 )

1007 els = np.char.array(tmp[:, 0])

1008 pos = tmp[:, 1:].astype(np.float64) * units['Bohr']

1009 try:

1010 atoms = Atoms(els, pos)

1011 # ASE doesn't recognize names used in ONETEP

1012 # as chemical symbol: dig deeper

1013 except KeyError:

1014 tags, real_elements = self.find_correct_species(

1015 els, idx, species, real_species

1016 )

1017 atoms = Atoms(real_elements, pos)

1018 atoms.set_tags(tags)

1019 atoms.info['onetep_species'] = list(els)

1020 return atoms

1021 n += 1

1022 return None

1023

1024 def parse_species(self, idx):

1025 n = 1

1026 element_map = {}

1027 while idx + n < len(self.fdo_lines):

1028 sep = self.fdo_lines[idx + n].split()

1029 if re.search(

1030 r'(?i)^\s*%ENDBLOCK\s*SPECIES\s*:?\s*([*#!].*)?$',

1031 self.fdo_lines[idx + n],

1032 ):

1033 return element_map

1034 to_skip = re.search(

1035 r'(?i)^\s*(ang|bohr)\s*([*#!].*)?$', self.fdo_lines[idx + n]

1036 )

1037 if not to_skip:

1038 element_map[sep[0]] = sep[1]

1039 n += 1

1040 return None

1041

1042 def parse_spin(self, idx):

1043 n = 0

1044 offset = 4

1045 while idx + n < len(self.fdo_lines):

1046 if not self.fdo_lines[idx + n].strip():

1047 # If no spin is present we return None

1048 try:

1049 tmp_spins = np.loadtxt(

1050 self.fdo_lines[idx + offset : idx + n - 1], usecols=4

1051 )

1052 return np.reshape(tmp_spins, -1)

1053 except ValueError:

1054 return None

1055 n += 1

1056 return None

1057

1058 # This is needed if ASE doesn't recognize the element

1059 def find_correct_species(

1060 self, els, idx, species, real_species, first=False

1061 ):

1062 real_elements = []

1063 tags = []

1064 # Find nearest species block in case of

1065 # multi-output file with different species blocks.

1066 if first:

1067 closest_species = np.argmin(abs(idx - species))

1068 else:

1069 tmp = idx - species

1070 tmp[tmp < 0] = 9999999999

1071 closest_species = np.argmin(tmp)

1072 elements_map = real_species[closest_species]

1073 for el in els:

1074 real_elements.append(elements_map[el])

1075 tag_maybe = el.replace(elements_map[el], '')

1076 if tag_maybe.isdigit():

1077 tags.append(int(tag_maybe))

1078 else:

1079 tags.append(False)

1080 return tags, real_elements

Coverage for ase / io / onetep.py: 82.94%

551 statements