Coverage for /builds/ase/ase/ase/io/nwchem/nwreader.py: 65.44%

272 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-08-02 00:12 +0000

1# fmt: off 

2 

3import re 

4from collections import OrderedDict 

5 

6import numpy as np 

7 

8from ase import Atoms 

9from ase.calculators.singlepoint import ( 

10 SinglePointDFTCalculator, 

11 SinglePointKPoint, 

12) 

13from ase.units import Bohr, Hartree 

14 

15from .parser import _define_pattern 

16 

17# Note to the reader of this code: Here and below we use the function 

18# _define_pattern from parser.py in this same directory to compile 

19# regular expressions. These compiled expressions are stored along with 

20# an example string that the expression should match in a list that 

21# is used during tests (test/nwchem/nwchem_parser.py) to ensure that 

22# the regular expressions are still working correctly. 

23 

24# Matches the beginning of a GTO calculation 

25_gauss_block = _define_pattern( 

26 r'^[\s]+NWChem (?:SCF|DFT) Module\n$', 

27 " NWChem SCF Module\n", 

28) 

29 

30 

31# Matches the beginning of a plane wave calculation 

32_pw_block = _define_pattern( 

33 r'^[\s]+\*[\s]+NWPW (?:PSPW|BAND|PAW|Band Structure) Calculation' 

34 r'[\s]+\*[\s]*\n$', 

35 " * NWPW PSPW Calculation *\n", 

36) 

37 

38 

39# Top-level parser 

40def read_nwchem_out(fobj, index=-1): 

41 """Splits an NWChem output file into chunks corresponding to 

42 individual single point calculations.""" 

43 lines = fobj.readlines() 

44 

45 if index == slice(-1, None, None): 

46 for line in lines: 

47 if _gauss_block.match(line): 

48 return [parse_gto_chunk(''.join(lines))] 

49 if _pw_block.match(line): 

50 return [parse_pw_chunk(''.join(lines))] 

51 raise ValueError('This does not appear to be a valid NWChem ' 

52 'output file.') 

53 

54 # First, find each SCF block 

55 group = [] 

56 atomslist = [] 

57 header = True 

58 lastgroup = [] 

59 lastparser = None 

60 parser = None 

61 for line in lines: 

62 group.append(line) 

63 if _gauss_block.match(line): 

64 next_parser = parse_gto_chunk 

65 elif _pw_block.match(line): 

66 next_parser = parse_pw_chunk 

67 else: 

68 continue 

69 

70 if header: 

71 header = False 

72 else: 

73 atoms = parser(''.join(group)) 

74 if atoms is None and parser is lastparser: 

75 atoms = parser(''.join(lastgroup + group)) 

76 if atoms is not None: 

77 atomslist[-1] = atoms 

78 lastgroup += group 

79 else: 

80 atomslist.append(atoms) 

81 lastgroup = group 

82 lastparser = parser 

83 group = [] 

84 parser = next_parser 

85 if not header: 

86 atoms = parser(''.join(group)) 

87 if atoms is not None: 

88 atomslist.append(atoms) 

89 

90 return atomslist[index] 

91 

92 

93# Matches a geometry block and returns the geometry specification lines 

94_geom = _define_pattern( 

95 r'\n[ \t]+Geometry \"[ \t\S]+\" -> \"[ \t\S]*\"[ \t]*\n' 

96 r'^[ \t-]+\n' 

97 r'(?:^[ \t\S]*\n){3}' 

98 r'^[ \t]+No\.[ \t]+Tag[ \t]+Charge[ \t]+X[ \t]+Y[ \t]+Z\n' 

99 r'^[ \t-]+\n' 

100 r'((?:^(?:[ \t]+[\S]+){6}[ \t]*\n)+)', 

101 """\ 

102 

103 Geometry "geometry" -> "" 

104 ------------------------- 

105 

106 Output coordinates in angstroms (scale by 1.889725989 to convert to a.u.) 

107 

108 No. Tag Charge X Y Z 

109 ---- ---------------- ---------- -------------- -------------- -------------- 

110 1 C 6.0000 0.00000000 0.00000000 0.00000000 

111 2 H 1.0000 0.62911800 0.62911800 0.62911800 

112 3 H 1.0000 -0.62911800 -0.62911800 0.62911800 

113 4 H 1.0000 0.62911800 -0.62911800 -0.62911800 

114""", re.M) 

115 

116# Unit cell parser 

117_cell_block = _define_pattern(r'^[ \t]+Lattice Parameters[ \t]*\n' 

118 r'^(?:[ \t\S]*\n){4}' 

119 r'((?:^(?:[ \t]+[\S]+){5}\n){3})', 

120 """\ 

121 Lattice Parameters 

122 ------------------ 

123 

124 lattice vectors in angstroms (scale by 1.889725989 to convert to a.u.) 

125 

126 a1=< 4.000 0.000 0.000 > 

127 a2=< 0.000 5.526 0.000 > 

128 a3=< 0.000 0.000 4.596 > 

129 a= 4.000 b= 5.526 c= 4.596 

130 alpha= 90.000 beta= 90.000 gamma= 90.000 

131 omega= 101.6 

132""", re.M) 

133 

134 

135# Parses the geometry and returns the corresponding Atoms object 

136def _parse_geomblock(chunk): 

137 geomblocks = _geom.findall(chunk) 

138 if not geomblocks: 

139 return None 

140 geomblock = geomblocks[-1].strip().split('\n') 

141 natoms = len(geomblock) 

142 symbols = [] 

143 pos = np.zeros((natoms, 3)) 

144 for i, line in enumerate(geomblock): 

145 line = line.strip().split() 

146 symbols.append(line[1]) 

147 pos[i] = [float(x) for x in line[3:6]] 

148 

149 cellblocks = _cell_block.findall(chunk) 

150 if cellblocks: 

151 cellblock = cellblocks[-1].strip().split('\n') 

152 cell = np.zeros((3, 3)) 

153 for i, line in enumerate(cellblock): 

154 line = line.strip().split() 

155 cell[i] = [float(x) for x in line[1:4]] 

156 else: 

157 cell = None 

158 return Atoms(symbols, positions=pos, cell=cell) 

159 

160 

161# GTO-specific parser stuff 

162 

163# Matches gradient block from a GTO calculation 

164_gto_grad = _define_pattern( 

165 r'^[ \t]+[\S]+[ \t]+ENERGY GRADIENTS[ \t]*[\n]+' 

166 r'^[ \t]+atom[ \t]+coordinates[ \t]+gradient[ \t]*\n' 

167 r'^(?:[ \t]+x[ \t]+y[ \t]+z){2}[ \t]*\n' 

168 r'((?:^(?:[ \t]+[\S]+){8}\n)+)[ \t]*\n', 

169 """\ 

170 UHF ENERGY GRADIENTS 

171 

172 atom coordinates gradient 

173 x y z x y z 

174 1 C 0.293457 -0.293457 0.293457 -0.000083 0.000083 -0.000083 

175 2 H 1.125380 1.355351 1.125380 0.000086 0.000089 0.000086 

176 3 H -1.355351 -1.125380 1.125380 -0.000089 -0.000086 0.000086 

177 4 H 1.125380 -1.125380 -1.355351 0.000086 -0.000086 -0.000089 

178 

179""", re.M) 

180 

181# Energy parsers for a variety of different GTO calculations 

182_e_gto = OrderedDict() 

183_e_gto['tce'] = _define_pattern( 

184 r'^[\s]+[\S]+[\s]+total energy \/ hartree[\s]+' 

185 r'=[\s]+([\S]+)[\s]*\n', 

186 " CCD total energy / hartree " 

187 "= -75.715332545665888\n", re.M, 

188) 

189_e_gto['ccsd'] = _define_pattern( 

190 r'^[\s]+Total CCSD energy:[\s]+([\S]+)[\s]*\n', 

191 " Total CCSD energy: -75.716168566598569\n", 

192 re.M, 

193) 

194_e_gto['tddft'] = _define_pattern( 

195 r'^[\s]+Excited state energy =[\s]+([\S]+)[\s]*\n', 

196 " Excited state energy = -75.130134499965\n", 

197 re.M, 

198) 

199_e_gto['mp2'] = _define_pattern( 

200 r'^[\s]+Total MP2 energy[\s]+([\S]+)[\s]*\n', 

201 " Total MP2 energy -75.708800087578\n", 

202 re.M, 

203) 

204_e_gto['mf'] = _define_pattern( 

205 r'^[\s]+Total (?:DFT|SCF) energy =[\s]+([\S]+)[\s]*\n', 

206 " Total SCF energy = -75.585555997789\n", 

207 re.M, 

208) 

209 

210 

211# GTO parser 

212def parse_gto_chunk(chunk): 

213 atoms = None 

214 forces = None 

215 energy = None 

216 dipole = None 

217 for theory, pattern in _e_gto.items(): 

218 matches = pattern.findall(chunk) 

219 if matches: 

220 energy = float(matches[-1].replace('D', 'E')) * Hartree 

221 break 

222 

223 gradblocks = _gto_grad.findall(chunk) 

224 if gradblocks: 

225 gradblock = gradblocks[-1].strip().split('\n') 

226 natoms = len(gradblock) 

227 symbols = [] 

228 pos = np.zeros((natoms, 3)) 

229 forces = np.zeros((natoms, 3)) 

230 for i, line in enumerate(gradblock): 

231 line = line.strip().split() 

232 symbols.append(line[1]) 

233 pos[i] = [float(x) for x in line[2:5]] 

234 forces[i] = [-float(x) for x in line[5:8]] 

235 pos *= Bohr 

236 forces *= Hartree / Bohr 

237 atoms = Atoms(symbols, positions=pos) 

238 

239 dipole, _quadrupole = _get_multipole(chunk) 

240 

241 kpts = _get_gto_kpts(chunk) 

242 

243 if atoms is None: 

244 atoms = _parse_geomblock(chunk) 

245 

246 if atoms is None: 

247 return None 

248 

249 # SinglePointDFTCalculator doesn't support quadrupole moment currently 

250 calc = SinglePointDFTCalculator(atoms=atoms, 

251 energy=energy, 

252 free_energy=energy, # XXX Is this right? 

253 forces=forces, 

254 dipole=dipole, 

255 # quadrupole=quadrupole, 

256 ) 

257 calc.kpts = kpts 

258 atoms.calc = calc 

259 return atoms 

260 

261 

262# Extracts dipole and quadrupole moment for a GTO calculation 

263# Note on the regex: Some, but not all, versions of NWChem 

264# insert extra spaces in the blank lines. Do not remove the \s* 

265# in between \n and \n 

266_multipole = _define_pattern( 

267 r'^[ \t]+Multipole analysis of the density[ \t\S]*\n' 

268 r'^[ \t-]+\n\s*\n^[ \t\S]+\n^[ \t-]+\n' 

269 r'((?:(?:(?:[ \t]+[\S]+){7,8}\n)|[ \t]*\n){12})', 

270 """\ 

271 Multipole analysis of the density 

272 --------------------------------- 

273 

274 L x y z total alpha beta nuclear 

275 - - - - ----- ----- ---- ------- 

276 0 0 0 0 -0.000000 -5.000000 -5.000000 10.000000 

277 

278 1 1 0 0 0.000000 0.000000 0.000000 0.000000 

279 1 0 1 0 -0.000001 -0.000017 -0.000017 0.000034 

280 1 0 0 1 -0.902084 -0.559881 -0.559881 0.217679 

281 

282 2 2 0 0 -5.142958 -2.571479 -2.571479 0.000000 

283 2 1 1 0 -0.000000 -0.000000 -0.000000 0.000000 

284 2 1 0 1 0.000000 0.000000 0.000000 0.000000 

285 2 0 2 0 -3.153324 -3.807308 -3.807308 4.461291 

286 2 0 1 1 0.000001 -0.000009 -0.000009 0.000020 

287 2 0 0 2 -4.384288 -3.296205 -3.296205 2.208122 

288""", re.M) 

289 

290 

291# Parses the dipole and quadrupole moment from a GTO calculation 

292def _get_multipole(chunk): 

293 matches = _multipole.findall(chunk) 

294 if not matches: 

295 return None, None 

296 # This pulls the 5th column out of the multipole moments block; 

297 # this column contains the actual moments. 

298 moments = [float(x.split()[4]) for x in matches[-1].split('\n') 

299 if x and not x.isspace()] 

300 dipole = np.array(moments[1:4]) * Bohr 

301 quadrupole = np.zeros(9) 

302 quadrupole[[0, 1, 2, 4, 5, 8]] = [moments[4:]] 

303 quadrupole[[3, 6, 7]] = quadrupole[[1, 2, 5]] 

304 return dipole, quadrupole.reshape((3, 3)) * Bohr**2 

305 

306 

307# MO eigenvalue and occupancy parser for GTO calculations 

308_eval_block = _define_pattern( 

309 r'^[ \t]+[\S]+ Final (?:Alpha |Beta )?Molecular Orbital Analysis[ \t]*' 

310 r'\n^[ \t-]+\n\n' 

311 r'(?:^[ \t]+Vector [ \t\S]+\n(?:^[ \t\S]+\n){3}' 

312 r'(?:^(?:(?:[ \t]+[\S]+){5}){1,2}[ \t]*\n)+\n)+', 

313 """\ 

314 ROHF Final Molecular Orbital Analysis 

315 ------------------------------------- 

316 

317 Vector 1 Occ=2.000000D+00 E=-2.043101D+01 

318 MO Center= 1.1D-20, 1.5D-18, 1.2D-01, r^2= 1.5D-02 

319 Bfn. Coefficient Atom+Function Bfn. Coefficient Atom+Function  

320 ----- ------------ --------------- ----- ------------ --------------- 

321 1 0.983233 1 O s  

322 

323 Vector 2 Occ=2.000000D+00 E=-1.324439D+00 

324 MO Center= -2.1D-18, -8.6D-17, -7.1D-02, r^2= 5.1D-01 

325 Bfn. Coefficient Atom+Function Bfn. Coefficient Atom+Function  

326 ----- ------------ --------------- ----- ------------ --------------- 

327 6 0.708998 1 O s 1 -0.229426 1 O s  

328 2 0.217752 1 O s  

329 """, re.M) # noqa: W291 

330 

331 

332# Parses the eigenvalues and occupations from a GTO calculation 

333def _get_gto_kpts(chunk): 

334 eval_blocks = _eval_block.findall(chunk) 

335 if not eval_blocks: 

336 return [] 

337 kpts = [] 

338 kpt = _get_gto_evals(eval_blocks[-1]) 

339 if kpt.s == 1: 

340 kpts.append(_get_gto_evals(eval_blocks[-2])) 

341 kpts.append(kpt) 

342 return kpts 

343 

344 

345# Extracts MO eigenvalue and occupancy for a GTO calculation 

346_extract_vector = _define_pattern( 

347 r'^[ \t]+Vector[ \t]+([\S])+[ \t]+Occ=([\S]+)[ \t]+E=[ \t]*([\S]+)[ \t]*\n', 

348 " Vector 1 Occ=2.000000D+00 E=-2.043101D+01\n", re.M, 

349) 

350 

351 

352# Extracts the eigenvalues and occupations from a GTO calculation 

353def _get_gto_evals(chunk): 

354 spin = 1 if re.match(r'[ \t\S]+Beta', chunk) else 0 

355 data = [] 

356 for vector in _extract_vector.finditer(chunk): 

357 data.append([float(x.replace('D', 'E')) for x in vector.groups()[1:]]) 

358 data = np.array(data) 

359 occ = data[:, 0] 

360 energies = data[:, 1] * Hartree 

361 

362 return SinglePointKPoint(1., spin, 0, energies, occ) 

363 

364 

365# Plane wave specific parsing stuff 

366 

367# Matches the gradient block from a plane wave calculation 

368_nwpw_grad = _define_pattern( 

369 r'^[ \t]+[=]+[ \t]+Ion Gradients[ \t]+[=]+[ \t]*\n' 

370 r'^[ \t]+Ion Forces:[ \t]*\n' 

371 r'((?:^(?:[ \t]+[\S]+){7}\n)+)', 

372 """\ 

373 ============= Ion Gradients ================= 

374 Ion Forces: 

375 1 O ( -0.000012 0.000027 -0.005199 ) 

376 2 H ( 0.000047 -0.013082 0.020790 ) 

377 3 H ( 0.000047 0.012863 0.020786 ) 

378 C.O.M. ( -0.000000 -0.000000 -0.000000 ) 

379 =============================================== 

380""", re.M) 

381 

382# Matches the gradient block from a PAW calculation 

383_paw_grad = _define_pattern( 

384 r'^[ \t]+[=]+[ \t]+Ion Gradients[ \t]+[=]+[ \t]*\n' 

385 r'^[ \t]+Ion Positions:[ \t]*\n' 

386 r'((?:^(?:[ \t]+[\S]+){7}\n)+)' 

387 r'^[ \t]+Ion Forces:[ \t]*\n' 

388 r'((?:^(?:[ \t]+[\S]+){7}\n)+)', 

389 """\ 

390 ============= Ion Gradients ================= 

391 Ion Positions: 

392 1 O ( -3.77945 -5.22176 -3.77945 ) 

393 2 H ( -3.77945 -3.77945 3.77945 ) 

394 3 H ( -3.77945 3.77945 3.77945 ) 

395 Ion Forces: 

396 1 O ( -0.00001 -0.00000 0.00081 ) 

397 2 H ( 0.00005 -0.00026 -0.00322 ) 

398 3 H ( 0.00005 0.00030 -0.00322 ) 

399 C.O.M. ( -0.00000 -0.00000 -0.00000 ) 

400 =============================================== 

401""", re.M) 

402 

403# Energy parser for plane wave calculations 

404_nwpw_energy = _define_pattern( 

405 r'^[\s]+Total (?:PSPW|BAND|PAW) energy' 

406 r'[\s]+:[\s]+([\S]+)[\s]*\n', 

407 " Total PSPW energy : -0.1709317826E+02\n", 

408 re.M, 

409) 

410 

411# Parser for the fermi energy in a plane wave calculation 

412_fermi_energy = _define_pattern( 

413 r'^[ \t]+Fermi energy =[ \t]+([\S]+) \([ \t]+[\S]+[ \t]*\n', 

414 " Fermi energy = -0.5585062E-01 ( -1.520eV)\n", re.M, 

415) 

416 

417 

418# Plane wave parser 

419def parse_pw_chunk(chunk): 

420 atoms = _parse_geomblock(chunk) 

421 if atoms is None: 

422 return None 

423 

424 energy = None 

425 efermi = None 

426 forces = None 

427 stress = None 

428 

429 matches = _nwpw_energy.findall(chunk) 

430 if matches: 

431 energy = float(matches[-1].replace('D', 'E')) * Hartree 

432 

433 matches = _fermi_energy.findall(chunk) 

434 if matches: 

435 efermi = float(matches[-1].replace('D', 'E')) * Hartree 

436 

437 gradblocks = _nwpw_grad.findall(chunk) 

438 if not gradblocks: 

439 gradblocks = _paw_grad.findall(chunk) 

440 if gradblocks: 

441 gradblock = gradblocks[-1].strip().split('\n') 

442 natoms = len(gradblock) 

443 symbols = [] 

444 forces = np.zeros((natoms, 3)) 

445 for i, line in enumerate(gradblock): 

446 line = line.strip().split() 

447 symbols.append(line[1]) 

448 forces[i] = [float(x) for x in line[3:6]] 

449 forces *= Hartree / Bohr 

450 

451 if atoms.cell: 

452 stress = _get_stress(chunk, atoms.cell) 

453 

454 ibz_kpts, kpts = _get_pw_kpts(chunk) 

455 

456 # NWChem does not calculate an energy extrapolated to the 0K limit, 

457 # so right now, energy and free_energy will be the same. 

458 calc = SinglePointDFTCalculator(atoms=atoms, 

459 energy=energy, 

460 efermi=efermi, 

461 free_energy=energy, 

462 forces=forces, 

463 stress=stress, 

464 ibzkpts=ibz_kpts) 

465 calc.kpts = kpts 

466 atoms.calc = calc 

467 return atoms 

468 

469 

470# Extracts stress tensor from a plane wave calculation 

471_stress = _define_pattern( 

472 r'[ \t]+[=]+[ \t]+(?:total gradient|E all FD)[ \t]+[=]+[ \t]*\n' 

473 r'^[ \t]+S =((?:(?:[ \t]+[\S]+){5}\n){3})[ \t=]+\n', 

474 """\ 

475 ============= total gradient ============== 

476 S = ( -0.22668 0.27174 0.19134 ) 

477 ( 0.23150 -0.26760 0.23226 ) 

478 ( 0.19090 0.27206 -0.22700 ) 

479 =================================================== 

480""", re.M) 

481 

482 

483# Extract stress tensor from a plane wave calculation 

484def _get_stress(chunk, cell): 

485 stress_blocks = _stress.findall(chunk) 

486 if not stress_blocks: 

487 return None 

488 stress_block = stress_blocks[-1] 

489 stress = np.zeros((3, 3)) 

490 for i, row in enumerate(stress_block.strip().split('\n')): 

491 stress[i] = [float(x) for x in row.split()[1:4]] 

492 stress = (stress @ cell) * Hartree / Bohr / cell.volume 

493 stress = 0.5 * (stress + stress.T) 

494 # convert from 3x3 array to Voigt form 

495 return stress.ravel()[[0, 4, 8, 5, 2, 1]] 

496 

497 

498# MO/band eigenvalue and occupancy parser for plane wave calculations 

499_nwpw_eval_block = _define_pattern( 

500 r'(?:(?:^[ \t]+Brillouin zone point:[ \t]+[\S]+[ \t]*\n' 

501 r'(?:[ \t\S]*\n){3,4})?' 

502 r'^[ \t]+(?:virtual )?orbital energies:\n' 

503 r'(?:^(?:(?:[ \t]+[\S]+){3,4}){1,2}[ \t]*\n)+\n{,3})+', 

504 """\ 

505 Brillouin zone point: 1 

506 weight= 0.074074 

507 k =< 0.333 0.333 0.333> . <b1,b2,b3>  

508 =< 0.307 0.307 0.307> 

509 

510 orbital energies: 

511 0.3919370E+00 ( 10.665eV) occ=1.000 

512 0.3908827E+00 ( 10.637eV) occ=1.000 0.4155535E+00 ( 11.308eV) occ=1.000 

513 0.3607689E+00 ( 9.817eV) occ=1.000 0.3827820E+00 ( 10.416eV) occ=1.000 

514 0.3544000E+00 ( 9.644eV) occ=1.000 0.3782641E+00 ( 10.293eV) occ=1.000 

515 0.3531137E+00 ( 9.609eV) occ=1.000 0.3778819E+00 ( 10.283eV) occ=1.000 

516 0.2596367E+00 ( 7.065eV) occ=1.000 0.2820723E+00 ( 7.676eV) occ=1.000 

517 

518 Brillouin zone point: 2 

519 weight= 0.074074 

520 k =< -0.000 0.333 0.333> . <b1,b2,b3>  

521 =< 0.614 0.000 0.000> 

522 

523 orbital energies: 

524 0.3967132E+00 ( 10.795eV) occ=1.000 

525 0.3920006E+00 ( 10.667eV) occ=1.000 0.4197952E+00 ( 11.423eV) occ=1.000 

526 0.3912442E+00 ( 10.646eV) occ=1.000 0.4125086E+00 ( 11.225eV) occ=1.000 

527 0.3910472E+00 ( 10.641eV) occ=1.000 0.4124238E+00 ( 11.223eV) occ=1.000 

528 0.3153977E+00 ( 8.582eV) occ=1.000 0.3379797E+00 ( 9.197eV) occ=1.000 

529 0.2801606E+00 ( 7.624eV) occ=1.000 0.3052478E+00 ( 8.306eV) occ=1.000 

530""", re.M) # noqa: E501, W291 

531 

532# Parser for kpoint weights for a plane wave calculation 

533_kpt_weight = _define_pattern( 

534 r'^[ \t]+Brillouin zone point:[ \t]+([\S]+)[ \t]*\n' 

535 r'^[ \t]+weight=[ \t]+([\S]+)[ \t]*\n', 

536 """\ 

537 Brillouin zone point: 1 

538 weight= 0.074074  

539""", re.M) # noqa: W291 

540 

541 

542# Parse eigenvalues and occupancies from a plane wave calculation 

543def _get_pw_kpts(chunk): 

544 eval_blocks = [] 

545 for block in _nwpw_eval_block.findall(chunk): 

546 if 'pathlength' not in block: 

547 eval_blocks.append(block) 

548 if not eval_blocks: 

549 return [] 

550 if 'virtual' in eval_blocks[-1]: 

551 occ_block = eval_blocks[-2] 

552 virt_block = eval_blocks[-1] 

553 else: 

554 occ_block = eval_blocks[-1] 

555 virt_block = '' 

556 kpts = NWChemKpts() 

557 _extract_pw_kpts(occ_block, kpts, 1.) 

558 _extract_pw_kpts(virt_block, kpts, 0.) 

559 for match in _kpt_weight.finditer(occ_block): 

560 index, weight = match.groups() 

561 kpts.set_weight(index, float(weight)) 

562 return kpts.to_ibz_kpts(), kpts.to_singlepointkpts() 

563 

564 

565# Helper class for keeping track of kpoints and converting to 

566# SinglePointKPoint objects. 

567class NWChemKpts: 

568 def __init__(self): 

569 self.data = {} 

570 self.ibz_kpts = {} 

571 self.weights = {} 

572 

573 def add_ibz_kpt(self, index, raw_kpt): 

574 kpt = np.array([float(x.strip('>')) for x in raw_kpt.split()[1:4]]) 

575 self.ibz_kpts[index] = kpt 

576 

577 def add_eval(self, index, spin, energy, occ): 

578 if index not in self.data: 

579 self.data[index] = {} 

580 if spin not in self.data[index]: 

581 self.data[index][spin] = [] 

582 self.data[index][spin].append((energy, occ)) 

583 

584 def set_weight(self, index, weight): 

585 self.weights[index] = weight 

586 

587 def to_ibz_kpts(self): 

588 if not self.ibz_kpts: 

589 return np.array([[0., 0., 0.]]) 

590 sorted_kpts = sorted(list(self.ibz_kpts.items()), key=lambda x: x[0]) 

591 return np.array(list(zip(*sorted_kpts))[1]) 

592 

593 def to_singlepointkpts(self): 

594 kpts = [] 

595 for i, (index, spins) in enumerate(self.data.items()): 

596 weight = self.weights[index] 

597 for spin, (_, data) in enumerate(spins.items()): 

598 energies, occs = np.array(sorted(data, key=lambda x: x[0])).T 

599 kpts.append(SinglePointKPoint(weight, spin, i, energies, occs)) 

600 return kpts 

601 

602 

603# Extracts MO/band data from a pattern matched by _nwpw_eval_block above 

604_kpt = _define_pattern( 

605 r'^[ \t]+Brillouin zone point:[ \t]+([\S]+)[ \t]*\n' 

606 r'^[ \t]+weight=[ \t]+([\S])+[ \t]*\n' 

607 r'^[ \t]+k[ \t]+([ \t\S]+)\n' 

608 r'(?:^[ \t\S]*\n){1,2}' 

609 r'^[ \t]+(?:virtual )?orbital energies:\n' 

610 r'((?:^(?:(?:[ \t]+[\S]+){3,4}){1,2}[ \t]*\n)+)', 

611 """\ 

612 Brillouin zone point: 1 

613 weight= 0.074074 

614 k =< 0.333 0.333 0.333> . <b1,b2,b3>  

615 =< 0.307 0.307 0.307> 

616 

617 orbital energies: 

618 0.3919370E+00 ( 10.665eV) occ=1.000 

619 0.3908827E+00 ( 10.637eV) occ=1.000 0.4155535E+00 ( 11.308eV) occ=1.000 

620 0.3607689E+00 ( 9.817eV) occ=1.000 0.3827820E+00 ( 10.416eV) occ=1.000 

621 0.3544000E+00 ( 9.644eV) occ=1.000 0.3782641E+00 ( 10.293eV) occ=1.000 

622 0.3531137E+00 ( 9.609eV) occ=1.000 0.3778819E+00 ( 10.283eV) occ=1.000 

623 0.2596367E+00 ( 7.065eV) occ=1.000 0.2820723E+00 ( 7.676eV) occ=1.000 

624""", re.M) # noqa: E501, W291 

625 

626 

627# Extracts kpoints from a plane wave calculation 

628def _extract_pw_kpts(chunk, kpts, default_occ): 

629 for match in _kpt.finditer(chunk): 

630 point, weight, raw_kpt, orbitals = match.groups() 

631 index = int(point) - 1 

632 for line in orbitals.split('\n'): 

633 tokens = line.strip().split() 

634 if not tokens: 

635 continue 

636 ntokens = len(tokens) 

637 a_e = float(tokens[0]) * Hartree 

638 if ntokens % 3 == 0: 

639 a_o = default_occ 

640 else: 

641 a_o = float(tokens[3].split('=')[1]) 

642 kpts.add_eval(index, 0, a_e, a_o) 

643 

644 if ntokens <= 4: 

645 continue 

646 if ntokens == 6: 

647 b_e = float(tokens[3]) * Hartree 

648 b_o = default_occ 

649 elif ntokens == 8: 

650 b_e = float(tokens[4]) * Hartree 

651 b_o = float(tokens[7].split('=')[1]) 

652 kpts.add_eval(index, 1, b_e, b_o) 

653 kpts.set_weight(index, float(weight)) 

654 kpts.add_ibz_kpt(index, raw_kpt)