Coverage for /builds/ase/ase/ase/io/magres.py: 84.21%

323 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-08-02 00:12 +0000

1# fmt: off 

2 

3"""This module provides I/O functions for the MAGRES file format, introduced 

4by CASTEP as an output format to store structural data and ab-initio 

5calculated NMR parameters. 

6Authors: Simone Sturniolo (ase implementation), Tim Green (original magres 

7 parser code) 

8""" 

9 

10import re 

11from collections import OrderedDict 

12 

13import numpy as np 

14 

15import ase.units 

16from ase.atoms import Atoms 

17from ase.calculators.singlepoint import SinglePointDFTCalculator 

18from ase.spacegroup import Spacegroup 

19from ase.spacegroup.spacegroup import SpacegroupNotFoundError 

20 

21_mprops = { 

22 'ms': ('sigma', 1), 

23 'sus': ('S', 0), 

24 'efg': ('V', 1), 

25 'isc': ('K', 2)} 

26# (matrix name, number of atoms in interaction) for various magres quantities 

27 

28 

29def read_magres(fd, include_unrecognised=False): 

30 """ 

31 Reader function for magres files. 

32 """ 

33 

34 blocks_re = re.compile(r'[\[<](?P<block_name>.*?)[>\]](.*?)[<\[]/' + 

35 r'(?P=block_name)[\]>]', re.M | re.S) 

36 

37 """ 

38 Here are defined the various functions required to parse 

39 different blocks. 

40 """ 

41 

42 def tensor33(x): 

43 return np.squeeze(np.reshape(x, (3, 3))).tolist() 

44 

45 def tensor31(x): 

46 return np.squeeze(np.reshape(x, (3, 1))).tolist() 

47 

48 def get_version(file_contents): 

49 """ 

50 Look for and parse the magres file format version line 

51 """ 

52 

53 lines = file_contents.split('\n') 

54 match = re.match(r'\#\$magres-abinitio-v([0-9]+).([0-9]+)', lines[0]) 

55 

56 if match: 

57 version = match.groups() 

58 version = tuple(vnum for vnum in version) 

59 else: 

60 version = None 

61 

62 return version 

63 

64 def parse_blocks(file_contents): 

65 """ 

66 Parse series of XML-like deliminated blocks into a list of 

67 (block_name, contents) tuples 

68 """ 

69 

70 blocks = blocks_re.findall(file_contents) 

71 

72 return blocks 

73 

74 def parse_block(block): 

75 """ 

76 Parse block contents into a series of (tag, data) records 

77 """ 

78 

79 def clean_line(line): 

80 # Remove comments and whitespace at start and ends of line 

81 line = re.sub('#(.*?)\n', '', line) 

82 line = line.strip() 

83 

84 return line 

85 

86 name, data = block 

87 

88 lines = [clean_line(line) for line in data.split('\n')] 

89 

90 records = [] 

91 

92 for line in lines: 

93 xs = line.split() 

94 

95 if len(xs) > 0: 

96 tag = xs[0] 

97 data = xs[1:] 

98 

99 records.append((tag, data)) 

100 

101 return (name, records) 

102 

103 def check_units(d): 

104 """ 

105 Verify that given units for a particular tag are correct. 

106 """ 

107 

108 allowed_units = {'lattice': 'Angstrom', 

109 'atom': 'Angstrom', 

110 'ms': 'ppm', 

111 'efg': 'au', 

112 'efg_local': 'au', 

113 'efg_nonlocal': 'au', 

114 'isc': '10^19.T^2.J^-1', 

115 'isc_fc': '10^19.T^2.J^-1', 

116 'isc_orbital_p': '10^19.T^2.J^-1', 

117 'isc_orbital_d': '10^19.T^2.J^-1', 

118 'isc_spin': '10^19.T^2.J^-1', 

119 'sus': '10^-6.cm^3.mol^-1', 

120 'calc_cutoffenergy': 'Hartree', } 

121 

122 if d[0] in d and d[1] == allowed_units[d[0]]: 

123 pass 

124 else: 

125 raise RuntimeError(f'Unrecognized units: {d[0]} {d[1]}') 

126 

127 return d 

128 

129 def parse_magres_block(block): 

130 """ 

131 Parse magres block into data dictionary given list of record 

132 tuples. 

133 """ 

134 

135 _name, records = block 

136 

137 # 3x3 tensor 

138 def ntensor33(name): 

139 return lambda d: {name: tensor33([float(x) for x in data])} 

140 

141 # Atom label, atom index and 3x3 tensor 

142 def sitensor33(name): 

143 return lambda d: _parse_sitensor33(name, data) 

144 

145 # 2x(Atom label, atom index) and 3x3 tensor 

146 def sisitensor33(name): 

147 return lambda d: {'atom1': {'label': data[0], 

148 'index': int(data[1])}, 

149 'atom2': {'label': data[2], 

150 'index': int(data[3])}, 

151 name: tensor33([float(x) for x in data[4:]])} 

152 

153 tags = {'ms': sitensor33('sigma'), 

154 'sus': ntensor33('S'), 

155 'efg': sitensor33('V'), 

156 'efg_local': sitensor33('V'), 

157 'efg_nonlocal': sitensor33('V'), 

158 'isc': sisitensor33('K'), 

159 'isc_fc': sisitensor33('K'), 

160 'isc_spin': sisitensor33('K'), 

161 'isc_orbital_p': sisitensor33('K'), 

162 'isc_orbital_d': sisitensor33('K'), 

163 'units': check_units} 

164 

165 data_dict = {} 

166 

167 for record in records: 

168 tag, data = record 

169 

170 if tag not in data_dict: 

171 data_dict[tag] = [] 

172 

173 data_dict[tag].append(tags[tag](data)) 

174 

175 return data_dict 

176 

177 def _unmunge_label_index(label_index: str) -> tuple[str, str]: 

178 """Splits a label_index string into a label and an index, 

179 where the index is always the final 3 digits. 

180 

181 This function handles cases where the site label and index are combined 

182 in CASTEP magres files (versions < 23), 

183 e.g., 'H1222' instead of 'H1' and '222'. 

184 

185 Since site labels can contain numbers (e.g., H1, H2, H1a), 

186 we extract the index as the final 3 digits. 

187 The remaining characters form the label. 

188 

189 Note: Only call this function when label and index are confirmed 

190 to be combined (detected by the line having 10 fields instead of 11). 

191 

192 Parameters 

193 ---------- 

194 label_index : str 

195 The input string containing the combined label and index 

196 (e.g., 'H1222') 

197 

198 Returns 

199 ------- 

200 tuple[str, str] 

201 A tuple of (label, index) strings (e.g., ('H1', '222')) 

202 

203 Raises 

204 ------ 

205 RuntimeError 

206 If the index is >999 (not supported by this solution)) 

207 If invalid data format or regex match failure 

208 

209 Examples 

210 -------- 

211 >>> _unmunge_label_index('H1222') 

212 ('H1', '222') 

213 >>> _unmunge_label_index('C201') 

214 ('C', '201') 

215 >>> _unmunge_label_index('H23104') 

216 ('H23', '104') 

217 >>> _unmunge_label_index('H1a100') 

218 ('H1a', '100') 

219 """ 

220 match = re.match(r'(.+?)(\d{3})$', label_index) 

221 if match: 

222 label, index = match.groups() 

223 if not isinstance(label, str) or not isinstance(index, str): 

224 raise RuntimeError("Regex match produced non-string values") 

225 if index == '000': 

226 raise RuntimeError( 

227 "Index greater than 999 detected. This is not supported in " 

228 "magres files with munged label and indices. " 

229 "Try manually unmunging the label and index." 

230 ) 

231 return (label, index) 

232 raise RuntimeError('Invalid data in magres block. ' 

233 'Check the site labels and indices.') 

234 

235 def _parse_sitensor33(name, data): 

236 # We expect label, index, and then the 3x3 tensor 

237 if len(data) == 10: 

238 label, index = _unmunge_label_index(data[0]) 

239 data = [label, index] + data[1:] 

240 if len(data) != 11: 

241 raise ValueError( 

242 f"Expected 11 values for {name} tensor data, " 

243 f"got {len(data)}" 

244 ) 

245 

246 return {'atom': {'label': data[0], 

247 'index': int(data[1])}, 

248 name: tensor33([float(x) for x in data[2:]])} 

249 

250 def parse_atoms_block(block): 

251 """ 

252 Parse atoms block into data dictionary given list of record tuples. 

253 """ 

254 

255 _name, records = block 

256 

257 # Lattice record: a1, a2 a3, b1, b2, b3, c1, c2 c3 

258 def lattice(d): 

259 return tensor33([float(x) for x in data]) 

260 

261 # Atom record: label, index, x, y, z 

262 def atom(d): 

263 return {'species': data[0], 

264 'label': data[1], 

265 'index': int(data[2]), 

266 'position': tensor31([float(x) for x in data[3:]])} 

267 

268 def symmetry(d): 

269 return ' '.join(data) 

270 

271 tags = {'lattice': lattice, 

272 'atom': atom, 

273 'units': check_units, 

274 'symmetry': symmetry} 

275 

276 data_dict = {} 

277 

278 for record in records: 

279 tag, data = record 

280 if tag not in data_dict: 

281 data_dict[tag] = [] 

282 data_dict[tag].append(tags[tag](data)) 

283 

284 return data_dict 

285 

286 def parse_generic_block(block): 

287 """ 

288 Parse any other block into data dictionary given list of record 

289 tuples. 

290 """ 

291 

292 _name, records = block 

293 

294 data_dict = {} 

295 

296 for record in records: 

297 tag, data = record 

298 

299 if tag not in data_dict: 

300 data_dict[tag] = [] 

301 

302 data_dict[tag].append(data) 

303 

304 return data_dict 

305 

306 """ 

307 Actual parser code. 

308 """ 

309 

310 block_parsers = {'magres': parse_magres_block, 

311 'atoms': parse_atoms_block, 

312 'calculation': parse_generic_block, } 

313 

314 file_contents = fd.read() 

315 

316 # This works as a validity check 

317 version = get_version(file_contents) 

318 if version is None: 

319 # This isn't even a .magres file! 

320 raise RuntimeError('File is not in standard Magres format') 

321 blocks = parse_blocks(file_contents) 

322 

323 data_dict = {} 

324 

325 for block_data in blocks: 

326 block = parse_block(block_data) 

327 

328 if block[0] in block_parsers: 

329 block_dict = block_parsers[block[0]](block) 

330 data_dict[block[0]] = block_dict 

331 else: 

332 # Throw in the text content of blocks we don't recognise 

333 if include_unrecognised: 

334 data_dict[block[0]] = block_data[1] 

335 

336 # Now the loaded data must be turned into an ASE Atoms object 

337 

338 # First check if the file is even viable 

339 if 'atoms' not in data_dict: 

340 raise RuntimeError('Magres file does not contain structure data') 

341 

342 # Allowed units handling. This is redundant for now but 

343 # could turn out useful in the future 

344 

345 magres_units = {'Angstrom': ase.units.Ang} 

346 

347 # Lattice parameters? 

348 if 'lattice' in data_dict['atoms']: 

349 try: 

350 u = dict(data_dict['atoms']['units'])['lattice'] 

351 except KeyError: 

352 raise RuntimeError('No units detected in file for lattice') 

353 u = magres_units[u] 

354 cell = np.array(data_dict['atoms']['lattice'][0]) * u 

355 pbc = True 

356 else: 

357 cell = None 

358 pbc = False 

359 

360 # Now the atoms 

361 symbols = [] 

362 positions = [] 

363 indices = [] 

364 labels = [] 

365 

366 if 'atom' in data_dict['atoms']: 

367 try: 

368 u = dict(data_dict['atoms']['units'])['atom'] 

369 except KeyError: 

370 raise RuntimeError('No units detected in file for atom positions') 

371 u = magres_units[u] 

372 # Now we have to account for the possibility of there being CASTEP 

373 # 'custom' species amongst the symbols 

374 custom_species = None 

375 for a in data_dict['atoms']['atom']: 

376 spec_custom = a['species'].split(':', 1) 

377 if len(spec_custom) > 1 and custom_species is None: 

378 # Add it to the custom info! 

379 custom_species = list(symbols) 

380 symbols.append(spec_custom[0]) 

381 positions.append(a['position']) 

382 indices.append(a['index']) 

383 labels.append(a['label']) 

384 if custom_species is not None: 

385 custom_species.append(a['species']) 

386 

387 atoms = Atoms(cell=cell, 

388 pbc=pbc, 

389 symbols=symbols, 

390 positions=positions) 

391 

392 # Add custom species if present 

393 if custom_species is not None: 

394 atoms.new_array('castep_custom_species', np.array(custom_species)) 

395 

396 # Add the spacegroup, if present and recognizable 

397 if 'symmetry' in data_dict['atoms']: 

398 try: 

399 spg = Spacegroup(data_dict['atoms']['symmetry'][0]) 

400 except SpacegroupNotFoundError: 

401 # Not found 

402 spg = Spacegroup(1) # Most generic one 

403 atoms.info['spacegroup'] = spg 

404 

405 # Set up the rest of the properties as arrays 

406 atoms.new_array('indices', np.array(indices)) 

407 atoms.new_array('labels', np.array(labels)) 

408 

409 # Now for the magres specific stuff 

410 li_list = list(zip(labels, indices)) 

411 

412 def create_magres_array(name, order, block): 

413 

414 if order == 1: 

415 u_arr = [None] * len(li_list) 

416 elif order == 2: 

417 u_arr = [[None] * (i + 1) for i in range(len(li_list))] 

418 else: 

419 raise ValueError( 

420 'Invalid order value passed to create_magres_array') 

421 

422 for s in block: 

423 # Find the atom index/indices 

424 if order == 1: 

425 # First find out which atom this is 

426 at = (s['atom']['label'], s['atom']['index']) 

427 try: 

428 ai = li_list.index(at) 

429 except ValueError: 

430 raise RuntimeError('Invalid data in magres block') 

431 # Then add the relevant quantity 

432 u_arr[ai] = s[mn] 

433 else: 

434 at1 = (s['atom1']['label'], s['atom1']['index']) 

435 at2 = (s['atom2']['label'], s['atom2']['index']) 

436 ai1 = li_list.index(at1) 

437 ai2 = li_list.index(at2) 

438 # Sort them 

439 ai1, ai2 = sorted((ai1, ai2), reverse=True) 

440 u_arr[ai1][ai2] = s[mn] 

441 

442 if order == 1: 

443 return np.array(u_arr) 

444 else: 

445 return np.array(u_arr, dtype=object) 

446 

447 if 'magres' in data_dict: 

448 if 'units' in data_dict['magres']: 

449 atoms.info['magres_units'] = dict(data_dict['magres']['units']) 

450 for u in atoms.info['magres_units']: 

451 # This bit to keep track of tags 

452 u0 = u.split('_')[0] 

453 

454 if u0 not in _mprops: 

455 raise RuntimeError('Invalid data in magres block') 

456 

457 mn, order = _mprops[u0] 

458 

459 if order > 0: 

460 u_arr = create_magres_array(mn, order, 

461 data_dict['magres'][u]) 

462 atoms.new_array(u, u_arr) 

463 else: 

464 # atoms.info['magres_data'] = atoms.info.get('magres_data', 

465 # {}) 

466 # # We only take element 0 because for this sort of data 

467 # # there should be only that 

468 # atoms.info['magres_data'][u] = \ 

469 # data_dict['magres'][u][0][mn] 

470 if atoms.calc is None: 

471 calc = SinglePointDFTCalculator(atoms) 

472 atoms.calc = calc 

473 atoms.calc.results[u] = data_dict['magres'][u][0][mn] 

474 

475 if 'calculation' in data_dict: 

476 atoms.info['magresblock_calculation'] = data_dict['calculation'] 

477 

478 if include_unrecognised: 

479 for b in data_dict: 

480 if b not in block_parsers: 

481 atoms.info['magresblock_' + b] = data_dict[b] 

482 

483 return atoms 

484 

485 

486def tensor_string(tensor): 

487 return ' '.join(' '.join(str(x) for x in xs) for xs in tensor) 

488 

489 

490def write_magres(fd, image): 

491 """ 

492 A writing function for magres files. Two steps: first data are arranged 

493 into structures, then dumped to the actual file 

494 """ 

495 

496 image_data = {} 

497 image_data['atoms'] = {'units': []} 

498 # Contains units, lattice and each individual atom 

499 if np.all(image.get_pbc()): 

500 # Has lattice! 

501 image_data['atoms']['units'].append(['lattice', 'Angstrom']) 

502 image_data['atoms']['lattice'] = [image.get_cell()] 

503 

504 # Now for the atoms 

505 if image.has('labels'): 

506 labels = image.get_array('labels') 

507 else: 

508 labels = image.get_chemical_symbols() 

509 

510 if image.has('indices'): 

511 indices = image.get_array('indices') 

512 else: 

513 indices = [labels[:i + 1].count(labels[i]) for i in range(len(labels))] 

514 

515 # Iterate over atoms 

516 symbols = (image.get_array('castep_custom_species') 

517 if image.has('castep_custom_species') 

518 else image.get_chemical_symbols()) 

519 

520 atom_info = list(zip(symbols, 

521 image.get_positions())) 

522 if len(atom_info) > 0: 

523 image_data['atoms']['units'].append(['atom', 'Angstrom']) 

524 image_data['atoms']['atom'] = [] 

525 

526 for i, a in enumerate(atom_info): 

527 image_data['atoms']['atom'].append({ 

528 'index': indices[i], 

529 'position': a[1], 

530 'species': a[0], 

531 'label': labels[i]}) 

532 

533 # Spacegroup, if present 

534 if 'spacegroup' in image.info: 

535 image_data['atoms']['symmetry'] = [image.info['spacegroup'] 

536 .symbol.replace(' ', '')] 

537 

538 # Now go on to do the same for magres information 

539 if 'magres_units' in image.info: 

540 

541 image_data['magres'] = {'units': []} 

542 

543 for u in image.info['magres_units']: 

544 # Get the type 

545 p = u.split('_')[0] 

546 if p in _mprops: 

547 image_data['magres']['units'].append( 

548 [u, image.info['magres_units'][u]]) 

549 image_data['magres'][u] = [] 

550 mn, order = _mprops[p] 

551 

552 if order == 0: 

553 # The case of susceptibility 

554 tens = { 

555 mn: image.calc.results[u] 

556 } 

557 image_data['magres'][u] = tens 

558 else: 

559 arr = image.get_array(u) 

560 li_tab = zip(labels, indices) 

561 for i, (lab, ind) in enumerate(li_tab): 

562 if order == 2: 

563 for j, (lab2, ind2) in enumerate(li_tab[:i + 1]): 

564 if arr[i][j] is not None: 

565 tens = {mn: arr[i][j], 

566 'atom1': {'label': lab, 

567 'index': ind}, 

568 'atom2': {'label': lab2, 

569 'index': ind2}} 

570 image_data['magres'][u].append(tens) 

571 elif order == 1: 

572 if arr[i] is not None: 

573 tens = {mn: arr[i], 

574 'atom': {'label': lab, 

575 'index': ind}} 

576 image_data['magres'][u].append(tens) 

577 

578 # Calculation block, if present 

579 if 'magresblock_calculation' in image.info: 

580 image_data['calculation'] = image.info['magresblock_calculation'] 

581 

582 def write_units(data, out): 

583 if 'units' in data: 

584 for tag, units in data['units']: 

585 out.append(f' units {tag} {units}') 

586 

587 def write_magres_block(data): 

588 """ 

589 Write out a <magres> block from its dictionary representation 

590 """ 

591 

592 out = [] 

593 

594 def nout(tag, tensor_name): 

595 if tag in data: 

596 out.append(' '.join([' ', tag, 

597 tensor_string(data[tag][tensor_name])])) 

598 

599 def siout(tag, tensor_name): 

600 if tag in data: 

601 for atom_si in data[tag]: 

602 out.append((' %s %s %d ' 

603 '%s') % (tag, 

604 atom_si['atom']['label'], 

605 atom_si['atom']['index'], 

606 tensor_string(atom_si[tensor_name]))) 

607 

608 write_units(data, out) 

609 

610 nout('sus', 'S') 

611 siout('ms', 'sigma') 

612 

613 siout('efg_local', 'V') 

614 siout('efg_nonlocal', 'V') 

615 siout('efg', 'V') 

616 

617 def sisiout(tag, tensor_name): 

618 if tag in data: 

619 for isc in data[tag]: 

620 out.append((' %s %s %d %s %d ' 

621 '%s') % (tag, 

622 isc['atom1']['label'], 

623 isc['atom1']['index'], 

624 isc['atom2']['label'], 

625 isc['atom2']['index'], 

626 tensor_string(isc[tensor_name]))) 

627 

628 sisiout('isc_fc', 'K') 

629 sisiout('isc_orbital_p', 'K') 

630 sisiout('isc_orbital_d', 'K') 

631 sisiout('isc_spin', 'K') 

632 sisiout('isc', 'K') 

633 

634 return '\n'.join(out) 

635 

636 def write_atoms_block(data): 

637 out = [] 

638 

639 write_units(data, out) 

640 

641 if 'lattice' in data: 

642 for lat in data['lattice']: 

643 out.append(f" lattice {tensor_string(lat)}") 

644 

645 if 'symmetry' in data: 

646 for sym in data['symmetry']: 

647 out.append(f' symmetry {sym}') 

648 

649 if 'atom' in data: 

650 for a in data['atom']: 

651 out.append((' atom %s %s %s ' 

652 '%s') % (a['species'], 

653 a['label'], 

654 a['index'], 

655 ' '.join(str(x) for x in a['position']))) 

656 

657 return '\n'.join(out) 

658 

659 def write_generic_block(data): 

660 out = [] 

661 

662 for tag, data in data.items(): 

663 for value in data: 

664 out.append('{} {}'.format(tag, ' '.join(str(x) for x in value))) 

665 

666 return '\n'.join(out) 

667 

668 # Using this to preserve order 

669 block_writers = OrderedDict([('calculation', write_generic_block), 

670 ('atoms', write_atoms_block), 

671 ('magres', write_magres_block)]) 

672 

673 # First, write the header 

674 fd.write('#$magres-abinitio-v1.0\n') 

675 fd.write('# Generated by the Atomic Simulation Environment library\n') 

676 

677 for b in block_writers: 

678 if b in image_data: 

679 fd.write(f'[{b}]\n') 

680 fd.write(block_writers[b](image_data[b])) 

681 fd.write(f'\n[/{b}]\n') 

682 

683 # Now on to check for any non-standard blocks... 

684 for i in image.info: 

685 if '_' in i: 

686 ismag, b = i.split('_', 1) 

687 if ismag == 'magresblock' and b not in block_writers: 

688 fd.write(f'[{b}]\n') 

689 fd.write(image.info[i]) 

690 fd.write(f'[/{b}]\n')