Coverage for /builds/ase/ase/ase/io/magres.py: 84.21%

1# fmt: off

3"""This module provides I/O functions for the MAGRES file format, introduced

4by CASTEP as an output format to store structural data and ab-initio

5calculated NMR parameters.

6Authors: Simone Sturniolo (ase implementation), Tim Green (original magres

7 parser code)

8"""

10import re

11from collections import OrderedDict

13import numpy as np

15import ase.units

16from ase.atoms import Atoms

17from ase.calculators.singlepoint import SinglePointDFTCalculator

18from ase.spacegroup import Spacegroup

19from ase.spacegroup.spacegroup import SpacegroupNotFoundError

21_mprops = {

22 'ms': ('sigma', 1),

23 'sus': ('S', 0),

24 'efg': ('V', 1),

25 'isc': ('K', 2)}

26# (matrix name, number of atoms in interaction) for various magres quantities

29def read_magres(fd, include_unrecognised=False):

30 """

31 Reader function for magres files.

32 """

34 blocks_re = re.compile(r'[\[<](?P<block_name>.*?)[>\]](.*?)[<\[]/' +

35 r'(?P=block_name)[\]>]', re.M | re.S)

37 """

38 Here are defined the various functions required to parse

39 different blocks.

40 """

42 def tensor33(x):

43 return np.squeeze(np.reshape(x, (3, 3))).tolist()

45 def tensor31(x):

46 return np.squeeze(np.reshape(x, (3, 1))).tolist()

48 def get_version(file_contents):

49 """

50 Look for and parse the magres file format version line

51 """

53 lines = file_contents.split('\n')

54 match = re.match(r'\#\$magres-abinitio-v([0-9]+).([0-9]+)', lines[0])

56 if match:

57 version = match.groups()

58 version = tuple(vnum for vnum in version)

59 else:

60 version = None

62 return version

64 def parse_blocks(file_contents):

65 """

66 Parse series of XML-like deliminated blocks into a list of

67 (block_name, contents) tuples

68 """

70 blocks = blocks_re.findall(file_contents)

72 return blocks

74 def parse_block(block):

75 """

76 Parse block contents into a series of (tag, data) records

77 """

79 def clean_line(line):

80 # Remove comments and whitespace at start and ends of line

81 line = re.sub('#(.*?)\n', '', line)

82 line = line.strip()

84 return line

86 name, data = block

88 lines = [clean_line(line) for line in data.split('\n')]

90 records = []

92 for line in lines:

93 xs = line.split()

95 if len(xs) > 0:

96 tag = xs[0]

97 data = xs[1:]

99 records.append((tag, data))

100

101 return (name, records)

102

103 def check_units(d):

104 """

105 Verify that given units for a particular tag are correct.

106 """

107

108 allowed_units = {'lattice': 'Angstrom',

109 'atom': 'Angstrom',

110 'ms': 'ppm',

111 'efg': 'au',

112 'efg_local': 'au',

113 'efg_nonlocal': 'au',

114 'isc': '10^19.T^2.J^-1',

115 'isc_fc': '10^19.T^2.J^-1',

116 'isc_orbital_p': '10^19.T^2.J^-1',

117 'isc_orbital_d': '10^19.T^2.J^-1',

118 'isc_spin': '10^19.T^2.J^-1',

119 'sus': '10^-6.cm^3.mol^-1',

120 'calc_cutoffenergy': 'Hartree', }

121

122 if d[0] in d and d[1] == allowed_units[d[0]]:

123 pass

124 else:

125 raise RuntimeError(f'Unrecognized units: {d[0]} {d[1]}')

126

127 return d

128

129 def parse_magres_block(block):

130 """

131 Parse magres block into data dictionary given list of record

132 tuples.

133 """

134

135 _name, records = block

136

137 # 3x3 tensor

138 def ntensor33(name):

139 return lambda d: {name: tensor33([float(x) for x in data])}

140

141 # Atom label, atom index and 3x3 tensor

142 def sitensor33(name):

143 return lambda d: _parse_sitensor33(name, data)

144

145 # 2x(Atom label, atom index) and 3x3 tensor

146 def sisitensor33(name):

147 return lambda d: {'atom1': {'label': data[0],

148 'index': int(data[1])},

149 'atom2': {'label': data[2],

150 'index': int(data[3])},

151 name: tensor33([float(x) for x in data[4:]])}

152

153 tags = {'ms': sitensor33('sigma'),

154 'sus': ntensor33('S'),

155 'efg': sitensor33('V'),

156 'efg_local': sitensor33('V'),

157 'efg_nonlocal': sitensor33('V'),

158 'isc': sisitensor33('K'),

159 'isc_fc': sisitensor33('K'),

160 'isc_spin': sisitensor33('K'),

161 'isc_orbital_p': sisitensor33('K'),

162 'isc_orbital_d': sisitensor33('K'),

163 'units': check_units}

164

165 data_dict = {}

166

167 for record in records:

168 tag, data = record

169

170 if tag not in data_dict:

171 data_dict[tag] = []

172

173 data_dict[tag].append(tags[tag](data))

174

175 return data_dict

176

177 def _unmunge_label_index(label_index: str) -> tuple[str, str]:

178 """Splits a label_index string into a label and an index,

179 where the index is always the final 3 digits.

180

181 This function handles cases where the site label and index are combined

182 in CASTEP magres files (versions < 23),

183 e.g., 'H1222' instead of 'H1' and '222'.

184

185 Since site labels can contain numbers (e.g., H1, H2, H1a),

186 we extract the index as the final 3 digits.

187 The remaining characters form the label.

188

189 Note: Only call this function when label and index are confirmed

190 to be combined (detected by the line having 10 fields instead of 11).

191

192 Parameters

193 ----------

194 label_index : str

195 The input string containing the combined label and index

196 (e.g., 'H1222')

197

198 Returns

199 -------

200 tuple[str, str]

201 A tuple of (label, index) strings (e.g., ('H1', '222'))

202

203 Raises

204 ------

205 RuntimeError

206 If the index is >999 (not supported by this solution))

207 If invalid data format or regex match failure

208

209 Examples

210 --------

211 >>> _unmunge_label_index('H1222')

212 ('H1', '222')

213 >>> _unmunge_label_index('C201')

214 ('C', '201')

215 >>> _unmunge_label_index('H23104')

216 ('H23', '104')

217 >>> _unmunge_label_index('H1a100')

218 ('H1a', '100')

219 """

220 match = re.match(r'(.+?)(\d{3})$', label_index)

221 if match:

222 label, index = match.groups()

223 if not isinstance(label, str) or not isinstance(index, str):

224 raise RuntimeError("Regex match produced non-string values")

225 if index == '000':

226 raise RuntimeError(

227 "Index greater than 999 detected. This is not supported in "

228 "magres files with munged label and indices. "

229 "Try manually unmunging the label and index."

230 )

231 return (label, index)

232 raise RuntimeError('Invalid data in magres block. '

233 'Check the site labels and indices.')

234

235 def _parse_sitensor33(name, data):

236 # We expect label, index, and then the 3x3 tensor

237 if len(data) == 10:

238 label, index = _unmunge_label_index(data[0])

239 data = [label, index] + data[1:]

240 if len(data) != 11:

241 raise ValueError(

242 f"Expected 11 values for {name} tensor data, "

243 f"got {len(data)}"

244 )

245

246 return {'atom': {'label': data[0],

247 'index': int(data[1])},

248 name: tensor33([float(x) for x in data[2:]])}

249

250 def parse_atoms_block(block):

251 """

252 Parse atoms block into data dictionary given list of record tuples.

253 """

254

255 _name, records = block

256

257 # Lattice record: a1, a2 a3, b1, b2, b3, c1, c2 c3

258 def lattice(d):

259 return tensor33([float(x) for x in data])

260

261 # Atom record: label, index, x, y, z

262 def atom(d):

263 return {'species': data[0],

264 'label': data[1],

265 'index': int(data[2]),

266 'position': tensor31([float(x) for x in data[3:]])}

267

268 def symmetry(d):

269 return ' '.join(data)

270

271 tags = {'lattice': lattice,

272 'atom': atom,

273 'units': check_units,

274 'symmetry': symmetry}

275

276 data_dict = {}

277

278 for record in records:

279 tag, data = record

280 if tag not in data_dict:

281 data_dict[tag] = []

282 data_dict[tag].append(tags[tag](data))

283

284 return data_dict

285

286 def parse_generic_block(block):

287 """

288 Parse any other block into data dictionary given list of record

289 tuples.

290 """

291

292 _name, records = block

293

294 data_dict = {}

295

296 for record in records:

297 tag, data = record

298

299 if tag not in data_dict:

300 data_dict[tag] = []

301

302 data_dict[tag].append(data)

303

304 return data_dict

305

306 """

307 Actual parser code.

308 """

309

310 block_parsers = {'magres': parse_magres_block,

311 'atoms': parse_atoms_block,

312 'calculation': parse_generic_block, }

313

314 file_contents = fd.read()

315

316 # This works as a validity check

317 version = get_version(file_contents)

318 if version is None:

319 # This isn't even a .magres file!

320 raise RuntimeError('File is not in standard Magres format')

321 blocks = parse_blocks(file_contents)

322

323 data_dict = {}

324

325 for block_data in blocks:

326 block = parse_block(block_data)

327

328 if block[0] in block_parsers:

329 block_dict = block_parsers[block[0]](block)

330 data_dict[block[0]] = block_dict

331 else:

332 # Throw in the text content of blocks we don't recognise

333 if include_unrecognised:

334 data_dict[block[0]] = block_data[1]

335

336 # Now the loaded data must be turned into an ASE Atoms object

337

338 # First check if the file is even viable

339 if 'atoms' not in data_dict:

340 raise RuntimeError('Magres file does not contain structure data')

341

342 # Allowed units handling. This is redundant for now but

343 # could turn out useful in the future

344

345 magres_units = {'Angstrom': ase.units.Ang}

346

347 # Lattice parameters?

348 if 'lattice' in data_dict['atoms']:

349 try:

350 u = dict(data_dict['atoms']['units'])['lattice']

351 except KeyError:

352 raise RuntimeError('No units detected in file for lattice')

353 u = magres_units[u]

354 cell = np.array(data_dict['atoms']['lattice'][0]) * u

355 pbc = True

356 else:

357 cell = None

358 pbc = False

359

360 # Now the atoms

361 symbols = []

362 positions = []

363 indices = []

364 labels = []

365

366 if 'atom' in data_dict['atoms']:

367 try:

368 u = dict(data_dict['atoms']['units'])['atom']

369 except KeyError:

370 raise RuntimeError('No units detected in file for atom positions')

371 u = magres_units[u]

372 # Now we have to account for the possibility of there being CASTEP

373 # 'custom' species amongst the symbols

374 custom_species = None

375 for a in data_dict['atoms']['atom']:

376 spec_custom = a['species'].split(':', 1)

377 if len(spec_custom) > 1 and custom_species is None:

378 # Add it to the custom info!

379 custom_species = list(symbols)

380 symbols.append(spec_custom[0])

381 positions.append(a['position'])

382 indices.append(a['index'])

383 labels.append(a['label'])

384 if custom_species is not None:

385 custom_species.append(a['species'])

386

387 atoms = Atoms(cell=cell,

388 pbc=pbc,

389 symbols=symbols,

390 positions=positions)

391

392 # Add custom species if present

393 if custom_species is not None:

394 atoms.new_array('castep_custom_species', np.array(custom_species))

395

396 # Add the spacegroup, if present and recognizable

397 if 'symmetry' in data_dict['atoms']:

398 try:

399 spg = Spacegroup(data_dict['atoms']['symmetry'][0])

400 except SpacegroupNotFoundError:

401 # Not found

402 spg = Spacegroup(1) # Most generic one

403 atoms.info['spacegroup'] = spg

404

405 # Set up the rest of the properties as arrays

406 atoms.new_array('indices', np.array(indices))

407 atoms.new_array('labels', np.array(labels))

408

409 # Now for the magres specific stuff

410 li_list = list(zip(labels, indices))

411

412 def create_magres_array(name, order, block):

413

414 if order == 1:

415 u_arr = [None] * len(li_list)

416 elif order == 2:

417 u_arr = [[None] * (i + 1) for i in range(len(li_list))]

418 else:

419 raise ValueError(

420 'Invalid order value passed to create_magres_array')

421

422 for s in block:

423 # Find the atom index/indices

424 if order == 1:

425 # First find out which atom this is

426 at = (s['atom']['label'], s['atom']['index'])

427 try:

428 ai = li_list.index(at)

429 except ValueError:

430 raise RuntimeError('Invalid data in magres block')

431 # Then add the relevant quantity

432 u_arr[ai] = s[mn]

433 else:

434 at1 = (s['atom1']['label'], s['atom1']['index'])

435 at2 = (s['atom2']['label'], s['atom2']['index'])

436 ai1 = li_list.index(at1)

437 ai2 = li_list.index(at2)

438 # Sort them

439 ai1, ai2 = sorted((ai1, ai2), reverse=True)

440 u_arr[ai1][ai2] = s[mn]

441

442 if order == 1:

443 return np.array(u_arr)

444 else:

445 return np.array(u_arr, dtype=object)

446

447 if 'magres' in data_dict:

448 if 'units' in data_dict['magres']:

449 atoms.info['magres_units'] = dict(data_dict['magres']['units'])

450 for u in atoms.info['magres_units']:

451 # This bit to keep track of tags

452 u0 = u.split('_')[0]

453

454 if u0 not in _mprops:

455 raise RuntimeError('Invalid data in magres block')

456

457 mn, order = _mprops[u0]

458

459 if order > 0:

460 u_arr = create_magres_array(mn, order,

461 data_dict['magres'][u])

462 atoms.new_array(u, u_arr)

463 else:

464 # atoms.info['magres_data'] = atoms.info.get('magres_data',

465 # {})

466 # # We only take element 0 because for this sort of data

467 # # there should be only that

468 # atoms.info['magres_data'][u] = \

469 # data_dict['magres'][u][0][mn]

470 if atoms.calc is None:

471 calc = SinglePointDFTCalculator(atoms)

472 atoms.calc = calc

473 atoms.calc.results[u] = data_dict['magres'][u][0][mn]

474

475 if 'calculation' in data_dict:

476 atoms.info['magresblock_calculation'] = data_dict['calculation']

477

478 if include_unrecognised:

479 for b in data_dict:

480 if b not in block_parsers:

481 atoms.info['magresblock_' + b] = data_dict[b]

482

483 return atoms

484

485

486def tensor_string(tensor):

487 return ' '.join(' '.join(str(x) for x in xs) for xs in tensor)

488

489

490def write_magres(fd, image):

491 """

492 A writing function for magres files. Two steps: first data are arranged

493 into structures, then dumped to the actual file

494 """

495

496 image_data = {}

497 image_data['atoms'] = {'units': []}

498 # Contains units, lattice and each individual atom

499 if np.all(image.get_pbc()):

500 # Has lattice!

501 image_data['atoms']['units'].append(['lattice', 'Angstrom'])

502 image_data['atoms']['lattice'] = [image.get_cell()]

503

504 # Now for the atoms

505 if image.has('labels'):

506 labels = image.get_array('labels')

507 else:

508 labels = image.get_chemical_symbols()

509

510 if image.has('indices'):

511 indices = image.get_array('indices')

512 else:

513 indices = [labels[:i + 1].count(labels[i]) for i in range(len(labels))]

514

515 # Iterate over atoms

516 symbols = (image.get_array('castep_custom_species')

517 if image.has('castep_custom_species')

518 else image.get_chemical_symbols())

519

520 atom_info = list(zip(symbols,

521 image.get_positions()))

522 if len(atom_info) > 0:

523 image_data['atoms']['units'].append(['atom', 'Angstrom'])

524 image_data['atoms']['atom'] = []

525

526 for i, a in enumerate(atom_info):

527 image_data['atoms']['atom'].append({

528 'index': indices[i],

529 'position': a[1],

530 'species': a[0],

531 'label': labels[i]})

532

533 # Spacegroup, if present

534 if 'spacegroup' in image.info:

535 image_data['atoms']['symmetry'] = [image.info['spacegroup']

536 .symbol.replace(' ', '')]

537

538 # Now go on to do the same for magres information

539 if 'magres_units' in image.info:

540

541 image_data['magres'] = {'units': []}

542

543 for u in image.info['magres_units']:

544 # Get the type

545 p = u.split('_')[0]

546 if p in _mprops:

547 image_data['magres']['units'].append(

548 [u, image.info['magres_units'][u]])

549 image_data['magres'][u] = []

550 mn, order = _mprops[p]

551

552 if order == 0:

553 # The case of susceptibility

554 tens = {

555 mn: image.calc.results[u]

556 }

557 image_data['magres'][u] = tens

558 else:

559 arr = image.get_array(u)

560 li_tab = zip(labels, indices)

561 for i, (lab, ind) in enumerate(li_tab):

562 if order == 2:

563 for j, (lab2, ind2) in enumerate(li_tab[:i + 1]):

564 if arr[i][j] is not None:

565 tens = {mn: arr[i][j],

566 'atom1': {'label': lab,

567 'index': ind},

568 'atom2': {'label': lab2,

569 'index': ind2}}

570 image_data['magres'][u].append(tens)

571 elif order == 1:

572 if arr[i] is not None:

573 tens = {mn: arr[i],

574 'atom': {'label': lab,

575 'index': ind}}

576 image_data['magres'][u].append(tens)

577

578 # Calculation block, if present

579 if 'magresblock_calculation' in image.info:

580 image_data['calculation'] = image.info['magresblock_calculation']

581

582 def write_units(data, out):

583 if 'units' in data:

584 for tag, units in data['units']:

585 out.append(f' units {tag} {units}')

586

587 def write_magres_block(data):

588 """

589 Write out a <magres> block from its dictionary representation

590 """

591

592 out = []

593

594 def nout(tag, tensor_name):

595 if tag in data:

596 out.append(' '.join([' ', tag,

597 tensor_string(data[tag][tensor_name])]))

598

599 def siout(tag, tensor_name):

600 if tag in data:

601 for atom_si in data[tag]:

602 out.append((' %s %s %d '

603 '%s') % (tag,

604 atom_si['atom']['label'],

605 atom_si['atom']['index'],

606 tensor_string(atom_si[tensor_name])))

607

608 write_units(data, out)

609

610 nout('sus', 'S')

611 siout('ms', 'sigma')

612

613 siout('efg_local', 'V')

614 siout('efg_nonlocal', 'V')

615 siout('efg', 'V')

616

617 def sisiout(tag, tensor_name):

618 if tag in data:

619 for isc in data[tag]:

620 out.append((' %s %s %d %s %d '

621 '%s') % (tag,

622 isc['atom1']['label'],

623 isc['atom1']['index'],

624 isc['atom2']['label'],

625 isc['atom2']['index'],

626 tensor_string(isc[tensor_name])))

627

628 sisiout('isc_fc', 'K')

629 sisiout('isc_orbital_p', 'K')

630 sisiout('isc_orbital_d', 'K')

631 sisiout('isc_spin', 'K')

632 sisiout('isc', 'K')

633

634 return '\n'.join(out)

635

636 def write_atoms_block(data):

637 out = []

638

639 write_units(data, out)

640

641 if 'lattice' in data:

642 for lat in data['lattice']:

643 out.append(f" lattice {tensor_string(lat)}")

644

645 if 'symmetry' in data:

646 for sym in data['symmetry']:

647 out.append(f' symmetry {sym}')

648

649 if 'atom' in data:

650 for a in data['atom']:

651 out.append((' atom %s %s %s '

652 '%s') % (a['species'],

653 a['label'],

654 a['index'],

655 ' '.join(str(x) for x in a['position'])))

656

657 return '\n'.join(out)

658

659 def write_generic_block(data):

660 out = []

661

662 for tag, data in data.items():

663 for value in data:

664 out.append('{} {}'.format(tag, ' '.join(str(x) for x in value)))

665

666 return '\n'.join(out)

667

668 # Using this to preserve order

669 block_writers = OrderedDict([('calculation', write_generic_block),

670 ('atoms', write_atoms_block),

671 ('magres', write_magres_block)])

672

673 # First, write the header

674 fd.write('#$magres-abinitio-v1.0\n')

675 fd.write('# Generated by the Atomic Simulation Environment library\n')

676

677 for b in block_writers:

678 if b in image_data:

679 fd.write(f'[{b}]\n')

680 fd.write(block_writers[b](image_data[b]))

681 fd.write(f'\n[/{b}]\n')

682

683 # Now on to check for any non-standard blocks...

684 for i in image.info:

685 if '_' in i:

686 ismag, b = i.split('_', 1)

687 if ismag == 'magresblock' and b not in block_writers:

688 fd.write(f'[{b}]\n')

689 fd.write(image.info[i])

690 fd.write(f'[/{b}]\n')