Coverage for /builds/ase/ase/ase/io/magres.py: 84.21%
323 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-08-02 00:12 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2025-08-02 00:12 +0000
1# fmt: off
3"""This module provides I/O functions for the MAGRES file format, introduced
4by CASTEP as an output format to store structural data and ab-initio
5calculated NMR parameters.
6Authors: Simone Sturniolo (ase implementation), Tim Green (original magres
7 parser code)
8"""
10import re
11from collections import OrderedDict
13import numpy as np
15import ase.units
16from ase.atoms import Atoms
17from ase.calculators.singlepoint import SinglePointDFTCalculator
18from ase.spacegroup import Spacegroup
19from ase.spacegroup.spacegroup import SpacegroupNotFoundError
21_mprops = {
22 'ms': ('sigma', 1),
23 'sus': ('S', 0),
24 'efg': ('V', 1),
25 'isc': ('K', 2)}
26# (matrix name, number of atoms in interaction) for various magres quantities
29def read_magres(fd, include_unrecognised=False):
30 """
31 Reader function for magres files.
32 """
34 blocks_re = re.compile(r'[\[<](?P<block_name>.*?)[>\]](.*?)[<\[]/' +
35 r'(?P=block_name)[\]>]', re.M | re.S)
37 """
38 Here are defined the various functions required to parse
39 different blocks.
40 """
42 def tensor33(x):
43 return np.squeeze(np.reshape(x, (3, 3))).tolist()
45 def tensor31(x):
46 return np.squeeze(np.reshape(x, (3, 1))).tolist()
48 def get_version(file_contents):
49 """
50 Look for and parse the magres file format version line
51 """
53 lines = file_contents.split('\n')
54 match = re.match(r'\#\$magres-abinitio-v([0-9]+).([0-9]+)', lines[0])
56 if match:
57 version = match.groups()
58 version = tuple(vnum for vnum in version)
59 else:
60 version = None
62 return version
64 def parse_blocks(file_contents):
65 """
66 Parse series of XML-like deliminated blocks into a list of
67 (block_name, contents) tuples
68 """
70 blocks = blocks_re.findall(file_contents)
72 return blocks
74 def parse_block(block):
75 """
76 Parse block contents into a series of (tag, data) records
77 """
79 def clean_line(line):
80 # Remove comments and whitespace at start and ends of line
81 line = re.sub('#(.*?)\n', '', line)
82 line = line.strip()
84 return line
86 name, data = block
88 lines = [clean_line(line) for line in data.split('\n')]
90 records = []
92 for line in lines:
93 xs = line.split()
95 if len(xs) > 0:
96 tag = xs[0]
97 data = xs[1:]
99 records.append((tag, data))
101 return (name, records)
103 def check_units(d):
104 """
105 Verify that given units for a particular tag are correct.
106 """
108 allowed_units = {'lattice': 'Angstrom',
109 'atom': 'Angstrom',
110 'ms': 'ppm',
111 'efg': 'au',
112 'efg_local': 'au',
113 'efg_nonlocal': 'au',
114 'isc': '10^19.T^2.J^-1',
115 'isc_fc': '10^19.T^2.J^-1',
116 'isc_orbital_p': '10^19.T^2.J^-1',
117 'isc_orbital_d': '10^19.T^2.J^-1',
118 'isc_spin': '10^19.T^2.J^-1',
119 'sus': '10^-6.cm^3.mol^-1',
120 'calc_cutoffenergy': 'Hartree', }
122 if d[0] in d and d[1] == allowed_units[d[0]]:
123 pass
124 else:
125 raise RuntimeError(f'Unrecognized units: {d[0]} {d[1]}')
127 return d
129 def parse_magres_block(block):
130 """
131 Parse magres block into data dictionary given list of record
132 tuples.
133 """
135 _name, records = block
137 # 3x3 tensor
138 def ntensor33(name):
139 return lambda d: {name: tensor33([float(x) for x in data])}
141 # Atom label, atom index and 3x3 tensor
142 def sitensor33(name):
143 return lambda d: _parse_sitensor33(name, data)
145 # 2x(Atom label, atom index) and 3x3 tensor
146 def sisitensor33(name):
147 return lambda d: {'atom1': {'label': data[0],
148 'index': int(data[1])},
149 'atom2': {'label': data[2],
150 'index': int(data[3])},
151 name: tensor33([float(x) for x in data[4:]])}
153 tags = {'ms': sitensor33('sigma'),
154 'sus': ntensor33('S'),
155 'efg': sitensor33('V'),
156 'efg_local': sitensor33('V'),
157 'efg_nonlocal': sitensor33('V'),
158 'isc': sisitensor33('K'),
159 'isc_fc': sisitensor33('K'),
160 'isc_spin': sisitensor33('K'),
161 'isc_orbital_p': sisitensor33('K'),
162 'isc_orbital_d': sisitensor33('K'),
163 'units': check_units}
165 data_dict = {}
167 for record in records:
168 tag, data = record
170 if tag not in data_dict:
171 data_dict[tag] = []
173 data_dict[tag].append(tags[tag](data))
175 return data_dict
177 def _unmunge_label_index(label_index: str) -> tuple[str, str]:
178 """Splits a label_index string into a label and an index,
179 where the index is always the final 3 digits.
181 This function handles cases where the site label and index are combined
182 in CASTEP magres files (versions < 23),
183 e.g., 'H1222' instead of 'H1' and '222'.
185 Since site labels can contain numbers (e.g., H1, H2, H1a),
186 we extract the index as the final 3 digits.
187 The remaining characters form the label.
189 Note: Only call this function when label and index are confirmed
190 to be combined (detected by the line having 10 fields instead of 11).
192 Parameters
193 ----------
194 label_index : str
195 The input string containing the combined label and index
196 (e.g., 'H1222')
198 Returns
199 -------
200 tuple[str, str]
201 A tuple of (label, index) strings (e.g., ('H1', '222'))
203 Raises
204 ------
205 RuntimeError
206 If the index is >999 (not supported by this solution))
207 If invalid data format or regex match failure
209 Examples
210 --------
211 >>> _unmunge_label_index('H1222')
212 ('H1', '222')
213 >>> _unmunge_label_index('C201')
214 ('C', '201')
215 >>> _unmunge_label_index('H23104')
216 ('H23', '104')
217 >>> _unmunge_label_index('H1a100')
218 ('H1a', '100')
219 """
220 match = re.match(r'(.+?)(\d{3})$', label_index)
221 if match:
222 label, index = match.groups()
223 if not isinstance(label, str) or not isinstance(index, str):
224 raise RuntimeError("Regex match produced non-string values")
225 if index == '000':
226 raise RuntimeError(
227 "Index greater than 999 detected. This is not supported in "
228 "magres files with munged label and indices. "
229 "Try manually unmunging the label and index."
230 )
231 return (label, index)
232 raise RuntimeError('Invalid data in magres block. '
233 'Check the site labels and indices.')
235 def _parse_sitensor33(name, data):
236 # We expect label, index, and then the 3x3 tensor
237 if len(data) == 10:
238 label, index = _unmunge_label_index(data[0])
239 data = [label, index] + data[1:]
240 if len(data) != 11:
241 raise ValueError(
242 f"Expected 11 values for {name} tensor data, "
243 f"got {len(data)}"
244 )
246 return {'atom': {'label': data[0],
247 'index': int(data[1])},
248 name: tensor33([float(x) for x in data[2:]])}
250 def parse_atoms_block(block):
251 """
252 Parse atoms block into data dictionary given list of record tuples.
253 """
255 _name, records = block
257 # Lattice record: a1, a2 a3, b1, b2, b3, c1, c2 c3
258 def lattice(d):
259 return tensor33([float(x) for x in data])
261 # Atom record: label, index, x, y, z
262 def atom(d):
263 return {'species': data[0],
264 'label': data[1],
265 'index': int(data[2]),
266 'position': tensor31([float(x) for x in data[3:]])}
268 def symmetry(d):
269 return ' '.join(data)
271 tags = {'lattice': lattice,
272 'atom': atom,
273 'units': check_units,
274 'symmetry': symmetry}
276 data_dict = {}
278 for record in records:
279 tag, data = record
280 if tag not in data_dict:
281 data_dict[tag] = []
282 data_dict[tag].append(tags[tag](data))
284 return data_dict
286 def parse_generic_block(block):
287 """
288 Parse any other block into data dictionary given list of record
289 tuples.
290 """
292 _name, records = block
294 data_dict = {}
296 for record in records:
297 tag, data = record
299 if tag not in data_dict:
300 data_dict[tag] = []
302 data_dict[tag].append(data)
304 return data_dict
306 """
307 Actual parser code.
308 """
310 block_parsers = {'magres': parse_magres_block,
311 'atoms': parse_atoms_block,
312 'calculation': parse_generic_block, }
314 file_contents = fd.read()
316 # This works as a validity check
317 version = get_version(file_contents)
318 if version is None:
319 # This isn't even a .magres file!
320 raise RuntimeError('File is not in standard Magres format')
321 blocks = parse_blocks(file_contents)
323 data_dict = {}
325 for block_data in blocks:
326 block = parse_block(block_data)
328 if block[0] in block_parsers:
329 block_dict = block_parsers[block[0]](block)
330 data_dict[block[0]] = block_dict
331 else:
332 # Throw in the text content of blocks we don't recognise
333 if include_unrecognised:
334 data_dict[block[0]] = block_data[1]
336 # Now the loaded data must be turned into an ASE Atoms object
338 # First check if the file is even viable
339 if 'atoms' not in data_dict:
340 raise RuntimeError('Magres file does not contain structure data')
342 # Allowed units handling. This is redundant for now but
343 # could turn out useful in the future
345 magres_units = {'Angstrom': ase.units.Ang}
347 # Lattice parameters?
348 if 'lattice' in data_dict['atoms']:
349 try:
350 u = dict(data_dict['atoms']['units'])['lattice']
351 except KeyError:
352 raise RuntimeError('No units detected in file for lattice')
353 u = magres_units[u]
354 cell = np.array(data_dict['atoms']['lattice'][0]) * u
355 pbc = True
356 else:
357 cell = None
358 pbc = False
360 # Now the atoms
361 symbols = []
362 positions = []
363 indices = []
364 labels = []
366 if 'atom' in data_dict['atoms']:
367 try:
368 u = dict(data_dict['atoms']['units'])['atom']
369 except KeyError:
370 raise RuntimeError('No units detected in file for atom positions')
371 u = magres_units[u]
372 # Now we have to account for the possibility of there being CASTEP
373 # 'custom' species amongst the symbols
374 custom_species = None
375 for a in data_dict['atoms']['atom']:
376 spec_custom = a['species'].split(':', 1)
377 if len(spec_custom) > 1 and custom_species is None:
378 # Add it to the custom info!
379 custom_species = list(symbols)
380 symbols.append(spec_custom[0])
381 positions.append(a['position'])
382 indices.append(a['index'])
383 labels.append(a['label'])
384 if custom_species is not None:
385 custom_species.append(a['species'])
387 atoms = Atoms(cell=cell,
388 pbc=pbc,
389 symbols=symbols,
390 positions=positions)
392 # Add custom species if present
393 if custom_species is not None:
394 atoms.new_array('castep_custom_species', np.array(custom_species))
396 # Add the spacegroup, if present and recognizable
397 if 'symmetry' in data_dict['atoms']:
398 try:
399 spg = Spacegroup(data_dict['atoms']['symmetry'][0])
400 except SpacegroupNotFoundError:
401 # Not found
402 spg = Spacegroup(1) # Most generic one
403 atoms.info['spacegroup'] = spg
405 # Set up the rest of the properties as arrays
406 atoms.new_array('indices', np.array(indices))
407 atoms.new_array('labels', np.array(labels))
409 # Now for the magres specific stuff
410 li_list = list(zip(labels, indices))
412 def create_magres_array(name, order, block):
414 if order == 1:
415 u_arr = [None] * len(li_list)
416 elif order == 2:
417 u_arr = [[None] * (i + 1) for i in range(len(li_list))]
418 else:
419 raise ValueError(
420 'Invalid order value passed to create_magres_array')
422 for s in block:
423 # Find the atom index/indices
424 if order == 1:
425 # First find out which atom this is
426 at = (s['atom']['label'], s['atom']['index'])
427 try:
428 ai = li_list.index(at)
429 except ValueError:
430 raise RuntimeError('Invalid data in magres block')
431 # Then add the relevant quantity
432 u_arr[ai] = s[mn]
433 else:
434 at1 = (s['atom1']['label'], s['atom1']['index'])
435 at2 = (s['atom2']['label'], s['atom2']['index'])
436 ai1 = li_list.index(at1)
437 ai2 = li_list.index(at2)
438 # Sort them
439 ai1, ai2 = sorted((ai1, ai2), reverse=True)
440 u_arr[ai1][ai2] = s[mn]
442 if order == 1:
443 return np.array(u_arr)
444 else:
445 return np.array(u_arr, dtype=object)
447 if 'magres' in data_dict:
448 if 'units' in data_dict['magres']:
449 atoms.info['magres_units'] = dict(data_dict['magres']['units'])
450 for u in atoms.info['magres_units']:
451 # This bit to keep track of tags
452 u0 = u.split('_')[0]
454 if u0 not in _mprops:
455 raise RuntimeError('Invalid data in magres block')
457 mn, order = _mprops[u0]
459 if order > 0:
460 u_arr = create_magres_array(mn, order,
461 data_dict['magres'][u])
462 atoms.new_array(u, u_arr)
463 else:
464 # atoms.info['magres_data'] = atoms.info.get('magres_data',
465 # {})
466 # # We only take element 0 because for this sort of data
467 # # there should be only that
468 # atoms.info['magres_data'][u] = \
469 # data_dict['magres'][u][0][mn]
470 if atoms.calc is None:
471 calc = SinglePointDFTCalculator(atoms)
472 atoms.calc = calc
473 atoms.calc.results[u] = data_dict['magres'][u][0][mn]
475 if 'calculation' in data_dict:
476 atoms.info['magresblock_calculation'] = data_dict['calculation']
478 if include_unrecognised:
479 for b in data_dict:
480 if b not in block_parsers:
481 atoms.info['magresblock_' + b] = data_dict[b]
483 return atoms
486def tensor_string(tensor):
487 return ' '.join(' '.join(str(x) for x in xs) for xs in tensor)
490def write_magres(fd, image):
491 """
492 A writing function for magres files. Two steps: first data are arranged
493 into structures, then dumped to the actual file
494 """
496 image_data = {}
497 image_data['atoms'] = {'units': []}
498 # Contains units, lattice and each individual atom
499 if np.all(image.get_pbc()):
500 # Has lattice!
501 image_data['atoms']['units'].append(['lattice', 'Angstrom'])
502 image_data['atoms']['lattice'] = [image.get_cell()]
504 # Now for the atoms
505 if image.has('labels'):
506 labels = image.get_array('labels')
507 else:
508 labels = image.get_chemical_symbols()
510 if image.has('indices'):
511 indices = image.get_array('indices')
512 else:
513 indices = [labels[:i + 1].count(labels[i]) for i in range(len(labels))]
515 # Iterate over atoms
516 symbols = (image.get_array('castep_custom_species')
517 if image.has('castep_custom_species')
518 else image.get_chemical_symbols())
520 atom_info = list(zip(symbols,
521 image.get_positions()))
522 if len(atom_info) > 0:
523 image_data['atoms']['units'].append(['atom', 'Angstrom'])
524 image_data['atoms']['atom'] = []
526 for i, a in enumerate(atom_info):
527 image_data['atoms']['atom'].append({
528 'index': indices[i],
529 'position': a[1],
530 'species': a[0],
531 'label': labels[i]})
533 # Spacegroup, if present
534 if 'spacegroup' in image.info:
535 image_data['atoms']['symmetry'] = [image.info['spacegroup']
536 .symbol.replace(' ', '')]
538 # Now go on to do the same for magres information
539 if 'magres_units' in image.info:
541 image_data['magres'] = {'units': []}
543 for u in image.info['magres_units']:
544 # Get the type
545 p = u.split('_')[0]
546 if p in _mprops:
547 image_data['magres']['units'].append(
548 [u, image.info['magres_units'][u]])
549 image_data['magres'][u] = []
550 mn, order = _mprops[p]
552 if order == 0:
553 # The case of susceptibility
554 tens = {
555 mn: image.calc.results[u]
556 }
557 image_data['magres'][u] = tens
558 else:
559 arr = image.get_array(u)
560 li_tab = zip(labels, indices)
561 for i, (lab, ind) in enumerate(li_tab):
562 if order == 2:
563 for j, (lab2, ind2) in enumerate(li_tab[:i + 1]):
564 if arr[i][j] is not None:
565 tens = {mn: arr[i][j],
566 'atom1': {'label': lab,
567 'index': ind},
568 'atom2': {'label': lab2,
569 'index': ind2}}
570 image_data['magres'][u].append(tens)
571 elif order == 1:
572 if arr[i] is not None:
573 tens = {mn: arr[i],
574 'atom': {'label': lab,
575 'index': ind}}
576 image_data['magres'][u].append(tens)
578 # Calculation block, if present
579 if 'magresblock_calculation' in image.info:
580 image_data['calculation'] = image.info['magresblock_calculation']
582 def write_units(data, out):
583 if 'units' in data:
584 for tag, units in data['units']:
585 out.append(f' units {tag} {units}')
587 def write_magres_block(data):
588 """
589 Write out a <magres> block from its dictionary representation
590 """
592 out = []
594 def nout(tag, tensor_name):
595 if tag in data:
596 out.append(' '.join([' ', tag,
597 tensor_string(data[tag][tensor_name])]))
599 def siout(tag, tensor_name):
600 if tag in data:
601 for atom_si in data[tag]:
602 out.append((' %s %s %d '
603 '%s') % (tag,
604 atom_si['atom']['label'],
605 atom_si['atom']['index'],
606 tensor_string(atom_si[tensor_name])))
608 write_units(data, out)
610 nout('sus', 'S')
611 siout('ms', 'sigma')
613 siout('efg_local', 'V')
614 siout('efg_nonlocal', 'V')
615 siout('efg', 'V')
617 def sisiout(tag, tensor_name):
618 if tag in data:
619 for isc in data[tag]:
620 out.append((' %s %s %d %s %d '
621 '%s') % (tag,
622 isc['atom1']['label'],
623 isc['atom1']['index'],
624 isc['atom2']['label'],
625 isc['atom2']['index'],
626 tensor_string(isc[tensor_name])))
628 sisiout('isc_fc', 'K')
629 sisiout('isc_orbital_p', 'K')
630 sisiout('isc_orbital_d', 'K')
631 sisiout('isc_spin', 'K')
632 sisiout('isc', 'K')
634 return '\n'.join(out)
636 def write_atoms_block(data):
637 out = []
639 write_units(data, out)
641 if 'lattice' in data:
642 for lat in data['lattice']:
643 out.append(f" lattice {tensor_string(lat)}")
645 if 'symmetry' in data:
646 for sym in data['symmetry']:
647 out.append(f' symmetry {sym}')
649 if 'atom' in data:
650 for a in data['atom']:
651 out.append((' atom %s %s %s '
652 '%s') % (a['species'],
653 a['label'],
654 a['index'],
655 ' '.join(str(x) for x in a['position'])))
657 return '\n'.join(out)
659 def write_generic_block(data):
660 out = []
662 for tag, data in data.items():
663 for value in data:
664 out.append('{} {}'.format(tag, ' '.join(str(x) for x in value)))
666 return '\n'.join(out)
668 # Using this to preserve order
669 block_writers = OrderedDict([('calculation', write_generic_block),
670 ('atoms', write_atoms_block),
671 ('magres', write_magres_block)])
673 # First, write the header
674 fd.write('#$magres-abinitio-v1.0\n')
675 fd.write('# Generated by the Atomic Simulation Environment library\n')
677 for b in block_writers:
678 if b in image_data:
679 fd.write(f'[{b}]\n')
680 fd.write(block_writers[b](image_data[b]))
681 fd.write(f'\n[/{b}]\n')
683 # Now on to check for any non-standard blocks...
684 for i in image.info:
685 if '_' in i:
686 ismag, b = i.split('_', 1)
687 if ismag == 'magresblock' and b not in block_writers:
688 fd.write(f'[{b}]\n')
689 fd.write(image.info[i])
690 fd.write(f'[/{b}]\n')