Coverage for ase / io / _magres.py: 94.85%
97 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-21 15:52 +0000
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-21 15:52 +0000
1"""Helper functions to parse different blocks."""
3import re
5import numpy as np
def tensor33(x):
    """Arrange nine values as a 3x3 tensor and return it as nested lists."""
    matrix = np.reshape(x, (3, 3))
    return np.squeeze(matrix).tolist()
def tensor31(x):
    """Arrange three values as a 3x1 column and return it as a flat list.

    The length-1 axis is squeezed away, so the result has three entries.
    """
    column = np.reshape(x, (3, 1))
    return np.squeeze(column).tolist()
def get_version(file_contents):
    """
    Look for and parse the magres file format version line.

    Only the first line of ``file_contents`` is inspected; it must start
    with ``#$magres-abinitio-v<major>.<minor>``.

    Parameters
    ----------
    file_contents : str
        Full text of the magres file.

    Returns
    -------
    tuple of str or None
        ``(major, minor)`` as strings (e.g. ``('1', '0')``), or None if
        the first line does not start with a version header.
    """
    first_line = file_contents.split('\n', 1)[0]

    # The dot is escaped so that only a literal '.' is accepted between
    # the major and minor numbers (an unescaped '.' matches any character).
    match = re.match(
        r'\#\$magres-abinitio-v([0-9]+)\.([0-9]+)', first_line
    )
    if match is None:
        return None

    # groups() already is a tuple of the two captured strings
    return match.groups()
def parse_blocks(file_contents):
    """
    Split the file text into its XML-like delimited blocks.

    A block opens with ``[name]`` or ``<name>`` and closes with the
    matching ``[/name]`` or ``</name>``. The result is a list of
    (block_name, contents) tuples in the order the blocks are found.
    """
    # Opening tag, lazily-matched contents and the start of the closing tag
    head = r'[\[<](?P<block_name>.*?)[>\]](.*?)[<\[]/'
    # The closing tag must repeat the name captured by the opening tag
    tail = r'(?P=block_name)[\]>]'

    pattern = re.compile(head + tail, re.M | re.S)
    return pattern.findall(file_contents)
def parse_block(block):
    """
    Parse block contents into a series of (tag, data) records.

    Parameters
    ----------
    block : tuple
        ``(name, contents)`` pair, where ``contents`` is the raw text
        of the block.

    Returns
    -------
    tuple
        ``(name, records)`` where ``records`` is a list of
        ``(tag, data)`` tuples: ``tag`` is the first whitespace-separated
        field of a line and ``data`` the list of remaining fields.
        Lines that are empty once comments are removed yield no record.
    """
    name, contents = block

    records = []
    for raw_line in contents.split('\n'):
        # The text is already split on '\n', so a comment runs from the
        # first '#' to the end of the line; a pattern requiring a
        # trailing newline would never match here.
        fields = raw_line.split('#', 1)[0].split()

        if fields:
            records.append((fields[0], fields[1:]))

    return (name, records)
def check_units(d):
    """
    Verify that given units for a particular tag are correct.

    Parameters
    ----------
    d : sequence of str
        Data of a ``units`` record: ``d[0]`` is the tag the unit applies
        to and ``d[1]`` is the unit given for it.

    Returns
    -------
    The input ``d``, unchanged, when the unit is the allowed one.

    Raises
    ------
    RuntimeError
        If the tag is not in the table of known tags, or if the unit
        differs from the one allowed for that tag.
    """

    allowed_units = {
        'lattice': 'Angstrom',
        'atom': 'Angstrom',
        'ms': 'ppm',
        'efg': 'au',
        'efg_local': 'au',
        'efg_nonlocal': 'au',
        'isc': '10^19.T^2.J^-1',
        'isc_fc': '10^19.T^2.J^-1',
        'isc_orbital_p': '10^19.T^2.J^-1',
        'isc_orbital_d': '10^19.T^2.J^-1',
        'isc_spin': '10^19.T^2.J^-1',
        'sus': '10^-6.cm^3.mol^-1',
        'calc_cutoffenergy': 'Hartree',
    }

    tag, unit = d[0], d[1]

    # Membership must be tested against the table of known tags; testing
    # against ``d`` itself is always true and lets an unknown tag escape
    # as a KeyError from the lookup.
    if tag not in allowed_units or unit != allowed_units[tag]:
        raise RuntimeError(f'Unrecognized units: {d[0]} {d[1]}')

    return d
def parse_magres_block(block):
    """
    Parse magres block into data dictionary given list of record
    tuples.

    Parameters
    ----------
    block : tuple
        ``(name, records)`` pair where ``records`` is a list of
        ``(tag, data)`` tuples; the name is not used.

    Returns
    -------
    dict
        Maps each tag to the list of parsed records carrying that tag,
        in the order they were encountered.

    Raises
    ------
    KeyError
        If a record's tag has no parser in the tag table below.
    """

    _name, records = block

    # Each factory returns a parser that works on the record data passed
    # to it as an argument, rather than reading the loop variable of the
    # enclosing function through a late-binding closure.

    # 3x3 tensor
    def ntensor33(name):
        return lambda d: {name: tensor33([float(x) for x in d])}

    # Atom label, atom index and 3x3 tensor
    def sitensor33(name):
        return lambda d: _parse_sitensor33(name, d)

    # 2x(Atom label, atom index) and 3x3 tensor
    def sisitensor33(name):
        return lambda d: {
            'atom1': {'label': d[0], 'index': int(d[1])},
            'atom2': {'label': d[2], 'index': int(d[3])},
            name: tensor33([float(x) for x in d[4:]]),
        }

    tags = {
        'ms': sitensor33('sigma'),
        'sus': ntensor33('S'),
        'efg': sitensor33('V'),
        'efg_local': sitensor33('V'),
        'efg_nonlocal': sitensor33('V'),
        'isc': sisitensor33('K'),
        'isc_fc': sisitensor33('K'),
        'isc_spin': sisitensor33('K'),
        'isc_orbital_p': sisitensor33('K'),
        'isc_orbital_d': sisitensor33('K'),
        'units': check_units,
    }

    data_dict = {}

    for tag, data in records:
        if tag not in data_dict:
            data_dict[tag] = []

        data_dict[tag].append(tags[tag](data))

    return data_dict
158def _unmunge_label_index(label_index: str) -> tuple[str, str]:
159 """Splits a label_index string into a label and an index,
160 where the index is always the final 3 digits.
162 This function handles cases where the site label and index are combined
163 in CASTEP magres files (versions < 23),
164 e.g., 'H1222' instead of 'H1' and '222'.
166 Since site labels can contain numbers (e.g., H1, H2, H1a),
167 we extract the index as the final 3 digits.
168 The remaining characters form the label.
170 Note: Only call this function when label and index are confirmed
171 to be combined (detected by the line having 10 fields instead of 11).
173 Parameters
174 ----------
175 label_index : str
176 The input string containing the combined label and index
177 (e.g., 'H1222')
179 Returns
180 -------
181 tuple[str, str]
182 A tuple of (label, index) strings (e.g., ('H1', '222'))
184 Raises
185 ------
186 RuntimeError
187 If the index is >999 (not supported by this solution))
188 If invalid data format or regex match failure
190 Examples
191 --------
192 >>> _unmunge_label_index('H1222')
193 ('H1', '222')
194 >>> _unmunge_label_index('C201')
195 ('C', '201')
196 >>> _unmunge_label_index('H23104')
197 ('H23', '104')
198 >>> _unmunge_label_index('H1a100')
199 ('H1a', '100')
200 """
201 match = re.match(r'(.+?)(\d{3})$', label_index)
202 if match:
203 label, index = match.groups()
204 if not isinstance(label, str) or not isinstance(index, str):
205 raise RuntimeError('Regex match produced non-string values')
206 if index == '000':
207 raise RuntimeError(
208 'Index greater than 999 detected. This is not supported in '
209 'magres files with munged label and indices. '
210 'Try manually unmunging the label and index.'
211 )
212 return (label, index)
213 raise RuntimeError(
214 'Invalid data in magres block. Check the site labels and indices.'
215 )
218def _parse_sitensor33(name, data):
219 # We expect label, index, and then the 3x3 tensor
220 if len(data) == 10:
221 label, index = _unmunge_label_index(data[0])
222 data = [label, index] + data[1:]
223 if len(data) != 11:
224 raise ValueError(
225 f'Expected 11 values for {name} tensor data, got {len(data)}'
226 )
228 return {
229 'atom': {'label': data[0], 'index': int(data[1])},
230 name: tensor33([float(x) for x in data[2:]]),
231 }
def parse_atoms_block(block):
    """
    Parse atoms block into data dictionary given list of record tuples.

    Parameters
    ----------
    block : tuple
        ``(name, records)`` pair where ``records`` is a list of
        ``(tag, data)`` tuples; the name is not used.

    Returns
    -------
    dict
        Maps each tag to the list of parsed records carrying that tag,
        in the order they were encountered.

    Raises
    ------
    KeyError
        If a record's tag has no parser in the tag table below.
    """

    _name, records = block

    # Each parser works on the record data passed to it as an argument,
    # rather than reading the loop variable of the enclosing function
    # through a late-binding closure.

    # Lattice record: a1, a2 a3, b1, b2, b3, c1, c2 c3
    def lattice(d):
        return tensor33([float(x) for x in d])

    # Atom record: species, label, index, x, y, z
    def atom(d):
        return {
            'species': d[0],
            'label': d[1],
            'index': int(d[2]),
            'position': tensor31([float(x) for x in d[3:]]),
        }

    # Symmetry record: the fields joined back into one string
    def symmetry(d):
        return ' '.join(d)

    tags = {
        'lattice': lattice,
        'atom': atom,
        'units': check_units,
        'symmetry': symmetry,
    }

    data_dict = {}

    for tag, data in records:
        if tag not in data_dict:
            data_dict[tag] = []
        data_dict[tag].append(tags[tag](data))

    return data_dict
def parse_generic_block(block):
    """
    Collect the records of any other block into a dictionary.

    The data of each record is stored unparsed: every tag maps to the
    list of data entries of the records carrying it, in input order.
    """
    _name, records = block

    grouped = {}
    for tag, values in records:
        grouped.setdefault(tag, []).append(values)

    return grouped