Coverage for ase / io / _magres.py: 94.85%

97 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-21 15:52 +0000

1"""Helper functions to parse different blocks.""" 

2 

3import re 

4 

5import numpy as np 

6 

7 

def tensor33(x):
    """Arrange nine values as a 3x3 nested list (row-major order)."""
    matrix = np.reshape(x, (3, 3))
    squeezed = np.squeeze(matrix)
    return squeezed.tolist()

10 

11 

def tensor31(x):
    """Arrange three values as a flat list.

    The values are reshaped to a (3, 1) column first; squeezing then
    removes the length-1 axis, so the result is a plain 3-element list.
    """
    column = np.reshape(x, (3, 1))
    flattened = np.squeeze(column)
    return flattened.tolist()

14 

15 

def get_version(file_contents):
    """
    Look for and parse the magres file format version line.

    Only the first line of ``file_contents`` is inspected, and the
    version header must start at the beginning of that line.

    Parameters
    ----------
    file_contents : str
        Full text of the magres file.

    Returns
    -------
    tuple of str or None
        ``(major, minor)`` as the digit strings captured from the header,
        or None when the first line is not a version header.
    """
    first_line = file_contents.split('\n', 1)[0]

    # The separator between major and minor must be a literal dot; an
    # unescaped '.' would accept any character there (e.g. 'v1x0').
    match = re.match(
        r'\#\$magres-abinitio-v([0-9]+)\.([0-9]+)', first_line
    )

    if match is None:
        return None

    # groups() is already a tuple of the two captured digit strings.
    return match.groups()

31 

32 

def parse_blocks(file_contents):
    """
    Split the file text into its XML-like delimited blocks.

    A block opens with ``[name]`` or ``<name>`` and closes with
    ``[/name]`` or ``</name>``, where the closing name must repeat the
    opening one. Returns a list of (block_name, contents) tuples in the
    order the blocks are found.
    """
    pattern = (
        r'[\[<](?P<block_name>.*?)[>\]]'  # opening delimiter and name
        r'(.*?)'  # block contents (may span lines via re.S)
        r'[<\[]/(?P=block_name)[\]>]'  # closing delimiter, same name
    )

    return re.findall(pattern, file_contents, re.M | re.S)

46 

47 

def parse_block(block):
    """
    Parse block contents into a series of (tag, data) records.

    Parameters
    ----------
    block : tuple
        ``(name, contents)`` pair, where ``contents`` is the raw text
        of the block.

    Returns
    -------
    tuple
        ``(name, records)`` where ``records`` is a list of
        ``(tag, data)`` tuples: ``tag`` is the first whitespace-separated
        field of a line and ``data`` is the list of remaining fields.
        Blank lines and comment-only lines produce no record.
    """
    name, contents = block

    records = []

    for raw_line in contents.split('\n'):
        # Drop everything from '#' to the end of the line. The previous
        # implementation used a regex that required a trailing newline,
        # which can never be present after splitting on '\n', so
        # comments were silently kept as data fields.
        fields = raw_line.partition('#')[0].split()

        if fields:
            records.append((fields[0], fields[1:]))

    return (name, records)

76 

77 

def check_units(d):
    """
    Verify that given units for a particular tag are correct.

    Parameters
    ----------
    d : list of str
        Fields of a ``units`` record; ``d[0]`` is the tag the units
        apply to and ``d[1]`` is the unit string.

    Returns
    -------
    list of str
        ``d`` unchanged, when the tag is known and the unit matches.

    Raises
    ------
    RuntimeError
        If the tag is not a known one or its unit is not the expected
        one.
    """

    allowed_units = {
        'lattice': 'Angstrom',
        'atom': 'Angstrom',
        'ms': 'ppm',
        'efg': 'au',
        'efg_local': 'au',
        'efg_nonlocal': 'au',
        'isc': '10^19.T^2.J^-1',
        'isc_fc': '10^19.T^2.J^-1',
        'isc_orbital_p': '10^19.T^2.J^-1',
        'isc_orbital_d': '10^19.T^2.J^-1',
        'isc_spin': '10^19.T^2.J^-1',
        'sus': '10^-6.cm^3.mol^-1',
        'calc_cutoffenergy': 'Hartree',
    }

    tag, unit = d[0], d[1]

    # Membership must be tested against the table of known tags, not
    # against the record itself (which always contains its own first
    # field); otherwise an unknown tag escapes as a KeyError.
    if tag not in allowed_units or allowed_units[tag] != unit:
        raise RuntimeError(f'Unrecognized units: {d[0]} {d[1]}')

    return d

105 

106 

def parse_magres_block(block):
    """
    Parse magres block into data dictionary given list of record
    tuples.

    Parameters
    ----------
    block : tuple
        ``(name, records)`` pair, where ``records`` is a list of
        ``(tag, data)`` tuples and ``data`` is a list of string fields.

    Returns
    -------
    dict
        Maps each tag to the list of parsed records carrying that tag,
        in the order they appear.

    Raises
    ------
    KeyError
        If a record's tag has no parser in the tag table.
    """

    _name, records = block

    # Each factory below returns a parser that works on the field list it
    # is handed as its argument, rather than reaching into the enclosing
    # loop variable through the closure.

    # 3x3 tensor
    def ntensor33(name):
        return lambda d: {name: tensor33([float(x) for x in d])}

    # Atom label, atom index and 3x3 tensor
    def sitensor33(name):
        return lambda d: _parse_sitensor33(name, d)

    # 2x(Atom label, atom index) and 3x3 tensor
    def sisitensor33(name):
        return lambda d: {
            'atom1': {'label': d[0], 'index': int(d[1])},
            'atom2': {'label': d[2], 'index': int(d[3])},
            name: tensor33([float(x) for x in d[4:]]),
        }

    tags = {
        'ms': sitensor33('sigma'),
        'sus': ntensor33('S'),
        'efg': sitensor33('V'),
        'efg_local': sitensor33('V'),
        'efg_nonlocal': sitensor33('V'),
        'isc': sisitensor33('K'),
        'isc_fc': sisitensor33('K'),
        'isc_spin': sisitensor33('K'),
        'isc_orbital_p': sisitensor33('K'),
        'isc_orbital_d': sisitensor33('K'),
        'units': check_units,
    }

    data_dict = {}

    for tag, data in records:
        data_dict.setdefault(tag, []).append(tags[tag](data))

    return data_dict

156 

157 

158def _unmunge_label_index(label_index: str) -> tuple[str, str]: 

159 """Splits a label_index string into a label and an index, 

160 where the index is always the final 3 digits. 

161 

162 This function handles cases where the site label and index are combined 

163 in CASTEP magres files (versions < 23), 

164 e.g., 'H1222' instead of 'H1' and '222'. 

165 

166 Since site labels can contain numbers (e.g., H1, H2, H1a), 

167 we extract the index as the final 3 digits. 

168 The remaining characters form the label. 

169 

170 Note: Only call this function when label and index are confirmed 

171 to be combined (detected by the line having 10 fields instead of 11). 

172 

173 Parameters 

174 ---------- 

175 label_index : str 

176 The input string containing the combined label and index 

177 (e.g., 'H1222') 

178 

179 Returns 

180 ------- 

181 tuple[str, str] 

182 A tuple of (label, index) strings (e.g., ('H1', '222')) 

183 

184 Raises 

185 ------ 

186 RuntimeError 

187 If the index is >999 (not supported by this solution)) 

188 If invalid data format or regex match failure 

189 

190 Examples 

191 -------- 

192 >>> _unmunge_label_index('H1222') 

193 ('H1', '222') 

194 >>> _unmunge_label_index('C201') 

195 ('C', '201') 

196 >>> _unmunge_label_index('H23104') 

197 ('H23', '104') 

198 >>> _unmunge_label_index('H1a100') 

199 ('H1a', '100') 

200 """ 

201 match = re.match(r'(.+?)(\d{3})$', label_index) 

202 if match: 

203 label, index = match.groups() 

204 if not isinstance(label, str) or not isinstance(index, str): 

205 raise RuntimeError('Regex match produced non-string values') 

206 if index == '000': 

207 raise RuntimeError( 

208 'Index greater than 999 detected. This is not supported in ' 

209 'magres files with munged label and indices. ' 

210 'Try manually unmunging the label and index.' 

211 ) 

212 return (label, index) 

213 raise RuntimeError( 

214 'Invalid data in magres block. Check the site labels and indices.' 

215 ) 

216 

217 

218def _parse_sitensor33(name, data): 

219 # We expect label, index, and then the 3x3 tensor 

220 if len(data) == 10: 

221 label, index = _unmunge_label_index(data[0]) 

222 data = [label, index] + data[1:] 

223 if len(data) != 11: 

224 raise ValueError( 

225 f'Expected 11 values for {name} tensor data, got {len(data)}' 

226 ) 

227 

228 return { 

229 'atom': {'label': data[0], 'index': int(data[1])}, 

230 name: tensor33([float(x) for x in data[2:]]), 

231 } 

232 

233 

def parse_atoms_block(block):
    """
    Parse atoms block into data dictionary given list of record tuples.

    Parameters
    ----------
    block : tuple
        ``(name, records)`` pair, where ``records`` is a list of
        ``(tag, data)`` tuples and ``data`` is a list of string fields.

    Returns
    -------
    dict
        Maps each tag to the list of parsed records carrying that tag,
        in the order they appear.

    Raises
    ------
    KeyError
        If a record's tag is not one of 'lattice', 'atom', 'units' or
        'symmetry'.
    """

    _name, records = block

    # The parsers below work on the field list passed as their argument,
    # rather than reaching into the enclosing loop variable.

    # Lattice record: a1, a2 a3, b1, b2, b3, c1, c2 c3
    def lattice(d):
        return tensor33([float(x) for x in d])

    # Atom record: species, label, index, x, y, z
    def atom(d):
        return {
            'species': d[0],
            'label': d[1],
            'index': int(d[2]),
            'position': tensor31([float(x) for x in d[3:]]),
        }

    # Symmetry record: fields re-joined into a single string
    def symmetry(d):
        return ' '.join(d)

    tags = {
        'lattice': lattice,
        'atom': atom,
        'units': check_units,
        'symmetry': symmetry,
    }

    data_dict = {}

    for tag, data in records:
        data_dict.setdefault(tag, []).append(tags[tag](data))

    return data_dict

273 

274 

def parse_generic_block(block):
    """
    Group the records of an arbitrary block by tag.

    ``block`` is a (name, records) pair; the name is ignored. The result
    maps each tag to the list of raw data lists recorded under it, in
    order of appearance.
    """

    _name, records = block

    grouped = {}

    for tag, fields in records:
        grouped.setdefault(tag, []).append(fields)

    return grouped