Coverage for ase/io/_magres.py: 94.85%

1"""Helper functions to parse different blocks."""

3import re

5import numpy as np

def tensor33(x):
    """Arrange nine values into a 3x3 nested list.

    ``x`` is reshaped to (3, 3) with NumPy and handed back as plain
    Python lists rather than an array.
    """
    as_matrix = np.reshape(x, (3, 3))
    return np.squeeze(as_matrix).tolist()

def tensor31(x):
    """Arrange three values into a flat 3-element list.

    ``x`` is reshaped to (3, 1); squeezing then drops the length-one
    axis, so the result is a plain list of three numbers.
    """
    as_column = np.reshape(x, (3, 1))
    return np.squeeze(as_column).tolist()

def get_version(file_contents):
    """
    Look for and parse the magres file format version line.

    Only the first line of ``file_contents`` is inspected; it has to
    start with ``#$magres-abinitio-v<major>.<minor>``.

    Parameters
    ----------
    file_contents : str
        Full text of the magres file.

    Returns
    -------
    tuple of str or None
        ``(major, minor)`` as digit strings (not converted to int), or
        ``None`` when the first line carries no version header.
    """
    first_line = file_contents.split('\n', 1)[0]

    # The separator is escaped so that only a literal '.' is accepted
    # between the major and minor numbers.
    match = re.match(r'\#\$magres-abinitio-v([0-9]+)\.([0-9]+)', first_line)
    if match is None:
        return None

    return match.groups()

def parse_blocks(file_contents):
    """
    Split the file text into its XML-like delimited blocks.

    A block opens with ``[name]`` or ``<name>`` and closes with the
    matching ``[/name]`` or ``</name>``. The result is a list of
    (block_name, contents) tuples in order of appearance.
    """
    pattern = (
        r'[\[<](?P<block_name>.*?)[>\]](.*?)[<\[]/'
        r'(?P=block_name)[\]>]'
    )
    return re.findall(pattern, file_contents, flags=re.M | re.S)

def parse_block(block):
    """
    Parse block contents into a series of (tag, data) records.

    Parameters
    ----------
    block : tuple
        ``(name, contents)`` pair where ``contents`` is the raw text
        found between the block delimiters.

    Returns
    -------
    tuple
        ``(name, records)`` where ``records`` is a list of
        ``(tag, data)`` tuples: ``tag`` is the first whitespace-separated
        field of a line and ``data`` the list of remaining fields.
        Blank and comment-only lines produce no record.
    """
    name, contents = block

    records = []
    for raw_line in contents.split('\n'):
        # Everything from the first '#' to the end of the line is a
        # comment. The text is already split on newlines, so the comment
        # has to be cut without expecting a trailing '\n'.
        fields = raw_line.partition('#')[0].split()
        if fields:
            records.append((fields[0], fields[1:]))

    return (name, records)

def check_units(d):
    """
    Verify that given units for a particular tag are correct.

    Parameters
    ----------
    d : list of str
        Fields of a ``units`` record: the tag name followed by the unit
        declared for it.

    Returns
    -------
    list of str
        ``d`` unchanged, when the declared unit is the one expected for
        the tag.

    Raises
    ------
    RuntimeError
        If the record is too short, the tag is unknown, or the unit is
        not the one expected for the tag.
    """
    allowed_units = {
        'lattice': 'Angstrom',
        'atom': 'Angstrom',
        'ms': 'ppm',
        'efg': 'au',
        'efg_local': 'au',
        'efg_nonlocal': 'au',
        'isc': '10^19.T^2.J^-1',
        'isc_fc': '10^19.T^2.J^-1',
        'isc_orbital_p': '10^19.T^2.J^-1',
        'isc_orbital_d': '10^19.T^2.J^-1',
        'isc_spin': '10^19.T^2.J^-1',
        'sus': '10^-6.cm^3.mol^-1',
        'calc_cutoffenergy': 'Hartree',
    }

    if len(d) < 2:
        # A units record needs both a tag and a unit.
        raise RuntimeError(f'Unrecognized units: {" ".join(d)}')

    tag, unit = d[0], d[1]

    # Look the tag up in the table of known tags; an unknown tag is
    # reported the same way as a wrong unit rather than as a KeyError.
    if tag not in allowed_units or unit != allowed_units[tag]:
        raise RuntimeError(f'Unrecognized units: {tag} {unit}')

    return d

105

106

def parse_magres_block(block):
    """
    Parse magres block into data dictionary given list of record
    tuples.

    Parameters
    ----------
    block : tuple
        ``(name, records)`` pair; ``records`` is a list of
        ``(tag, data)`` tuples where ``data`` is a list of string fields.

    Returns
    -------
    dict
        Maps each tag to the list of parsed records carrying that tag,
        in order of appearance.

    Raises
    ------
    KeyError
        If a record's tag has no parser in the tag table below.
    """

    _name, records = block

    # Each factory returns a parser that works on the fields it is
    # handed (``d``), not on a variable of the enclosing loop.

    # 3x3 tensor
    def ntensor33(name):
        return lambda d: {name: tensor33([float(x) for x in d])}

    # Atom label, atom index and 3x3 tensor
    def sitensor33(name):
        return lambda d: _parse_sitensor33(name, d)

    # 2x(Atom label, atom index) and 3x3 tensor
    def sisitensor33(name):
        return lambda d: {
            'atom1': {'label': d[0], 'index': int(d[1])},
            'atom2': {'label': d[2], 'index': int(d[3])},
            name: tensor33([float(x) for x in d[4:]]),
        }

    tags = {
        'ms': sitensor33('sigma'),
        'sus': ntensor33('S'),
        'efg': sitensor33('V'),
        'efg_local': sitensor33('V'),
        'efg_nonlocal': sitensor33('V'),
        'isc': sisitensor33('K'),
        'isc_fc': sisitensor33('K'),
        'isc_spin': sisitensor33('K'),
        'isc_orbital_p': sisitensor33('K'),
        'isc_orbital_d': sisitensor33('K'),
        'units': check_units,
    }

    data_dict = {}

    for tag, data in records:
        data_dict.setdefault(tag, []).append(tags[tag](data))

    return data_dict

156

157

158def _unmunge_label_index(label_index: str) -> tuple[str, str]:

159 """Splits a label_index string into a label and an index,

160 where the index is always the final 3 digits.

161

162 This function handles cases where the site label and index are combined

163 in CASTEP magres files (versions < 23),

164 e.g., 'H1222' instead of 'H1' and '222'.

165

166 Since site labels can contain numbers (e.g., H1, H2, H1a),

167 we extract the index as the final 3 digits.

168 The remaining characters form the label.

169

170 Note: Only call this function when label and index are confirmed

171 to be combined (detected by the line having 10 fields instead of 11).

172

173 Parameters

174 ----------

175 label_index : str

176 The input string containing the combined label and index

177 (e.g., 'H1222')

178

179 Returns

180 -------

181 tuple[str, str]

182 A tuple of (label, index) strings (e.g., ('H1', '222'))

183

184 Raises

185 ------

186 RuntimeError

187 If the index is >999 (not supported by this solution))

188 If invalid data format or regex match failure

189

190 Examples

191 --------

192 >>> _unmunge_label_index('H1222')

193 ('H1', '222')

194 >>> _unmunge_label_index('C201')

195 ('C', '201')

196 >>> _unmunge_label_index('H23104')

197 ('H23', '104')

198 >>> _unmunge_label_index('H1a100')

199 ('H1a', '100')

200 """

201 match = re.match(r'(.+?)(\d{3})$', label_index)

202 if match:

203 label, index = match.groups()

204 if not isinstance(label, str) or not isinstance(index, str):

205 raise RuntimeError('Regex match produced non-string values')

206 if index == '000':

207 raise RuntimeError(

208 'Index greater than 999 detected. This is not supported in '

209 'magres files with munged label and indices. '

210 'Try manually unmunging the label and index.'

211 )

212 return (label, index)

213 raise RuntimeError(

214 'Invalid data in magres block. Check the site labels and indices.'

215 )

216

217

218def _parse_sitensor33(name, data):

219 # We expect label, index, and then the 3x3 tensor

220 if len(data) == 10:

221 label, index = _unmunge_label_index(data[0])

222 data = [label, index] + data[1:]

223 if len(data) != 11:

224 raise ValueError(

225 f'Expected 11 values for {name} tensor data, got {len(data)}'

226 )

227

228 return {

229 'atom': {'label': data[0], 'index': int(data[1])},

230 name: tensor33([float(x) for x in data[2:]]),

231 }

232

233

def parse_atoms_block(block):
    """
    Parse atoms block into data dictionary given list of record tuples.

    Parameters
    ----------
    block : tuple
        ``(name, records)`` pair; ``records`` is a list of
        ``(tag, data)`` tuples where ``data`` is a list of string fields.

    Returns
    -------
    dict
        Maps each tag ('lattice', 'atom', 'units', 'symmetry') to the
        list of parsed records carrying that tag, in order of appearance.

    Raises
    ------
    KeyError
        If a record's tag is not one of the tags handled here.
    """

    _name, records = block

    # Each parser works on the fields it is handed (``d``), not on a
    # variable of the enclosing loop.

    # Lattice record: a1, a2, a3, b1, b2, b3, c1, c2, c3
    def lattice(d):
        return tensor33([float(x) for x in d])

    # Atom record: species, label, index, x, y, z
    def atom(d):
        return {
            'species': d[0],
            'label': d[1],
            'index': int(d[2]),
            'position': tensor31([float(x) for x in d[3:]]),
        }

    # Symmetry record: fields re-joined into a single string
    def symmetry(d):
        return ' '.join(d)

    tags = {
        'lattice': lattice,
        'atom': atom,
        'units': check_units,
        'symmetry': symmetry,
    }

    data_dict = {}

    for tag, data in records:
        data_dict.setdefault(tag, []).append(tags[tag](data))

    return data_dict

273

274

def parse_generic_block(block):
    """
    Collect the records of any other block into a dictionary.

    ``block`` is a (name, records) pair. Records are grouped by tag:
    each tag maps to the list of raw data lists recorded under it, in
    order of appearance. The data is stored as-is, without conversion.
    """
    _name, records = block

    grouped = {}
    for tag, fields in records:
        grouped.setdefault(tag, []).append(fields)
    return grouped

Coverage for ase / io / _magres.py: 94.85%

97 statements