Coverage for /builds/ase/ase/ase/io/rmc6f.py: 94.90%

196 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-08-02 00:12 +0000

1# fmt: off 

2 

3import re 

4import time 

5 

6import numpy as np 

7 

8from ase.atoms import Atoms 

9from ase.cell import Cell 

10from ase.utils import reader, writer 

11 

12__all__ = ['read_rmc6f', 'write_rmc6f'] 

13 

14ncols2style = {9: 'no_labels', 

15 10: 'labels', 

16 11: 'magnetic'} 

17 

18 

19def _read_construct_regex(lines): 

20 """ 

21 Utility for constructing regular expressions used by reader. 

22 """ 

23 lines = [line.strip() for line in lines] 

24 lines_re = '|'.join(lines) 

25 lines_re = lines_re.replace(' ', r'\s+') 

26 lines_re = lines_re.replace('(', r'\(') 

27 lines_re = lines_re.replace(')', r'\)') 

28 return f'({lines_re})' 

29 

30 

31def _read_line_of_atoms_section(fields): 

32 """ 

33 Process `fields` line of Atoms section in rmc6f file and output extracted 

34 info as atom id (key) and list of properties for Atoms object (values). 

35 

36 Parameters 

37 ---------- 

38 fields: list[str] 

39 List of columns from line in rmc6f file. 

40 

41 

42 Returns 

43 ------ 

44 atom_id: int 

45 Atom ID 

46 properties: list[str|float] 

47 List of Atoms properties based on rmc6f style. 

48 Basically, have 1) element and fractional coordinates for 'labels' 

49 or 'no_labels' style and 2) same for 'magnetic' style except adds 

50 the spin. 

51 Examples for 1) 'labels' or 'no_labels' styles or 2) 'magnetic' style: 

52 1) [element, xf, yf, zf] 

53 2) [element, xf, yf, zf, spin] 

54 """ 

55 # Atom id 

56 atom_id = int(fields[0]) 

57 

58 # Get style used for rmc6f from file based on number of columns 

59 ncols = len(fields) 

60 style = ncols2style[ncols] 

61 

62 # Create the position dictionary 

63 properties = [] 

64 element = str(fields[1]) 

65 if style == 'no_labels': 

66 # id element xf yf zf ref_num ucell_x ucell_y ucell_z 

67 xf = float(fields[2]) 

68 yf = float(fields[3]) 

69 zf = float(fields[4]) 

70 properties = [element, xf, yf, zf] 

71 

72 elif style == 'labels': 

73 # id element label xf yf zf ref_num ucell_x ucell_y ucell_z 

74 xf = float(fields[3]) 

75 yf = float(fields[4]) 

76 zf = float(fields[5]) 

77 properties = [element, xf, yf, zf] 

78 

79 elif style == 'magnetic': 

80 # id element xf yf zf ref_num ucell_x ucell_y ucell_z M: spin 

81 xf = float(fields[2]) 

82 yf = float(fields[3]) 

83 zf = float(fields[4]) 

84 spin = float(fields[10].strip("M:")) 

85 properties = [element, xf, yf, zf, spin] 

86 else: 

87 raise Exception("Unsupported style for parsing rmc6f file format.") 

88 

89 return atom_id, properties 

90 

91 

92def _read_process_rmc6f_lines_to_pos_and_cell(lines): 

93 """ 

94 Processes the lines of rmc6f file to atom position dictionary and cell 

95 

96 Parameters 

97 ---------- 

98 lines: list[str] 

99 List of lines from rmc6f file. 

100 

101 Returns 

102 ------ 

103 pos : dict{int:list[str|float]} 

104 Dict for each atom id and Atoms properties based on rmc6f style. 

105 Basically, have 1) element and fractional coordinates for 'labels' 

106 or 'no_labels' style and 2) same for 'magnetic' style except adds 

107 the spin. 

108 Examples for 1) 'labels' or 'no_labels' styles or 2) 'magnetic' style: 

109 1) pos[aid] = [element, xf, yf, zf] 

110 2) pos[aid] = [element, xf, yf, zf, spin] 

111 cell: Cell object 

112 The ASE Cell object created from cell parameters read from the 'Cell' 

113 section of rmc6f file. 

114 """ 

115 

116 # Inititalize output pos dictionary 

117 pos = {} 

118 

119 # Defined the header an section lines we process 

120 header_lines = [ 

121 "Number of atoms:", 

122 "Supercell dimensions:", 

123 "Cell (Ang/deg):", 

124 "Lattice vectors (Ang):"] 

125 sections = ["Atoms"] 

126 

127 # Construct header and sections regex 

128 header_lines_re = _read_construct_regex(header_lines) 

129 sections_re = _read_construct_regex(sections) 

130 

131 section = None 

132 header = True 

133 

134 # Remove any lines that are blank 

135 lines = [line for line in lines if line != ''] 

136 

137 # Process each line of rmc6f file 

138 pos = {} 

139 for line in lines: 

140 

141 # check if in a section 

142 m = re.match(sections_re, line) 

143 if m is not None: 

144 section = m.group(0).strip() 

145 header = False 

146 continue 

147 

148 # header section 

149 if header: 

150 field = None 

151 val = None 

152 

153 # Regex that matches whitespace-separated floats 

154 float_list_re = r'\s+(\d[\d|\s\.]+[\d|\.])' 

155 m = re.search(header_lines_re + float_list_re, line) 

156 if m is not None: 

157 field = m.group(1) 

158 val = m.group(2) 

159 

160 if field is not None and val is not None: 

161 

162 if field == "Number of atoms:": 

163 """ 

164 NOTE: Can just capture via number of atoms ingested. 

165 Maybe use in future for a check. 

166 code: natoms = int(val) 

167 """ 

168 

169 if field.startswith('Supercell'): 

170 """ 

171 NOTE: wrapping back down to unit cell is not 

172 necessarily needed for ASE object. 

173 

174 code: supercell = [int(x) for x in val.split()] 

175 """ 

176 

177 if field.startswith('Cell'): 

178 cellpar = [float(x) for x in val.split()] 

179 cell = Cell.fromcellpar(cellpar) 

180 

181 if field.startswith('Lattice'): 

182 """ 

183 NOTE: Have questions about RMC fractionalization matrix for 

184 conversion in data2config vs. the ASE matrix. 

185 Currently, just support the Cell section. 

186 """ 

187 

188 # main body section 

189 if section is not None: 

190 if section == 'Atoms': 

191 atom_id, atom_props = _read_line_of_atoms_section(line.split()) 

192 pos[atom_id] = atom_props 

193 

194 return pos, cell 

195 

196 

197def _write_output_column_format(columns, arrays): 

198 """ 

199 Helper function to build output for data columns in rmc6f format 

200 

201 Parameters 

202 ---------- 

203 columns: list[str] 

204 List of keys in arrays. Will be columns in the output file. 

205 arrays: dict{str:np.array} 

206 Dict with arrays for each column of rmc6f file that are 

207 property of Atoms object. 

208 

209 Returns 

210 ------ 

211 property_ncols : list[int] 

212 Number of columns for each property. 

213 dtype_obj: np.dtype 

214 Data type object for the columns. 

215 formats_as_str: str 

216 The format for printing the columns. 

217 

218 """ 

219 fmt_map = {'d': ('R', '%14.6f '), 

220 'f': ('R', '%14.6f '), 

221 'i': ('I', '%8d '), 

222 'O': ('S', '%s'), 

223 'S': ('S', '%s'), 

224 'U': ('S', '%s'), 

225 'b': ('L', ' %.1s ')} 

226 

227 property_types = [] 

228 property_ncols = [] 

229 dtypes = [] 

230 formats = [] 

231 

232 # Take each column and setup formatting vectors 

233 for column in columns: 

234 array = arrays[column] 

235 dtype = array.dtype 

236 

237 property_type, fmt = fmt_map[dtype.kind] 

238 property_types.append(property_type) 

239 

240 # Flags for 1d vectors 

241 is_1d = len(array.shape) == 1 

242 is_1d_as_2d = len(array.shape) == 2 and array.shape[1] == 1 

243 

244 # Construct the list of key pairs of column with dtype 

245 if (is_1d or is_1d_as_2d): 

246 ncol = 1 

247 dtypes.append((column, dtype)) 

248 else: 

249 ncol = array.shape[1] 

250 for c in range(ncol): 

251 dtypes.append((column + str(c), dtype)) 

252 

253 # Add format and number of columns for this property to output array 

254 formats.extend([fmt] * ncol) 

255 property_ncols.append(ncol) 

256 

257 # Prepare outputs to correct data structure 

258 dtype_obj = np.dtype(dtypes) 

259 formats_as_str = ''.join(formats) + '\n' 

260 

261 return property_ncols, dtype_obj, formats_as_str 

262 

263 

264def _write_output(filename, header_lines, data, fmt, order=None): 

265 """ 

266 Helper function to write information to the filename 

267 

268 Parameters 

269 ---------- 

270 filename : file|str 

271 A file like object or filename 

272 header_lines : list[str] 

273 Header section of output rmc6f file 

274 data: np.array[len(atoms)] 

275 Array for the Atoms section to write to file. Has 

276 the columns that need to be written on each row 

277 fmt: str 

278 The format string to use for writing each column in 

279 the rows of data. 

280 order : list[str] 

281 If not None, gives a list of atom types for the order 

282 to write out each. 

283 """ 

284 fd = filename 

285 

286 # Write header section 

287 for line in header_lines: 

288 fd.write(f"{line} \n") 

289 

290 # If specifying the order, fix the atom id and write to file 

291 natoms = data.shape[0] 

292 if order is not None: 

293 new_id = 0 

294 for atype in order: 

295 for i in range(natoms): 

296 if atype == data[i][1]: 

297 new_id += 1 

298 data[i][0] = new_id 

299 fd.write(fmt % tuple(data[i])) 

300 # ...just write rows to file 

301 else: 

302 for i in range(natoms): 

303 fd.write(fmt % tuple(data[i])) 

304 

305 

306@reader 

307def read_rmc6f(filename, atom_type_map=None): 

308 """ 

309 Parse a RMCProfile rmc6f file into ASE Atoms object 

310 

311 Parameters 

312 ---------- 

313 filename : file|str 

314 A file like object or filename. 

315 atom_type_map: dict{str:str} 

316 Map of atom types for conversions. Mainly used if there is 

317 an atom type in the file that is not supported by ASE but 

318 want to map to a supported atom type instead. 

319 

320 Example to map deuterium to hydrogen: 

321 atom_type_map = { 'D': 'H' } 

322 

323 Returns 

324 ------ 

325 structure : Atoms 

326 The Atoms object read in from the rmc6f file. 

327 """ 

328 

329 fd = filename 

330 lines = fd.readlines() 

331 

332 # Process the rmc6f file to extract positions and cell 

333 pos, cell = _read_process_rmc6f_lines_to_pos_and_cell(lines) 

334 

335 # create an atom type map if one does not exist from unique atomic symbols 

336 if atom_type_map is None: 

337 symbols = [atom[0] for atom in pos.values()] 

338 atom_type_map = {atype: atype for atype in symbols} 

339 

340 # Use map of tmp symbol to actual symbol 

341 for atom in pos.values(): 

342 atom[0] = atom_type_map[atom[0]] 

343 

344 # create Atoms from read-in data 

345 symbols = [] 

346 scaled_positions = [] 

347 spin = None 

348 magmoms = [] 

349 for atom in pos.values(): 

350 if len(atom) == 4: 

351 element, x, y, z = atom 

352 else: 

353 element, x, y, z, spin = atom 

354 

355 element = atom_type_map[element] 

356 symbols.append(element) 

357 scaled_positions.append([x, y, z]) 

358 if spin is not None: 

359 magmoms.append(spin) 

360 

361 atoms = Atoms(scaled_positions=scaled_positions, 

362 symbols=symbols, 

363 cell=cell, 

364 magmoms=magmoms, 

365 pbc=[True, True, True]) 

366 

367 return atoms 

368 

369 

370@writer 

371def write_rmc6f(filename, atoms, order=None, atom_type_map=None): 

372 """ 

373 Write output in rmc6f format - RMCProfile v6 fractional coordinates 

374 

375 Parameters 

376 ---------- 

377 filename : file|str 

378 A file like object or filename. 

379 atoms: Atoms object 

380 The Atoms object to be written. 

381 

382 order : list[str] 

383 If not None, gives a list of atom types for the order 

384 to write out each. 

385 atom_type_map: dict{str:str} 

386 Map of atom types for conversions. Mainly used if there is 

387 an atom type in the Atoms object that is a placeholder 

388 for a different atom type. This is used when the atom type 

389 is not supported by ASE but is in RMCProfile. 

390 

391 Example to map hydrogen to deuterium: 

392 atom_type_map = { 'H': 'D' } 

393 """ 

394 

395 # get atom types and how many of each (and set to order if passed) 

396 atom_types = set(atoms.symbols) 

397 if order is not None: 

398 if set(atom_types) != set(order): 

399 raise Exception("The order is not a set of the atom types.") 

400 atom_types = order 

401 

402 atom_count_dict = atoms.symbols.formula.count() 

403 natom_types = [str(atom_count_dict[atom_type]) for atom_type in atom_types] 

404 

405 # create an atom type map if one does not exist from unique atomic symbols 

406 if atom_type_map is None: 

407 symbols = set(np.array(atoms.symbols)) 

408 atom_type_map = {atype: atype for atype in symbols} 

409 

410 # HEADER SECTION 

411 

412 # get type and number of each atom type 

413 atom_types_list = [atom_type_map[atype] for atype in atom_types] 

414 atom_types_present = ' '.join(atom_types_list) 

415 natom_types_present = ' '.join(natom_types) 

416 

417 header_lines = [ 

418 "(Version 6f format configuration file)", 

419 "(Generated by ASE - Atomic Simulation Environment https://wiki.fysik.dtu.dk/ase/ )", # noqa: E501 

420 "Metadata date: " + time.strftime('%d-%m-%Y'), 

421 f"Number of types of atoms: {len(atom_types)} ", 

422 f"Atom types present: {atom_types_present}", 

423 f"Number of each atom type: {natom_types_present}", 

424 "Number of moves generated: 0", 

425 "Number of moves tried: 0", 

426 "Number of moves accepted: 0", 

427 "Number of prior configuration saves: 0", 

428 f"Number of atoms: {len(atoms)}", 

429 "Supercell dimensions: 1 1 1"] 

430 

431 # magnetic moments 

432 if atoms.has('magmoms'): 

433 spin_str = "Number of spins: {}" 

434 spin_line = spin_str.format(len(atoms.get_initial_magnetic_moments())) 

435 header_lines.extend([spin_line]) 

436 

437 density_str = "Number density (Ang^-3): {}" 

438 density_line = density_str.format(len(atoms) / atoms.get_volume()) 

439 cell_angles = [str(x) for x in atoms.cell.cellpar()] 

440 cell_line = "Cell (Ang/deg): " + ' '.join(cell_angles) 

441 header_lines.extend([density_line, cell_line]) 

442 

443 # get lattice vectors from cell lengths and angles 

444 # NOTE: RMCProfile uses a different convention for the fractionalization 

445 # matrix 

446 

447 cell_parameters = atoms.cell.cellpar() 

448 cell = Cell.fromcellpar(cell_parameters).T 

449 x_line = ' '.join([f'{i:12.6f}' for i in cell[0]]) 

450 y_line = ' '.join([f'{i:12.6f}' for i in cell[1]]) 

451 z_line = ' '.join([f'{i:12.6f}' for i in cell[2]]) 

452 lat_lines = ["Lattice vectors (Ang):", x_line, y_line, z_line] 

453 header_lines.extend(lat_lines) 

454 header_lines.extend(['Atoms:']) 

455 

456 # ATOMS SECTION 

457 

458 # create columns of data for atoms (fr_cols) 

459 fr_cols = ['id', 'symbols', 'scaled_positions', 'ref_num', 'ref_cell'] 

460 if atoms.has('magmoms'): 

461 fr_cols.extend('magmom') 

462 

463 # Collect data to be written out 

464 natoms = len(atoms) 

465 

466 arrays = {} 

467 arrays['id'] = np.array(range(1, natoms + 1, 1), int) 

468 arrays['symbols'] = np.array(atoms.symbols) 

469 arrays['ref_num'] = np.zeros(natoms, int) 

470 arrays['ref_cell'] = np.zeros((natoms, 3), int) 

471 arrays['scaled_positions'] = np.array(atoms.get_scaled_positions()) 

472 

473 # get formatting for writing output based on atom arrays 

474 ncols, dtype_obj, fmt = _write_output_column_format(fr_cols, arrays) 

475 

476 # Pack fr_cols into record array 

477 data = np.zeros(natoms, dtype_obj) 

478 for column, ncol in zip(fr_cols, ncols): 

479 value = arrays[column] 

480 if ncol == 1: 

481 data[column] = np.squeeze(value) 

482 else: 

483 for c in range(ncol): 

484 data[column + str(c)] = value[:, c] 

485 

486 # Use map of tmp symbol to actual symbol 

487 for i in range(natoms): 

488 data[i][1] = atom_type_map[data[i][1]] 

489 

490 # Write the output 

491 _write_output(filename, header_lines, data, fmt, order=order)