Coverage for /builds/ase/ase/ase/io/extxyz.py: 82.20%

528 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-08-02 00:12 +0000

1# fmt: off 

2 

3""" 

4Extended XYZ support 

5 

6Read/write files in "extended" XYZ format, storing additional 

7per-configuration information as key-value pairs on the XYZ 

8comment line, and additional per-atom properties as extra columns. 

9 

10Contributed by James Kermode <james.kermode@gmail.com> 

11""" 

12import json 

13import numbers 

14import re 

15import warnings 

16from io import StringIO, UnsupportedOperation 

17 

18import numpy as np 

19 

20from ase.atoms import Atoms 

21from ase.calculators.calculator import all_properties 

22from ase.calculators.singlepoint import SinglePointCalculator 

23from ase.constraints import FixAtoms, FixCartesian 

24from ase.io.formats import index2range 

25from ase.io.utils import ImageIterator 

26from ase.outputs import ArrayProperty, all_outputs 

27from ase.spacegroup.spacegroup import Spacegroup 

28from ase.stress import voigt_6_to_full_3x3_stress 

29from ase.utils import reader, writer 

30 

31__all__ = ['read_xyz', 'write_xyz', 'iread_xyz'] 

32 

33PROPERTY_NAME_MAP = {'positions': 'pos', 

34 'numbers': 'Z', 

35 'charges': 'charge', 

36 'symbols': 'species'} 

37 

38REV_PROPERTY_NAME_MAP = dict(zip(PROPERTY_NAME_MAP.values(), 

39 PROPERTY_NAME_MAP.keys())) 

40 

41KEY_QUOTED_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)' 

42 + r'\s*=\s*["\{\}]([^"\{\}]+)["\{\}]\s*') 

43KEY_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_]*)\s*=' 

44 + r'\s*([^\s]+)\s*') 

45KEY_RE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)\s*') 

46 

47UNPROCESSED_KEYS = {'uid'} 

48 

49SPECIAL_3_3_KEYS = {'Lattice', 'virial', 'stress'} 

50 

51# Determine 'per-atom' and 'per-config' based on all_outputs shape, 

52# but filter for things in all_properties because that's what 

53# SinglePointCalculator accepts 

54per_atom_properties = [] 

55per_config_properties = [] 

56for key, val in all_outputs.items(): 

57 if key not in all_properties: 

58 continue 

59 if isinstance(val, ArrayProperty) and val.shapespec[0] == 'natoms': 

60 per_atom_properties.append(key) 

61 else: 

62 per_config_properties.append(key) 

63 

64 

65def key_val_str_to_dict(string, sep=None): 

66 """ 

67 Parse an xyz properties string in a key=value and return a dict with 

68 various values parsed to native types. 

69 

70 Accepts brackets or quotes to delimit values. Parses integers, floats 

71 booleans and arrays thereof. Arrays with 9 values whose name is listed 

72 in SPECIAL_3_3_KEYS are converted to 3x3 arrays with Fortran ordering. 

73 

74 If sep is None, string will split on whitespace, otherwise will split 

75 key value pairs with the given separator. 

76 

77 """ 

78 # store the closing delimiters to match opening ones 

79 delimiters = { 

80 "'": "'", 

81 '"': '"', 

82 '{': '}', 

83 '[': ']' 

84 } 

85 

86 # Make pairs and process afterwards 

87 kv_pairs = [ 

88 [[]]] # List of characters for each entry, add a new list for new value 

89 cur_delimiter = None # push and pop closing delimiters 

90 escaped = False # add escaped sequences verbatim 

91 

92 # parse character-by-character unless someone can do nested brackets 

93 # and escape sequences in a regex 

94 for char in string.strip(): 

95 if escaped: # bypass everything if escaped 

96 kv_pairs[-1][-1].append(char) 

97 escaped = False 

98 elif char == '\\': # escape the next thing 

99 escaped = True 

100 elif cur_delimiter: # inside brackets 

101 if char == cur_delimiter: # found matching delimiter 

102 cur_delimiter = None 

103 else: 

104 kv_pairs[-1][-1].append(char) # inside quotes, add verbatim 

105 elif char in delimiters: 

106 cur_delimiter = delimiters[char] # brackets or quotes 

107 elif (sep is None and char.isspace()) or char == sep: 

108 if kv_pairs == [[[]]]: # empty, beginning of string 

109 continue 

110 elif kv_pairs[-1][-1] == []: 

111 continue 

112 else: 

113 kv_pairs.append([[]]) 

114 elif char == '=': 

115 if kv_pairs[-1] == [[]]: 

116 del kv_pairs[-1] 

117 kv_pairs[-1].append([]) # value 

118 else: 

119 kv_pairs[-1][-1].append(char) 

120 

121 kv_dict = {} 

122 

123 for kv_pair in kv_pairs: 

124 if len(kv_pair) == 0: # empty line 

125 continue 

126 elif len(kv_pair) == 1: # default to True 

127 key, value = ''.join(kv_pair[0]), 'T' 

128 else: # Smush anything else with kv-splitter '=' between them 

129 key, value = ''.join(kv_pair[0]), '='.join( 

130 ''.join(x) for x in kv_pair[1:]) 

131 

132 if key.lower() not in UNPROCESSED_KEYS: 

133 # Try to convert to (arrays of) floats, ints 

134 split_value = re.findall(r'[^\s,]+', value) 

135 try: 

136 try: 

137 numvalue = np.array(split_value, dtype=int) 

138 except (ValueError, OverflowError): 

139 # don't catch errors here so it falls through to bool 

140 numvalue = np.array(split_value, dtype=float) 

141 if len(numvalue) == 1: 

142 numvalue = numvalue[0] # Only one number 

143 value = numvalue 

144 except (ValueError, OverflowError): 

145 pass # value is unchanged 

146 

147 # convert special 3x3 matrices 

148 if key in SPECIAL_3_3_KEYS: 

149 if not isinstance(value, np.ndarray) or value.shape != (9,): 

150 raise ValueError("Got info item {}, expecting special 3x3 " 

151 "matrix, but value is not in the form of " 

152 "a 9-long numerical vector".format(key)) 

153 value = np.array(value).reshape((3, 3), order='F') 

154 

155 # parse special strings as boolean or JSON 

156 if isinstance(value, str): 

157 # Parse boolean values: 

158 # T or [tT]rue or TRUE -> True 

159 # F or [fF]alse or FALSE -> False 

160 # For list: 'T T F' -> [True, True, False] 

161 # Cannot use `.lower()` to reduce `str_to_bool` mapping because 

162 # 't'/'f' not accepted 

163 str_to_bool = { 

164 'T': True, 'F': False, 'true': True, 'false': False, 

165 'True': True, 'False': False, 'TRUE': True, 'FALSE': False 

166 } 

167 try: 

168 boolvalue = [str_to_bool[vpart] for vpart in 

169 re.findall(r'[^\s,]+', value)] 

170 

171 if len(boolvalue) == 1: 

172 value = boolvalue[0] 

173 else: 

174 value = boolvalue 

175 except KeyError: 

176 # Try to parse JSON 

177 if value.startswith("_JSON "): 

178 d = json.loads(value.replace("_JSON ", "", 1)) 

179 value = np.array(d) 

180 if value.dtype.kind not in ['i', 'f', 'b']: 

181 value = d 

182 

183 kv_dict[key] = value 

184 

185 return kv_dict 

186 

187 

188def key_val_str_to_dict_regex(s): 

189 """ 

190 Parse strings in the form 'key1=value1 key2="quoted value"' 

191 """ 

192 d = {} 

193 s = s.strip() 

194 while True: 

195 # Match quoted string first, then fall through to plain key=value 

196 m = KEY_QUOTED_VALUE.match(s) 

197 if m is None: 

198 m = KEY_VALUE.match(s) 

199 if m is not None: 

200 s = KEY_VALUE.sub('', s, 1) 

201 else: 

202 # Just a key with no value 

203 m = KEY_RE.match(s) 

204 if m is not None: 

205 s = KEY_RE.sub('', s, 1) 

206 else: 

207 s = KEY_QUOTED_VALUE.sub('', s, 1) 

208 

209 if m is None: 

210 break # No more matches 

211 

212 key = m.group(1) 

213 try: 

214 value = m.group(2) 

215 except IndexError: 

216 # default value is 'T' (True) 

217 value = 'T' 

218 

219 if key.lower() not in UNPROCESSED_KEYS: 

220 # Try to convert to (arrays of) floats, ints 

221 try: 

222 numvalue = [] 

223 for x in value.split(): 

224 if x.find('.') == -1: 

225 numvalue.append(int(float(x))) 

226 else: 

227 numvalue.append(float(x)) 

228 if len(numvalue) == 1: 

229 numvalue = numvalue[0] # Only one number 

230 elif len(numvalue) == 9: 

231 # special case: 3x3 matrix, fortran ordering 

232 numvalue = np.array(numvalue).reshape((3, 3), order='F') 

233 else: 

234 numvalue = np.array(numvalue) # vector 

235 value = numvalue 

236 except (ValueError, OverflowError): 

237 pass 

238 

239 # Parse boolean values: 'T' -> True, 'F' -> False, 

240 # 'T T F' -> [True, True, False] 

241 if isinstance(value, str): 

242 str_to_bool = {'T': True, 'F': False} 

243 

244 if len(value.split()) > 1: 

245 if all(x in str_to_bool for x in value.split()): 

246 value = [str_to_bool[x] for x in value.split()] 

247 elif value in str_to_bool: 

248 value = str_to_bool[value] 

249 

250 d[key] = value 

251 

252 return d 

253 

254 

255def escape(string): 

256 if (' ' in string or 

257 '"' in string or "'" in string or 

258 '{' in string or '}' in string or 

259 '[' in string or ']' in string): 

260 string = string.replace('"', '\\"') 

261 string = f'"{string}"' 

262 return string 

263 

264 

265def key_val_dict_to_str(dct, sep=' '): 

266 """ 

267 Convert atoms.info dictionary to extended XYZ string representation 

268 """ 

269 

270 def array_to_string(key, val): 

271 # some ndarrays are special (special 3x3 keys, and scalars/vectors of 

272 # numbers or bools), handle them here 

273 if key in SPECIAL_3_3_KEYS: 

274 # special 3x3 matrix, flatten in Fortran order 

275 val = val.reshape(val.size, order='F') 

276 if val.dtype.kind in ['i', 'f', 'b']: 

277 # numerical or bool scalars/vectors are special, for backwards 

278 # compat. 

279 if len(val.shape) == 0: 

280 # scalar 

281 val = str(known_types_to_str(val)) 

282 elif len(val.shape) == 1: 

283 # vector 

284 val = ' '.join(str(known_types_to_str(v)) for v in val) 

285 return val 

286 

287 def known_types_to_str(val): 

288 if isinstance(val, (bool, np.bool_)): 

289 return 'T' if val else 'F' 

290 elif isinstance(val, numbers.Real): 

291 return f'{val}' 

292 elif isinstance(val, Spacegroup): 

293 return val.symbol 

294 else: 

295 return val 

296 

297 if len(dct) == 0: 

298 return '' 

299 

300 string = '' 

301 for key in dct: 

302 val = dct[key] 

303 

304 if isinstance(val, np.ndarray): 

305 val = array_to_string(key, val) 

306 else: 

307 # convert any known types to string 

308 val = known_types_to_str(val) 

309 

310 if val is not None and not isinstance(val, str): 

311 # what's left is an object, try using JSON 

312 if isinstance(val, np.ndarray): 

313 val = val.tolist() 

314 try: 

315 val = '_JSON ' + json.dumps(val) 

316 # if this fails, let give up 

317 except TypeError: 

318 warnings.warn('Skipping unhashable information ' 

319 '{}'.format(key)) 

320 continue 

321 

322 key = escape(key) # escape and quote key 

323 eq = "=" 

324 # Should this really be setting empty value that's going to be 

325 # interpreted as bool True? 

326 if val is None: 

327 val = "" 

328 eq = "" 

329 val = escape(val) # escape and quote val 

330 

331 string += f'{key}{eq}{val}{sep}' 

332 

333 return string.strip() 

334 

335 

336def parse_properties(prop_str): 

337 """ 

338 Parse extended XYZ properties format string 

339 

340 Format is "[NAME:TYPE:NCOLS]...]", e.g. "species:S:1:pos:R:3". 

341 NAME is the name of the property. 

342 TYPE is one of R, I, S, L for real, integer, string and logical. 

343 NCOLS is number of columns for that property. 

344 """ 

345 

346 properties = {} 

347 properties_list = [] 

348 dtypes = [] 

349 converters = [] 

350 

351 fields = prop_str.split(':') 

352 

353 def parse_bool(x): 

354 """ 

355 Parse bool to string 

356 """ 

357 return {'T': True, 'F': False, 

358 'True': True, 'False': False}.get(x) 

359 

360 fmt_map = {'R': ('d', float), 

361 'I': ('i', int), 

362 'S': (object, str), 

363 'L': ('bool', parse_bool)} 

364 

365 for name, ptype, cols in zip(fields[::3], 

366 fields[1::3], 

367 [int(x) for x in fields[2::3]]): 

368 if ptype not in ('R', 'I', 'S', 'L'): 

369 raise ValueError('Unknown property type: ' + ptype) 

370 ase_name = REV_PROPERTY_NAME_MAP.get(name, name) 

371 

372 dtype, converter = fmt_map[ptype] 

373 if cols == 1: 

374 dtypes.append((name, dtype)) 

375 converters.append(converter) 

376 else: 

377 for c in range(cols): 

378 dtypes.append((name + str(c), dtype)) 

379 converters.append(converter) 

380 

381 properties[name] = (ase_name, cols) 

382 properties_list.append(name) 

383 

384 dtype = np.dtype(dtypes) 

385 return properties, properties_list, dtype, converters 

386 

387 

388def _read_xyz_frame(lines, natoms, properties_parser=key_val_str_to_dict, 

389 nvec=0): 

390 # comment line 

391 line = next(lines).strip() 

392 if nvec > 0: 

393 info = {'comment': line} 

394 else: 

395 info = properties_parser(line) if line else {} 

396 

397 pbc = None 

398 if 'pbc' in info: 

399 pbc = info.pop('pbc') 

400 elif 'Lattice' in info: 

401 # default pbc for extxyz file containing Lattice 

402 # is True in all directions 

403 pbc = [True, True, True] 

404 elif nvec > 0: 

405 # cell information given as pseudo-Atoms 

406 pbc = [False, False, False] 

407 

408 cell = None 

409 if 'Lattice' in info: 

410 # NB: ASE cell is transpose of extended XYZ lattice 

411 cell = info['Lattice'].T 

412 del info['Lattice'] 

413 elif nvec > 0: 

414 # cell information given as pseudo-Atoms 

415 cell = np.zeros((3, 3)) 

416 

417 if 'Properties' not in info: 

418 # Default set of properties is atomic symbols and positions only 

419 info['Properties'] = 'species:S:1:pos:R:3' 

420 properties, names, dtype, convs = parse_properties(info['Properties']) 

421 del info['Properties'] 

422 

423 data = [] 

424 for _ in range(natoms): 

425 try: 

426 line = next(lines) 

427 except StopIteration: 

428 raise XYZError('ase.io.extxyz: Frame has {} atoms, expected {}' 

429 .format(len(data), natoms)) 

430 vals = line.split() 

431 row = tuple(conv(val) for conv, val in zip(convs, vals)) 

432 data.append(row) 

433 

434 try: 

435 data = np.array(data, dtype) 

436 except TypeError: 

437 raise XYZError('Badly formatted data ' 

438 'or end of file reached before end of frame') 

439 

440 # Read VEC entries if present 

441 if nvec > 0: 

442 for ln in range(nvec): 

443 try: 

444 line = next(lines) 

445 except StopIteration: 

446 raise XYZError('ase.io.adfxyz: Frame has {} cell vectors, ' 

447 'expected {}'.format(len(cell), nvec)) 

448 entry = line.split() 

449 

450 if not entry[0].startswith('VEC'): 

451 raise XYZError(f'Expected cell vector, got {entry[0]}') 

452 

453 try: 

454 n = int(entry[0][3:]) 

455 except ValueError as e: 

456 raise XYZError('Expected VEC{}, got VEC{}' 

457 .format(ln + 1, entry[0][3:])) from e 

458 if n != ln + 1: 

459 raise XYZError('Expected VEC{}, got VEC{}' 

460 .format(ln + 1, n)) 

461 

462 cell[ln] = np.array([float(x) for x in entry[1:]]) 

463 pbc[ln] = True 

464 if nvec != pbc.count(True): 

465 raise XYZError('Problem with number of cell vectors') 

466 pbc = tuple(pbc) 

467 

468 arrays = {} 

469 for name in names: 

470 ase_name, cols = properties[name] 

471 if cols == 1: 

472 value = data[name] 

473 else: 

474 value = np.vstack([data[name + str(c)] 

475 for c in range(cols)]).T 

476 arrays[ase_name] = value 

477 

478 numbers = arrays.pop('numbers', None) 

479 symbols = arrays.pop('symbols', None) 

480 

481 if symbols is not None: 

482 symbols = [s.capitalize() for s in symbols] 

483 

484 atoms = Atoms(numbers if numbers is not None else symbols, 

485 positions=arrays.pop('positions', None), 

486 charges=arrays.pop('initial_charges', None), 

487 cell=cell, 

488 pbc=pbc, 

489 info=info) 

490 

491 # Read and set constraints 

492 if 'move_mask' in arrays: 

493 move_mask = arrays.pop('move_mask').astype(bool) 

494 if properties['move_mask'][1] == 3: 

495 cons = [] 

496 for a in range(natoms): 

497 cons.append(FixCartesian(a, mask=~move_mask[a, :])) 

498 atoms.set_constraint(cons) 

499 elif properties['move_mask'][1] == 1: 

500 atoms.set_constraint(FixAtoms(mask=~move_mask)) 

501 else: 

502 raise XYZError('Not implemented constraint') 

503 

504 set_calc_and_arrays(atoms, arrays) 

505 return atoms 

506 

507 

508def set_calc_and_arrays(atoms, arrays): 

509 results = {} 

510 

511 for name, array in arrays.items(): 

512 if name in all_properties: 

513 results[name] = array 

514 else: 

515 # store non-standard items in atoms.arrays 

516 atoms.new_array(name, array) 

517 

518 for key in list(atoms.info): 

519 if key in per_config_properties: 

520 results[key] = atoms.info.pop(key) 

521 # special case for stress- convert to Voigt 6-element form 

522 if key == 'stress' and results[key].shape == (3, 3): 

523 stress = results[key] 

524 stress = np.array([stress[0, 0], 

525 stress[1, 1], 

526 stress[2, 2], 

527 stress[1, 2], 

528 stress[0, 2], 

529 stress[0, 1]]) 

530 results[key] = stress 

531 

532 if results: 

533 atoms.calc = SinglePointCalculator(atoms, **results) 

534 

535 

536class XYZError(IOError): 

537 pass 

538 

539 

540class XYZChunk: 

541 def __init__(self, lines, natoms): 

542 self.lines = lines 

543 self.natoms = natoms 

544 

545 def build(self): 

546 """Convert unprocessed chunk into Atoms.""" 

547 return _read_xyz_frame(iter(self.lines), self.natoms) 

548 

549 

550def ixyzchunks(fd): 

551 """Yield unprocessed chunks (header, lines) for each xyz image.""" 

552 while True: 

553 line = next(fd).strip() # Raises StopIteration on empty file 

554 try: 

555 natoms = int(line) 

556 except ValueError: 

557 raise XYZError(f'Expected integer, found "{line}"') 

558 try: 

559 lines = [next(fd) for _ in range(1 + natoms)] 

560 except StopIteration: 

561 raise XYZError('Incomplete XYZ chunk') 

562 yield XYZChunk(lines, natoms) 

563 

564 

565iread_xyz = ImageIterator(ixyzchunks) 

566 

567 

568@reader 

569def read_xyz(fileobj, index=-1, properties_parser=key_val_str_to_dict): 

570 r""" 

571 Read from a file in Extended XYZ format 

572 

573 index is the frame to read, default is last frame (index=-1). 

574 properties_parser is the parse to use when converting the properties line 

575 to a dictionary, ``extxyz.key_val_str_to_dict`` is the default and can 

576 deal with most use cases, ``extxyz.key_val_str_to_dict_regex`` is slightly 

577 faster but has fewer features. 

578 

579 Extended XYZ format is an enhanced version of the `basic XYZ format 

580 <http://en.wikipedia.org/wiki/XYZ_file_format>`_ that allows extra 

581 columns to be present in the file for additonal per-atom properties as 

582 well as standardising the format of the comment line to include the 

583 cell lattice and other per-frame parameters. 

584 

585 It's easiest to describe the format with an example. Here is a 

586 standard XYZ file containing a bulk cubic 8 atom silicon cell :: 

587 

588 8 

589 Cubic bulk silicon cell 

590 Si 0.00000000 0.00000000 0.00000000 

591 Si 1.36000000 1.36000000 1.36000000 

592 Si 2.72000000 2.72000000 0.00000000 

593 Si 4.08000000 4.08000000 1.36000000 

594 Si 2.72000000 0.00000000 2.72000000 

595 Si 4.08000000 1.36000000 4.08000000 

596 Si 0.00000000 2.72000000 2.72000000 

597 Si 1.36000000 4.08000000 4.08000000 

598 

599 The first line is the number of atoms, followed by a comment and 

600 then one line per atom, giving the element symbol and cartesian 

601 x y, and z coordinates in Angstroms. 

602 

603 Here's the same configuration in extended XYZ format :: 

604 

605 8 

606 Lattice="5.44 0.0 0.0 0.0 5.44 0.0 0.0 0.0 5.44" Properties=species:S:1:pos:R:3 Time=0.0 

607 Si 0.00000000 0.00000000 0.00000000 

608 Si 1.36000000 1.36000000 1.36000000 

609 Si 2.72000000 2.72000000 0.00000000 

610 Si 4.08000000 4.08000000 1.36000000 

611 Si 2.72000000 0.00000000 2.72000000 

612 Si 4.08000000 1.36000000 4.08000000 

613 Si 0.00000000 2.72000000 2.72000000 

614 Si 1.36000000 4.08000000 4.08000000 

615 

616 In extended XYZ format, the comment line is replaced by a series of 

617 key/value pairs. The keys should be strings and values can be 

618 integers, reals, logicals (denoted by `T` and `F` for true and false) 

619 or strings. Quotes are required if a value contains any spaces (like 

620 `Lattice` above). There are two mandatory parameters that any 

621 extended XYZ: `Lattice` and `Properties`. Other parameters -- 

622 e.g. `Time` in the example above --- can be added to the parameter line 

623 as needed. 

624 

625 `Lattice` is a Cartesian 3x3 matrix representation of the cell 

626 vectors, with each vector stored as a column and the 9 values listed in 

627 Fortran column-major order, i.e. in the form :: 

628 

629 Lattice="R1x R1y R1z R2x R2y R2z R3x R3y R3z" 

630 

631 where `R1x R1y R1z` are the Cartesian x-, y- and z-components of the 

632 first lattice vector (:math:`\mathbf{a}`), `R2x R2y R2z` those of the second 

633 lattice vector (:math:`\mathbf{b}`) and `R3x R3y R3z` those of the 

634 third lattice vector (:math:`\mathbf{c}`). 

635 

636 The list of properties in the file is described by the `Properties` 

637 parameter, which should take the form of a series of colon separated 

638 triplets giving the name, format (`R` for real, `I` for integer) and 

639 number of columns of each property. For example:: 

640 

641 Properties="species:S:1:pos:R:3:vel:R:3:select:I:1" 

642 

643 indicates the first column represents atomic species, the next three 

644 columns represent atomic positions, the next three velcoities, and the 

645 last is an single integer called `select`. With this property 

646 definition, the line :: 

647 

648 Si 4.08000000 4.08000000 1.36000000 0.00000000 0.00000000 0.00000000 1 

649 

650 would describe a silicon atom at position (4.08,4.08,1.36) with zero 

651 velocity and the `select` property set to 1. 

652 

653 The property names `pos`, `Z`, `mass`, and `charge` map to ASE 

654 :attr:`ase.atoms.Atoms.arrays` entries named 

655 `positions`, `numbers`, `masses` and `charges` respectively. 

656 

657 Additional key-value pairs in the comment line are parsed into the 

658 :attr:`ase.Atoms.atoms.info` dictionary, with the following conventions 

659 

660 - Values can be quoted with `""`, `''`, `[]` or `{}` (the latter are 

661 included to ease command-line usage as the `{}` are not treated 

662 specially by the shell) 

663 - Quotes within keys or values can be escaped with `\"`. 

664 - Keys with special names `stress` or `virial` are treated as 3x3 matrices 

665 in Fortran order, as for `Lattice` above. 

666 - Otherwise, values with multiple elements are treated as 1D arrays, first 

667 assuming integer format and falling back to float if conversion is 

668 unsuccessful. 

669 - A missing value defaults to `True`, e.g. the comment line 

670 `"cutoff=3.4 have_energy"` leads to 

671 `{'cutoff': 3.4, 'have_energy': True}` in `atoms.info`. 

672 - Value strings starting with `"_JSON"` are interpreted as JSON content; 

673 similarly, when writing, anything which does not match the criteria above 

674 is serialised as JSON. 

675 

676 The extended XYZ format is also supported by the 

677 the `Ovito <https://www.ovito.org>`_ visualisation tool. 

678 """ # noqa: E501 

679 

680 if not isinstance(index, int) and not isinstance(index, slice): 

681 raise TypeError('Index argument is neither slice nor integer!') 

682 

683 # If possible, build a partial index up to the last frame required 

684 last_frame = None 

685 if isinstance(index, int) and index >= 0: 

686 last_frame = index 

687 elif isinstance(index, slice): 

688 if index.stop is not None and index.stop >= 0: 

689 last_frame = index.stop 

690 

691 # scan through file to find where the frames start 

692 try: 

693 fileobj.seek(0) 

694 except UnsupportedOperation: 

695 fileobj = StringIO(fileobj.read()) 

696 fileobj.seek(0) 

697 frames = [] 

698 while True: 

699 frame_pos = fileobj.tell() 

700 line = fileobj.readline() 

701 if line.strip() == '': 

702 break 

703 try: 

704 natoms = int(line) 

705 except ValueError as err: 

706 raise XYZError('ase.io.extxyz: Expected xyz header but got: {}' 

707 .format(err)) 

708 fileobj.readline() # read comment line 

709 for _ in range(natoms): 

710 fileobj.readline() 

711 # check for VEC 

712 nvec = 0 

713 while True: 

714 lastPos = fileobj.tell() 

715 line = fileobj.readline() 

716 if line.lstrip().startswith('VEC'): 

717 nvec += 1 

718 if nvec > 3: 

719 raise XYZError('ase.io.extxyz: More than 3 VECX entries') 

720 else: 

721 fileobj.seek(lastPos) 

722 break 

723 frames.append((frame_pos, natoms, nvec)) 

724 if last_frame is not None and len(frames) > last_frame: 

725 break 

726 

727 trbl = index2range(index, len(frames)) 

728 

729 for index in trbl: 

730 frame_pos, natoms, nvec = frames[index] 

731 fileobj.seek(frame_pos) 

732 # check for consistency with frame index table 

733 assert int(fileobj.readline()) == natoms 

734 yield _read_xyz_frame(fileobj, natoms, properties_parser, nvec) 

735 

736 

737def output_column_format(atoms, columns, arrays, write_info=True): 

738 """ 

739 Helper function to build extended XYZ comment line 

740 """ 

741 fmt_map = {'d': ('R', '%16.8f'), 

742 'f': ('R', '%16.8f'), 

743 'i': ('I', '%8d'), 

744 'O': ('S', '%s'), 

745 'S': ('S', '%s'), 

746 'U': ('S', '%-2s'), 

747 'b': ('L', ' %.1s')} 

748 

749 # NB: Lattice is stored as tranpose of ASE cell, 

750 # with Fortran array ordering 

751 lattice_str = ('Lattice="' 

752 + ' '.join([str(x) for x in np.reshape(atoms.cell.T, 

753 9, order='F')]) + 

754 '"') 

755 

756 property_names = [] 

757 property_types = [] 

758 property_ncols = [] 

759 dtypes = [] 

760 formats = [] 

761 

762 for column in columns: 

763 array = arrays[column] 

764 dtype = array.dtype 

765 

766 property_name = PROPERTY_NAME_MAP.get(column, column) 

767 property_type, fmt = fmt_map[dtype.kind] 

768 property_names.append(property_name) 

769 property_types.append(property_type) 

770 

771 if (len(array.shape) == 1 

772 or (len(array.shape) == 2 and array.shape[1] == 1)): 

773 ncol = 1 

774 dtypes.append((column, dtype)) 

775 else: 

776 ncol = array.shape[1] 

777 for c in range(ncol): 

778 dtypes.append((column + str(c), dtype)) 

779 

780 formats.extend([fmt] * ncol) 

781 property_ncols.append(ncol) 

782 

783 props_str = ':'.join([':'.join(x) for x in 

784 zip(property_names, 

785 property_types, 

786 [str(nc) for nc in property_ncols])]) 

787 

788 comment_str = '' 

789 if atoms.cell.any(): 

790 comment_str += lattice_str + ' ' 

791 comment_str += f'Properties={props_str}' 

792 

793 info = {} 

794 if write_info: 

795 info.update(atoms.info) 

796 info['pbc'] = atoms.get_pbc() # always save periodic boundary conditions 

797 comment_str += ' ' + key_val_dict_to_str(info) 

798 

799 dtype = np.dtype(dtypes) 

800 fmt = ' '.join(formats) + '\n' 

801 

802 return comment_str, property_ncols, dtype, fmt 

803 

804 

805@writer 

806def write_xyz(fileobj, images, comment='', columns=None, 

807 write_info=True, 

808 write_results=True, plain=False, vec_cell=False): 

809 """ 

810 Write output in extended XYZ format 

811 

812 Optionally, specify which columns (arrays) to include in output, 

813 whether to write the contents of the `atoms.info` dict to the 

814 XYZ comment line (default is True), the results of any 

815 calculator attached to this Atoms. The `plain` argument 

816 can be used to write a simple XYZ file with no additional information. 

817 `vec_cell` can be used to write the cell vectors as additional 

818 pseudo-atoms. 

819 

820 See documentation for :func:`read_xyz()` for further details of the extended 

821 XYZ file format. 

822 """ 

823 

824 if hasattr(images, 'get_positions'): 

825 images = [images] 

826 

827 for atoms in images: 

828 natoms = len(atoms) 

829 

830 if write_results: 

831 calculator = atoms.calc 

832 atoms = atoms.copy() 

833 

834 save_calc_results(atoms, calculator, calc_prefix="") 

835 

836 if atoms.info.get('stress', np.array([])).shape == (6,): 

837 atoms.info['stress'] = \ 

838 voigt_6_to_full_3x3_stress(atoms.info['stress']) 

839 

840 if columns is None: 

841 fr_cols = (['symbols', 'positions', 'move_mask'] 

842 + [key for key in atoms.arrays if 

843 key not in ['symbols', 'positions', 'numbers', 

844 'species', 'pos']]) 

845 else: 

846 fr_cols = columns[:] 

847 

848 if vec_cell: 

849 plain = True 

850 

851 if plain: 

852 fr_cols = ['symbols', 'positions'] 

853 write_info = False 

854 write_results = False 

855 

856 # Move symbols and positions to first two properties 

857 if 'symbols' in fr_cols: 

858 i = fr_cols.index('symbols') 

859 fr_cols[0], fr_cols[i] = fr_cols[i], fr_cols[0] 

860 

861 if 'positions' in fr_cols: 

862 i = fr_cols.index('positions') 

863 fr_cols[1], fr_cols[i] = fr_cols[i], fr_cols[1] 

864 

865 # Check first column "looks like" atomic symbols 

866 if fr_cols[0] in atoms.arrays: 

867 symbols = atoms.arrays[fr_cols[0]] 

868 else: 

869 symbols = [*atoms.symbols] 

870 

871 if natoms > 0 and not isinstance(symbols[0], str): 

872 raise ValueError('First column must be symbols-like') 

873 

874 # Check second column "looks like" atomic positions 

875 pos = atoms.arrays[fr_cols[1]] 

876 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f': 

877 raise ValueError('Second column must be position-like') 

878 

879 # if vec_cell add cell information as pseudo-atoms 

880 if vec_cell: 

881 nPBC = 0 

882 for i, b in enumerate(atoms.pbc): 

883 if not b: 

884 continue 

885 nPBC += 1 

886 symbols.append('VEC' + str(nPBC)) 

887 pos = np.vstack((pos, atoms.cell[i])) 

888 # add to natoms 

889 natoms += nPBC 

890 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f': 

891 raise ValueError( 

892 'Pseudo Atoms containing cell have bad coords') 

893 

894 # Move mask 

895 if 'move_mask' in fr_cols: 

896 cnstr = images[0].constraints 

897 if len(cnstr) > 0: 

898 c0 = cnstr[0] 

899 if isinstance(c0, FixAtoms): 

900 cnstr = np.ones((natoms,), dtype=bool) 

901 for idx in c0.index: 

902 cnstr[idx] = False # cnstr: atoms that can be moved 

903 elif isinstance(c0, FixCartesian): 

904 masks = np.ones((natoms, 3), dtype=bool) 

905 for i in range(len(cnstr)): 

906 idx = cnstr[i].index 

907 masks[idx] = cnstr[i].mask 

908 cnstr = ~masks # cnstr: coordinates that can be moved 

909 else: 

910 fr_cols.remove('move_mask') 

911 

912 # Collect data to be written out 

913 arrays = {} 

914 for column in fr_cols: 

915 if column == 'positions': 

916 arrays[column] = pos 

917 elif column in atoms.arrays: 

918 arrays[column] = atoms.arrays[column] 

919 elif column == 'symbols': 

920 arrays[column] = np.array(symbols) 

921 elif column == 'move_mask': 

922 arrays[column] = cnstr 

923 else: 

924 raise ValueError(f'Missing array "{column}"') 

925 

926 comm, ncols, dtype, fmt = output_column_format(atoms, 

927 fr_cols, 

928 arrays, 

929 write_info) 

930 

931 if plain or comment != '': 

932 # override key/value pairs with user-speficied comment string 

933 comm = comment.rstrip() 

934 if '\n' in comm: 

935 raise ValueError('Comment line should not have line breaks.') 

936 

937 # Pack fr_cols into record array 

938 data = np.zeros(natoms, dtype) 

939 for column, ncol in zip(fr_cols, ncols): 

940 value = arrays[column] 

941 if ncol == 1: 

942 data[column] = np.squeeze(value) 

943 else: 

944 for c in range(ncol): 

945 data[column + str(c)] = value[:, c] 

946 

947 nat = natoms 

948 if vec_cell: 

949 nat -= nPBC 

950 # Write the output 

951 fileobj.write('%d\n' % nat) 

952 fileobj.write(f'{comm}\n') 

953 for i in range(natoms): 

954 fileobj.write(fmt % tuple(data[i])) 

955 

956 

957def save_calc_results(atoms, calc=None, calc_prefix=None, 

958 remove_atoms_calc=False, force=False): 

959 """Update information in atoms from results in a calculator 

960 

961 Args: 

962 atoms (ase.atoms.Atoms): Atoms object, modified in place 

963 calc (ase.calculators.Calculator, optional): calculator to take results 

964 from. Defaults to :attr:`atoms.calc` 

965 calc_prefix (str, optional): String to prefix to results names 

966 in :attr:`atoms.arrays` and :attr:`atoms.info`. Defaults to 

967 calculator class name 

968 remove_atoms_calc (bool): remove the calculator from the `atoms` 

969 object after saving its results. Defaults to `False`, ignored if 

970 `calc` is passed in 

971 force (bool, optional): overwrite existing fields with same name, 

972 default False 

973 """ 

974 if calc is None: 

975 calc_use = atoms.calc 

976 else: 

977 calc_use = calc 

978 

979 if calc_use is None: 

980 return None, None 

981 

982 if calc_prefix is None: 

983 calc_prefix = calc_use.__class__.__name__ + '_' 

984 

985 per_config_results = {} 

986 per_atom_results = {} 

987 for prop, value in calc_use.results.items(): 

988 if prop in per_config_properties: 

989 per_config_results[calc_prefix + prop] = value 

990 elif prop in per_atom_properties: 

991 per_atom_results[calc_prefix + prop] = value 

992 

993 if not force: 

994 if any(key in atoms.info for key in per_config_results): 

995 raise KeyError("key from calculator already exists in atoms.info") 

996 if any(key in atoms.arrays for key in per_atom_results): 

997 raise KeyError("key from calculator already exists in atoms.arrays") 

998 

999 atoms.info.update(per_config_results) 

1000 atoms.arrays.update(per_atom_results) 

1001 

1002 if remove_atoms_calc and calc is None: 

1003 atoms.calc = None 

1004 

1005 

1006# create aliases for read/write functions 

1007read_extxyz = read_xyz 

1008write_extxyz = write_xyz