Coverage for ase / io / extxyz.py: 84.91%

530 statements  

« prev     ^ index     » next       coverage.py v7.13.3, created at 2026-02-04 10:20 +0000

1# fmt: off 

2 

3""" 

4Extended XYZ support 

5 

6Read/write files in "extended" XYZ format, storing additional 

7per-configuration information as key-value pairs on the XYZ 

8comment line, and additional per-atom properties as extra columns. 

9 

10Contributed by James Kermode <james.kermode@gmail.com> 

11""" 

12import json 

13import numbers 

14import re 

15import warnings 

16from collections.abc import Iterator 

17from io import StringIO, UnsupportedOperation 

18from typing import TextIO 

19 

20import numpy as np 

21 

22from ase.atoms import Atoms 

23from ase.calculators.calculator import all_properties 

24from ase.calculators.singlepoint import SinglePointCalculator 

25from ase.constraints import FixAtoms, FixCartesian 

26from ase.io.formats import index2range 

27from ase.io.utils import ImageChunk, ImageIterator 

28from ase.outputs import ArrayProperty, all_outputs 

29from ase.spacegroup.spacegroup import Spacegroup 

30from ase.stress import voigt_6_to_full_3x3_stress 

31from ase.utils import reader, writer 

32 

33__all__ = ['read_xyz', 'write_xyz', 'iread_xyz'] 

34 

35PROPERTY_NAME_MAP = {'positions': 'pos', 

36 'numbers': 'Z', 

37 'charges': 'charge', 

38 'symbols': 'species'} 

39 

40REV_PROPERTY_NAME_MAP = dict(zip(PROPERTY_NAME_MAP.values(), 

41 PROPERTY_NAME_MAP.keys())) 

42 

43KEY_QUOTED_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)' 

44 + r'\s*=\s*["\{\}]([^"\{\}]+)["\{\}]\s*') 

45KEY_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_]*)\s*=' 

46 + r'\s*([^\s]+)\s*') 

47KEY_RE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)\s*') 

48 

49UNPROCESSED_KEYS = {'uid'} 

50 

51SPECIAL_3_3_KEYS = {'Lattice', 'virial', 'stress'} 

52 

53# Determine 'per-atom' and 'per-config' based on all_outputs shape, 

54# but filter for things in all_properties because that's what 

55# SinglePointCalculator accepts 

56per_atom_properties = [] 

57per_config_properties = [] 

58for key, val in all_outputs.items(): 

59 if key not in all_properties: 

60 continue 

61 if isinstance(val, ArrayProperty) and val.shapespec[0] == 'natoms': 

62 per_atom_properties.append(key) 

63 else: 

64 per_config_properties.append(key) 

65 

66 

67def key_val_str_to_dict(string, sep=None): 

68 """ 

69 Parse an xyz properties string in a key=value and return a dict with 

70 various values parsed to native types. 

71 

72 Accepts brackets or quotes to delimit values. Parses integers, floats 

73 booleans and arrays thereof. Arrays with 9 values whose name is listed 

74 in SPECIAL_3_3_KEYS are converted to 3x3 arrays with Fortran ordering. 

75 

76 If sep is None, string will split on whitespace, otherwise will split 

77 key value pairs with the given separator. 

78 

79 """ 

80 # store the closing delimiters to match opening ones 

81 delimiters = { 

82 "'": "'", 

83 '"': '"', 

84 '{': '}', 

85 '[': ']' 

86 } 

87 

88 # Make pairs and process afterwards 

89 kv_pairs = [ 

90 [[]]] # List of characters for each entry, add a new list for new value 

91 cur_delimiter = None # push and pop closing delimiters 

92 escaped = False # add escaped sequences verbatim 

93 

94 # parse character-by-character unless someone can do nested brackets 

95 # and escape sequences in a regex 

96 for char in string.strip(): 

97 if escaped: # bypass everything if escaped 

98 kv_pairs[-1][-1].append(char) 

99 escaped = False 

100 elif char == '\\': # escape the next thing 

101 escaped = True 

102 elif cur_delimiter: # inside brackets 

103 if char == cur_delimiter: # found matching delimiter 

104 cur_delimiter = None 

105 else: 

106 kv_pairs[-1][-1].append(char) # inside quotes, add verbatim 

107 elif char in delimiters: 

108 cur_delimiter = delimiters[char] # brackets or quotes 

109 elif (sep is None and char.isspace()) or char == sep: 

110 if kv_pairs == [[[]]]: # empty, beginning of string 

111 continue 

112 elif kv_pairs[-1][-1] == []: 

113 continue 

114 else: 

115 kv_pairs.append([[]]) 

116 elif char == '=': 

117 if kv_pairs[-1] == [[]]: 

118 del kv_pairs[-1] 

119 kv_pairs[-1].append([]) # value 

120 else: 

121 kv_pairs[-1][-1].append(char) 

122 

123 kv_dict = {} 

124 

125 for kv_pair in kv_pairs: 

126 if len(kv_pair) == 0: # empty line 

127 continue 

128 elif len(kv_pair) == 1: # default to True 

129 key, value = ''.join(kv_pair[0]), 'T' 

130 else: # Smush anything else with kv-splitter '=' between them 

131 key, value = ''.join(kv_pair[0]), '='.join( 

132 ''.join(x) for x in kv_pair[1:]) 

133 

134 if key.lower() not in UNPROCESSED_KEYS: 

135 # Try to convert to (arrays of) floats, ints 

136 split_value = re.findall(r'[^\s,]+', value) 

137 try: 

138 try: 

139 numvalue = np.array(split_value, dtype=int) 

140 except (ValueError, OverflowError): 

141 # don't catch errors here so it falls through to bool 

142 numvalue = np.array(split_value, dtype=float) 

143 if len(numvalue) == 1: 

144 numvalue = numvalue[0] # Only one number 

145 value = numvalue 

146 except (ValueError, OverflowError): 

147 pass # value is unchanged 

148 

149 # convert special 3x3 matrices 

150 if key in SPECIAL_3_3_KEYS: 

151 if not isinstance(value, np.ndarray) or value.shape != (9,): 

152 raise ValueError("Got info item {}, expecting special 3x3 " 

153 "matrix, but value is not in the form of " 

154 "a 9-long numerical vector".format(key)) 

155 value = np.array(value).reshape((3, 3), order='F') 

156 

157 # parse special strings as boolean or JSON 

158 if isinstance(value, str): 

159 # Parse boolean values: 

160 # T or [tT]rue or TRUE -> True 

161 # F or [fF]alse or FALSE -> False 

162 # For list: 'T T F' -> [True, True, False] 

163 # Cannot use `.lower()` to reduce `str_to_bool` mapping because 

164 # 't'/'f' not accepted 

165 str_to_bool = { 

166 'T': True, 'F': False, 'true': True, 'false': False, 

167 'True': True, 'False': False, 'TRUE': True, 'FALSE': False 

168 } 

169 try: 

170 boolvalue = [str_to_bool[vpart] for vpart in 

171 re.findall(r'[^\s,]+', value)] 

172 

173 if len(boolvalue) == 1: 

174 value = boolvalue[0] 

175 else: 

176 value = boolvalue 

177 except KeyError: 

178 # Try to parse JSON 

179 if value.startswith("_JSON "): 

180 d = json.loads(value.replace("_JSON ", "", 1)) 

181 value = np.array(d) 

182 if value.dtype.kind not in ['i', 'f', 'b']: 

183 value = d 

184 

185 kv_dict[key] = value 

186 

187 return kv_dict 

188 

189 

190def key_val_str_to_dict_regex(s): 

191 """ 

192 Parse strings in the form 'key1=value1 key2="quoted value"' 

193 """ 

194 d = {} 

195 s = s.strip() 

196 while True: 

197 # Match quoted string first, then fall through to plain key=value 

198 m = KEY_QUOTED_VALUE.match(s) 

199 if m is None: 

200 m = KEY_VALUE.match(s) 

201 if m is not None: 

202 s = KEY_VALUE.sub('', s, 1) 

203 else: 

204 # Just a key with no value 

205 m = KEY_RE.match(s) 

206 if m is not None: 

207 s = KEY_RE.sub('', s, 1) 

208 else: 

209 s = KEY_QUOTED_VALUE.sub('', s, 1) 

210 

211 if m is None: 

212 break # No more matches 

213 

214 key = m.group(1) 

215 try: 

216 value = m.group(2) 

217 except IndexError: 

218 # default value is 'T' (True) 

219 value = 'T' 

220 

221 if key.lower() not in UNPROCESSED_KEYS: 

222 # Try to convert to (arrays of) floats, ints 

223 try: 

224 numvalue = [] 

225 for x in value.split(): 

226 if x.find('.') == -1: 

227 numvalue.append(int(float(x))) 

228 else: 

229 numvalue.append(float(x)) 

230 if len(numvalue) == 1: 

231 numvalue = numvalue[0] # Only one number 

232 elif len(numvalue) == 9: 

233 # special case: 3x3 matrix, fortran ordering 

234 numvalue = np.array(numvalue).reshape((3, 3), order='F') 

235 else: 

236 numvalue = np.array(numvalue) # vector 

237 value = numvalue 

238 except (ValueError, OverflowError): 

239 pass 

240 

241 # Parse boolean values: 'T' -> True, 'F' -> False, 

242 # 'T T F' -> [True, True, False] 

243 if isinstance(value, str): 

244 str_to_bool = {'T': True, 'F': False} 

245 

246 if len(value.split()) > 1: 

247 if all(x in str_to_bool for x in value.split()): 

248 value = [str_to_bool[x] for x in value.split()] 

249 elif value in str_to_bool: 

250 value = str_to_bool[value] 

251 

252 d[key] = value 

253 

254 return d 

255 

256 

257def escape(string): 

258 if (' ' in string or 

259 '"' in string or "'" in string or 

260 '{' in string or '}' in string or 

261 '[' in string or ']' in string): 

262 string = string.replace('"', '\\"') 

263 string = f'"{string}"' 

264 return string 

265 

266 

267def key_val_dict_to_str(dct, sep=' '): 

268 """ 

269 Convert atoms.info dictionary to extended XYZ string representation 

270 """ 

271 

272 def array_to_string(key, val): 

273 # some ndarrays are special (special 3x3 keys, and scalars/vectors of 

274 # numbers or bools), handle them here 

275 if key in SPECIAL_3_3_KEYS: 

276 # special 3x3 matrix, flatten in Fortran order 

277 val = val.reshape(val.size, order='F') 

278 if val.dtype.kind in ['i', 'f', 'b']: 

279 # numerical or bool scalars/vectors are special, for backwards 

280 # compat. 

281 if len(val.shape) == 0: 

282 # scalar 

283 val = str(known_types_to_str(val)) 

284 elif len(val.shape) == 1: 

285 # vector 

286 val = ' '.join(str(known_types_to_str(v)) for v in val) 

287 return val 

288 

289 def known_types_to_str(val): 

290 if isinstance(val, (bool, np.bool_)): 

291 return 'T' if val else 'F' 

292 elif isinstance(val, numbers.Real): 

293 return f'{val}' 

294 elif isinstance(val, Spacegroup): 

295 return val.symbol 

296 else: 

297 return val 

298 

299 if len(dct) == 0: 

300 return '' 

301 

302 string = '' 

303 for key in dct: 

304 val = dct[key] 

305 

306 if isinstance(val, np.ndarray): 

307 val = array_to_string(key, val) 

308 else: 

309 # convert any known types to string 

310 val = known_types_to_str(val) 

311 

312 if val is not None and not isinstance(val, str): 

313 # what's left is an object, try using JSON 

314 if isinstance(val, np.ndarray): 

315 val = val.tolist() 

316 try: 

317 val = '_JSON ' + json.dumps(val) 

318 # if this fails, let give up 

319 except TypeError: 

320 warnings.warn('Skipping unhashable information ' 

321 '{}'.format(key)) 

322 continue 

323 

324 key = escape(key) # escape and quote key 

325 eq = "=" 

326 # Should this really be setting empty value that's going to be 

327 # interpreted as bool True? 

328 if val is None: 

329 val = "" 

330 eq = "" 

331 val = escape(val) # escape and quote val 

332 

333 string += f'{key}{eq}{val}{sep}' 

334 

335 return string.strip() 

336 

337 

338def parse_properties(prop_str): 

339 """ 

340 Parse extended XYZ properties format string 

341 

342 Format is "[NAME:TYPE:NCOLS]...]", e.g. "species:S:1:pos:R:3". 

343 NAME is the name of the property. 

344 TYPE is one of R, I, S, L for real, integer, string and logical. 

345 NCOLS is number of columns for that property. 

346 """ 

347 

348 properties = {} 

349 properties_list = [] 

350 dtypes = [] 

351 converters = [] 

352 

353 fields = prop_str.split(':') 

354 

355 def parse_bool(x): 

356 """ 

357 Parse bool to string 

358 """ 

359 return {'T': True, 'F': False, 

360 'True': True, 'False': False}.get(x) 

361 

362 fmt_map = {'R': ('d', float), 

363 'I': ('i', int), 

364 'S': (object, str), 

365 'L': ('bool', parse_bool)} 

366 

367 for name, ptype, cols in zip(fields[::3], 

368 fields[1::3], 

369 [int(x) for x in fields[2::3]]): 

370 if ptype not in ('R', 'I', 'S', 'L'): 

371 raise ValueError('Unknown property type: ' + ptype) 

372 ase_name = REV_PROPERTY_NAME_MAP.get(name, name) 

373 

374 dtype, converter = fmt_map[ptype] 

375 if cols == 1: 

376 dtypes.append((name, dtype)) 

377 converters.append(converter) 

378 else: 

379 for c in range(cols): 

380 dtypes.append((name + str(c), dtype)) 

381 converters.append(converter) 

382 

383 properties[name] = (ase_name, cols) 

384 properties_list.append(name) 

385 

386 dtype = np.dtype(dtypes) 

387 return properties, properties_list, dtype, converters 

388 

389 

390def _read_xyz_frame(lines, natoms, properties_parser=key_val_str_to_dict, 

391 nvec=0): 

392 # comment line 

393 line = next(lines).strip() 

394 if nvec > 0: 

395 info = {'comment': line} 

396 else: 

397 info = properties_parser(line) if line else {} 

398 

399 pbc = None 

400 if 'pbc' in info: 

401 pbc = info.pop('pbc') 

402 elif 'Lattice' in info: 

403 # default pbc for extxyz file containing Lattice 

404 # is True in all directions 

405 pbc = [True, True, True] 

406 elif nvec > 0: 

407 # cell information given as pseudo-Atoms 

408 pbc = [False, False, False] 

409 

410 cell = None 

411 if 'Lattice' in info: 

412 # NB: ASE cell is transpose of extended XYZ lattice 

413 cell = info['Lattice'].T 

414 del info['Lattice'] 

415 elif nvec > 0: 

416 # cell information given as pseudo-Atoms 

417 cell = np.zeros((3, 3)) 

418 

419 if 'Properties' not in info: 

420 # Default set of properties is atomic symbols and positions only 

421 info['Properties'] = 'species:S:1:pos:R:3' 

422 properties, names, dtype, convs = parse_properties(info['Properties']) 

423 del info['Properties'] 

424 

425 data = [] 

426 for _ in range(natoms): 

427 try: 

428 line = next(lines) 

429 except StopIteration: 

430 raise XYZError('ase.io.extxyz: Frame has {} atoms, expected {}' 

431 .format(len(data), natoms)) 

432 vals = line.split() 

433 row = tuple(conv(val) for conv, val in zip(convs, vals)) 

434 data.append(row) 

435 

436 try: 

437 data = np.array(data, dtype) 

438 except TypeError: 

439 raise XYZError('Badly formatted data ' 

440 'or end of file reached before end of frame') 

441 

442 # Read VEC entries if present 

443 if nvec > 0: 

444 for ln in range(nvec): 

445 try: 

446 line = next(lines) 

447 except StopIteration: 

448 raise XYZError('ase.io.adfxyz: Frame has {} cell vectors, ' 

449 'expected {}'.format(len(cell), nvec)) 

450 entry = line.split() 

451 

452 if not entry[0].startswith('VEC'): 

453 raise XYZError(f'Expected cell vector, got {entry[0]}') 

454 

455 try: 

456 n = int(entry[0][3:]) 

457 except ValueError as e: 

458 raise XYZError('Expected VEC{}, got VEC{}' 

459 .format(ln + 1, entry[0][3:])) from e 

460 if n != ln + 1: 

461 raise XYZError('Expected VEC{}, got VEC{}' 

462 .format(ln + 1, n)) 

463 

464 cell[ln] = np.array([float(x) for x in entry[1:]]) 

465 pbc[ln] = True 

466 if nvec != pbc.count(True): 

467 raise XYZError('Problem with number of cell vectors') 

468 pbc = tuple(pbc) 

469 

470 arrays = {} 

471 for name in names: 

472 ase_name, cols = properties[name] 

473 if cols == 1: 

474 value = data[name] 

475 else: 

476 value = np.vstack([data[name + str(c)] 

477 for c in range(cols)]).T 

478 arrays[ase_name] = value 

479 

480 numbers = arrays.pop('numbers', None) 

481 symbols = arrays.pop('symbols', None) 

482 

483 if symbols is not None: 

484 symbols = [s.capitalize() for s in symbols] 

485 

486 atoms = Atoms(numbers if numbers is not None else symbols, 

487 positions=arrays.pop('positions', None), 

488 charges=arrays.pop('initial_charges', None), 

489 cell=cell, 

490 pbc=pbc, 

491 info=info) 

492 

493 # Read and set constraints 

494 if 'move_mask' in arrays: 

495 move_mask = arrays.pop('move_mask').astype(bool) 

496 if properties['move_mask'][1] == 3: 

497 cons = [] 

498 for a in range(natoms): 

499 cons.append(FixCartesian(a, mask=~move_mask[a, :])) 

500 atoms.set_constraint(cons) 

501 elif properties['move_mask'][1] == 1: 

502 atoms.set_constraint(FixAtoms(mask=~move_mask)) 

503 else: 

504 raise XYZError('Not implemented constraint') 

505 

506 set_calc_and_arrays(atoms, arrays) 

507 return atoms 

508 

509 

510def set_calc_and_arrays(atoms, arrays): 

511 results = {} 

512 

513 for name, array in arrays.items(): 

514 if name in all_properties: 

515 results[name] = array 

516 else: 

517 # store non-standard items in atoms.arrays 

518 atoms.new_array(name, array) 

519 

520 for key in list(atoms.info): 

521 if key in per_config_properties: 

522 results[key] = atoms.info.pop(key) 

523 # special case for stress- convert to Voigt 6-element form 

524 if key == 'stress' and results[key].shape == (3, 3): 

525 stress = results[key] 

526 stress = np.array([stress[0, 0], 

527 stress[1, 1], 

528 stress[2, 2], 

529 stress[1, 2], 

530 stress[0, 2], 

531 stress[0, 1]]) 

532 results[key] = stress 

533 

534 if results: 

535 atoms.calc = SinglePointCalculator(atoms, **results) 

536 

537 

538class XYZError(IOError): 

539 pass 

540 

541 

542class XYZChunk(ImageChunk): 

543 def __init__(self, fd: TextIO, pos: int) -> None: 

544 self.fd = fd 

545 self.pos = pos 

546 

547 def build(self, **kwargs) -> Atoms: 

548 """Convert unprocessed chunk into Atoms.""" 

549 self.fd.seek(self.pos) 

550 natoms = int(self.fd.readline().strip()[0]) 

551 lines = [self.fd.readline() for _ in range(1 + natoms)] 

552 return _read_xyz_frame(iter(lines), natoms, **kwargs) 

553 

554 

555def ixyzchunks(fd: TextIO) -> Iterator[XYZChunk]: 

556 """Yield unprocessed chunks (header, lines) for each xyz image.""" 

557 pos = fd.tell() 

558 while line := fd.readline(): 

559 natoms = int(line.strip()[0]) 

560 _ = [fd.readline() for _ in range(1 + natoms)] 

561 yield XYZChunk(fd, pos) 

562 pos = fd.tell() 

563 

564 

565iread_xyz = ImageIterator(ixyzchunks) 

566 

567 

568@reader 

569def read_xyz(fileobj, index=-1, properties_parser=key_val_str_to_dict): 

570 r""" 

571 Read from a file in Extended XYZ format 

572 

573 index is the frame to read, default is last frame (index=-1). 

574 properties_parser is the parse to use when converting the properties line 

575 to a dictionary, ``extxyz.key_val_str_to_dict`` is the default and can 

576 deal with most use cases, ``extxyz.key_val_str_to_dict_regex`` is slightly 

577 faster but has fewer features. 

578 

579 Extended XYZ format is an enhanced version of the `basic XYZ format 

580 <http://en.wikipedia.org/wiki/XYZ_file_format>`_ that allows extra 

581 columns to be present in the file for additonal per-atom properties as 

582 well as standardising the format of the comment line to include the 

583 cell lattice and other per-frame parameters. 

584 

585 It's easiest to describe the format with an example. Here is a 

586 standard XYZ file containing a bulk cubic 8 atom silicon cell :: 

587 

588 8 

589 Cubic bulk silicon cell 

590 Si 0.00000000 0.00000000 0.00000000 

591 Si 1.36000000 1.36000000 1.36000000 

592 Si 2.72000000 2.72000000 0.00000000 

593 Si 4.08000000 4.08000000 1.36000000 

594 Si 2.72000000 0.00000000 2.72000000 

595 Si 4.08000000 1.36000000 4.08000000 

596 Si 0.00000000 2.72000000 2.72000000 

597 Si 1.36000000 4.08000000 4.08000000 

598 

599 The first line is the number of atoms, followed by a comment and 

600 then one line per atom, giving the element symbol and cartesian 

601 x y, and z coordinates in Angstroms. 

602 

603 Here's the same configuration in extended XYZ format :: 

604 

605 8 

606 Lattice="5.44 0.0 0.0 0.0 5.44 0.0 0.0 0.0 5.44" Properties=species:S:1:pos:R:3 Time=0.0 

607 Si 0.00000000 0.00000000 0.00000000 

608 Si 1.36000000 1.36000000 1.36000000 

609 Si 2.72000000 2.72000000 0.00000000 

610 Si 4.08000000 4.08000000 1.36000000 

611 Si 2.72000000 0.00000000 2.72000000 

612 Si 4.08000000 1.36000000 4.08000000 

613 Si 0.00000000 2.72000000 2.72000000 

614 Si 1.36000000 4.08000000 4.08000000 

615 

616 In extended XYZ format, the comment line is replaced by a series of 

617 key/value pairs. The keys should be strings and values can be 

618 integers, reals, logicals (denoted by `T` and `F` for true and false) 

619 or strings. Quotes are required if a value contains any spaces (like 

620 `Lattice` above). There are two mandatory parameters that any 

621 extended XYZ: `Lattice` and `Properties`. Other parameters -- 

622 e.g. `Time` in the example above --- can be added to the parameter line 

623 as needed. 

624 

625 `Lattice` is a Cartesian 3x3 matrix representation of the cell 

626 vectors, with each vector stored as a column and the 9 values listed in 

627 Fortran column-major order, i.e. in the form :: 

628 

629 Lattice="R1x R1y R1z R2x R2y R2z R3x R3y R3z" 

630 

631 where `R1x R1y R1z` are the Cartesian x-, y- and z-components of the 

632 first lattice vector (:math:`\mathbf{a}`), `R2x R2y R2z` those of the second 

633 lattice vector (:math:`\mathbf{b}`) and `R3x R3y R3z` those of the 

634 third lattice vector (:math:`\mathbf{c}`). 

635 

636 The list of properties in the file is described by the `Properties` 

637 parameter, which should take the form of a series of colon separated 

638 triplets giving the name, format (`R` for real, `I` for integer) and 

639 number of columns of each property. For example:: 

640 

641 Properties="species:S:1:pos:R:3:vel:R:3:select:I:1" 

642 

643 indicates the first column represents atomic species, the next three 

644 columns represent atomic positions, the next three velcoities, and the 

645 last is an single integer called `select`. With this property 

646 definition, the line :: 

647 

648 Si 4.08000000 4.08000000 1.36000000 0.00000000 0.00000000 0.00000000 1 

649 

650 would describe a silicon atom at position (4.08,4.08,1.36) with zero 

651 velocity and the `select` property set to 1. 

652 

653 The property names `pos`, `Z`, `mass`, and `charge` map to ASE 

654 :attr:`ase.atoms.Atoms.arrays` entries named 

655 `positions`, `numbers`, `masses` and `charges` respectively. 

656 

657 Additional key-value pairs in the comment line are parsed into the 

658 :attr:`ase.Atoms.atoms.info` dictionary, with the following conventions 

659 

660 - Values can be quoted with `""`, `''`, `[]` or `{}` (the latter are 

661 included to ease command-line usage as the `{}` are not treated 

662 specially by the shell) 

663 - Quotes within keys or values can be escaped with `\"`. 

664 - Keys with special names `stress` or `virial` are treated as 3x3 matrices 

665 in Fortran order, as for `Lattice` above. 

666 - Otherwise, values with multiple elements are treated as 1D arrays, first 

667 assuming integer format and falling back to float if conversion is 

668 unsuccessful. 

669 - A missing value defaults to `True`, e.g. the comment line 

670 `"cutoff=3.4 have_energy"` leads to 

671 `{'cutoff': 3.4, 'have_energy': True}` in `atoms.info`. 

672 - Value strings starting with `"_JSON"` are interpreted as JSON content; 

673 similarly, when writing, anything which does not match the criteria above 

674 is serialised as JSON. 

675 

676 The extended XYZ format is also supported by the 

677 the `Ovito <https://www.ovito.org>`_ visualisation tool. 

678 """ # noqa: E501 

679 

680 if not isinstance(index, int) and not isinstance(index, slice): 

681 raise TypeError('Index argument is neither slice nor integer!') 

682 

683 # If possible, build a partial index up to the last frame required 

684 last_frame = None 

685 if isinstance(index, int) and index >= 0: 

686 last_frame = index 

687 elif isinstance(index, slice): 

688 if index.stop is not None and index.stop >= 0: 

689 last_frame = index.stop 

690 

691 # scan through file to find where the frames start 

692 try: 

693 fileobj.seek(0) 

694 except UnsupportedOperation: 

695 fileobj = StringIO(fileobj.read()) 

696 fileobj.seek(0) 

697 frames = [] 

698 while True: 

699 frame_pos = fileobj.tell() 

700 line = fileobj.readline() 

701 if line.strip() == '': 

702 break 

703 try: 

704 natoms = int(line) 

705 except ValueError as err: 

706 raise XYZError('ase.io.extxyz: Expected xyz header but got: {}' 

707 .format(err)) 

708 fileobj.readline() # read comment line 

709 for _ in range(natoms): 

710 fileobj.readline() 

711 # check for VEC 

712 nvec = 0 

713 while True: 

714 lastPos = fileobj.tell() 

715 line = fileobj.readline() 

716 if line.lstrip().startswith('VEC'): 

717 nvec += 1 

718 if nvec > 3: 

719 raise XYZError('ase.io.extxyz: More than 3 VECX entries') 

720 else: 

721 fileobj.seek(lastPos) 

722 break 

723 frames.append((frame_pos, natoms, nvec)) 

724 if last_frame is not None and len(frames) > last_frame: 

725 break 

726 

727 trbl = index2range(index, len(frames)) 

728 

729 for index in trbl: 

730 frame_pos, natoms, nvec = frames[index] 

731 fileobj.seek(frame_pos) 

732 # check for consistency with frame index table 

733 assert int(fileobj.readline()) == natoms 

734 yield _read_xyz_frame(fileobj, natoms, properties_parser, nvec) 

735 

736 

737def output_column_format(atoms, columns, arrays, write_info=True): 

738 """ 

739 Helper function to build extended XYZ comment line 

740 """ 

741 fmt_map = {'d': ('R', '%16.8f'), 

742 'f': ('R', '%16.8f'), 

743 'i': ('I', '%8d'), 

744 'O': ('S', '%s'), 

745 'S': ('S', '%s'), 

746 'U': ('S', '%-2s'), 

747 'b': ('L', ' %.1s')} 

748 

749 # NB: Lattice is stored as tranpose of ASE cell, 

750 # with Fortran array ordering 

751 lattice_str = ('Lattice="' 

752 + ' '.join([str(x) for x in np.reshape(atoms.cell.T, 

753 9, order='F')]) + 

754 '"') 

755 

756 property_names = [] 

757 property_types = [] 

758 property_ncols = [] 

759 dtypes = [] 

760 formats = [] 

761 

762 for column in columns: 

763 array = arrays[column] 

764 dtype = array.dtype 

765 

766 property_name = PROPERTY_NAME_MAP.get(column, column) 

767 property_type, fmt = fmt_map[dtype.kind] 

768 property_names.append(property_name) 

769 property_types.append(property_type) 

770 

771 if (len(array.shape) == 1 

772 or (len(array.shape) == 2 and array.shape[1] == 1)): 

773 ncol = 1 

774 dtypes.append((column, dtype)) 

775 else: 

776 ncol = array.shape[1] 

777 for c in range(ncol): 

778 dtypes.append((column + str(c), dtype)) 

779 

780 formats.extend([fmt] * ncol) 

781 property_ncols.append(ncol) 

782 

783 props_str = ':'.join([':'.join(x) for x in 

784 zip(property_names, 

785 property_types, 

786 [str(nc) for nc in property_ncols])]) 

787 

788 comment_str = '' 

789 if atoms.cell.any(): 

790 comment_str += lattice_str + ' ' 

791 comment_str += f'Properties={props_str}' 

792 

793 info = {} 

794 if write_info: 

795 info.update(atoms.info) 

796 info['pbc'] = atoms.get_pbc() # always save periodic boundary conditions 

797 comment_str += ' ' + key_val_dict_to_str(info) 

798 

799 dtype = np.dtype(dtypes) 

800 fmt = ' '.join(formats) + '\n' 

801 

802 return comment_str, property_ncols, dtype, fmt 

803 

804 

805def _make_move_mask(atoms: Atoms) -> np.ndarray: 

806 natoms = len(atoms) 

807 cnstr = atoms.constraints 

808 cnstr = [_ for _ in cnstr if isinstance(_, (FixAtoms, FixCartesian))] 

809 if any(isinstance(_, FixCartesian) for _ in cnstr): 

810 move_mask = np.ones((natoms, 3), dtype=bool) 

811 else: 

812 move_mask = np.ones((natoms,), dtype=bool) 

813 for c0 in cnstr: 

814 if isinstance(c0, FixAtoms): 

815 move_mask[c0.index] = False 

816 elif isinstance(c0, FixCartesian): 

817 # The `False` elements of `move_mask` should be kept. 

818 move_mask[c0.index] = ~c0.mask & move_mask[c0.index] 

819 return move_mask 

820 

821 

822@writer 

823def write_xyz(fileobj, images, comment='', columns=None, 

824 write_info=True, 

825 write_results=True, plain=False, vec_cell=False): 

826 """ 

827 Write output in extended XYZ format 

828 

829 ``images`` may be Atoms or any Iterable[Atoms]. 

830 

831 Optionally, specify which columns (arrays) to include in output, 

832 whether to write the contents of the `atoms.info` dict to the 

833 XYZ comment line (default is True), the results of any 

834 calculator attached to this Atoms. The `plain` argument 

835 can be used to write a simple XYZ file with no additional information. 

836 `vec_cell` can be used to write the cell vectors as additional 

837 pseudo-atoms. 

838 

839 See documentation for :func:`read_xyz()` for further details of the extended 

840 XYZ file format. 

841 """ 

842 

843 if hasattr(images, 'get_positions'): 

844 images = [images] 

845 

846 for atoms in images: 

847 natoms = len(atoms) 

848 

849 if write_results: 

850 calculator = atoms.calc 

851 atoms = atoms.copy() 

852 

853 save_calc_results(atoms, calculator, calc_prefix="") 

854 

855 if atoms.info.get('stress', np.array([])).shape == (6,): 

856 atoms.info['stress'] = \ 

857 voigt_6_to_full_3x3_stress(atoms.info['stress']) 

858 

859 if columns is None: 

860 fr_cols = (['symbols', 'positions', 'move_mask'] 

861 + [key for key in atoms.arrays if 

862 key not in ['symbols', 'positions', 'numbers', 

863 'species', 'pos']]) 

864 else: 

865 fr_cols = columns[:] 

866 

867 if vec_cell: 

868 plain = True 

869 

870 if plain: 

871 fr_cols = ['symbols', 'positions'] 

872 write_info = False 

873 write_results = False 

874 

875 # Move symbols and positions to first two properties 

876 if 'symbols' in fr_cols: 

877 i = fr_cols.index('symbols') 

878 fr_cols[0], fr_cols[i] = fr_cols[i], fr_cols[0] 

879 

880 if 'positions' in fr_cols: 

881 i = fr_cols.index('positions') 

882 fr_cols[1], fr_cols[i] = fr_cols[i], fr_cols[1] 

883 

884 # Check first column "looks like" atomic symbols 

885 if fr_cols[0] in atoms.arrays: 

886 symbols = atoms.arrays[fr_cols[0]] 

887 else: 

888 symbols = [*atoms.symbols] 

889 

890 if natoms > 0 and not isinstance(symbols[0], str): 

891 raise ValueError('First column must be symbols-like') 

892 

893 # Check second column "looks like" atomic positions 

894 pos = atoms.arrays[fr_cols[1]] 

895 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f': 

896 raise ValueError('Second column must be position-like') 

897 

898 # if vec_cell add cell information as pseudo-atoms 

899 if vec_cell: 

900 nPBC = 0 

901 for i, b in enumerate(atoms.pbc): 

902 if not b: 

903 continue 

904 nPBC += 1 

905 symbols.append('VEC' + str(nPBC)) 

906 pos = np.vstack((pos, atoms.cell[i])) 

907 # add to natoms 

908 natoms += nPBC 

909 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f': 

910 raise ValueError( 

911 'Pseudo Atoms containing cell have bad coords') 

912 

913 # Move mask 

914 if 'move_mask' in fr_cols: 

915 move_mask = _make_move_mask(atoms) 

916 if np.all(move_mask): 

917 fr_cols.remove('move_mask') 

918 

919 # Collect data to be written out 

920 arrays = {} 

921 for column in fr_cols: 

922 if column == 'positions': 

923 arrays[column] = pos 

924 elif column in atoms.arrays: 

925 arrays[column] = atoms.arrays[column] 

926 elif column == 'symbols': 

927 arrays[column] = np.array(symbols) 

928 elif column == 'move_mask': 

929 arrays[column] = move_mask 

930 else: 

931 raise ValueError(f'Missing array "{column}"') 

932 

933 comm, ncols, dtype, fmt = output_column_format(atoms, 

934 fr_cols, 

935 arrays, 

936 write_info) 

937 

938 if plain or comment != '': 

939 # override key/value pairs with user-speficied comment string 

940 comm = comment.rstrip() 

941 if '\n' in comm: 

942 raise ValueError('Comment line should not have line breaks.') 

943 

944 # Pack fr_cols into record array 

945 data = np.zeros(natoms, dtype) 

946 for column, ncol in zip(fr_cols, ncols): 

947 value = arrays[column] 

948 if ncol == 1: 

949 data[column] = np.squeeze(value) 

950 else: 

951 for c in range(ncol): 

952 data[column + str(c)] = value[:, c] 

953 

954 nat = natoms 

955 if vec_cell: 

956 nat -= nPBC 

957 # Write the output 

958 fileobj.write('%d\n' % nat) 

959 fileobj.write(f'{comm}\n') 

960 for i in range(natoms): 

961 fileobj.write(fmt % tuple(data[i])) 

962 

963 

964def save_calc_results(atoms, calc=None, calc_prefix=None, 

965 remove_atoms_calc=False, force=False): 

966 """Update information in atoms from results in a calculator 

967 

968 Args: 

969 atoms (ase.atoms.Atoms): Atoms object, modified in place 

970 calc (ase.calculators.Calculator, optional): calculator to take results 

971 from. Defaults to :attr:`atoms.calc` 

972 calc_prefix (str, optional): String to prefix to results names 

973 in :attr:`atoms.arrays` and :attr:`atoms.info`. Defaults to 

974 calculator class name 

975 remove_atoms_calc (bool): remove the calculator from the `atoms` 

976 object after saving its results. Defaults to `False`, ignored if 

977 `calc` is passed in 

978 force (bool, optional): overwrite existing fields with same name, 

979 default False 

980 """ 

981 if calc is None: 

982 calc_use = atoms.calc 

983 else: 

984 calc_use = calc 

985 

986 if calc_use is None: 

987 return None, None 

988 

989 if calc_prefix is None: 

990 calc_prefix = calc_use.__class__.__name__ + '_' 

991 

992 per_config_results = {} 

993 per_atom_results = {} 

994 for prop, value in calc_use.results.items(): 

995 if prop in per_config_properties: 

996 per_config_results[calc_prefix + prop] = value 

997 elif prop in per_atom_properties: 

998 per_atom_results[calc_prefix + prop] = value 

999 

1000 if not force: 

1001 if any(key in atoms.info for key in per_config_results): 

1002 raise KeyError("key from calculator already exists in atoms.info") 

1003 if any(key in atoms.arrays for key in per_atom_results): 

1004 raise KeyError("key from calculator already exists in atoms.arrays") 

1005 

1006 atoms.info.update(per_config_results) 

1007 atoms.arrays.update(per_atom_results) 

1008 

1009 if remove_atoms_calc and calc is None: 

1010 atoms.calc = None 

1011 

1012 

1013# create aliases for read/write functions 

1014read_extxyz = read_xyz 

1015write_extxyz = write_xyz