Coverage for ase / io / sdf.py: 96.67%
60 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-30 08:22 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-30 08:22 +0000
1"""Reads chemical data in SDF format (wraps the MDL Molfile V2000 format).
3See https://en.wikipedia.org/wiki/Chemical_table_file#SDF
4"""
6from datetime import datetime
7from typing import TextIO
9import numpy as np
11from ase.atoms import Atoms
12from ase.data import atomic_masses_iupac2016
13from ase.io.utils import connectivity2bonds, validate_comment_line
14from ase.utils import reader, writer
def serialize_property_v2000(prop: str, data: list[tuple[int, int]]) -> str:
    """Serialize atom-index-value pairs to a V2000 property block.

    Every emitted line follows the fixed-width layout
    ``M  PPPnn8 aaa vvv ...``: the ``M`` tag, two spaces, the
    three-character property code, the number of entries on the line
    (width 3), and then up to eight ``(atom index, value)`` pairs, each
    written as a space followed by a width-3 integer.

    Parameters
    ----------
    prop : str
        Property code such as ``'ISO'``; padded to three characters.
    data : list of (int, int)
        Pairs of atom index and property value, written in the order given.

    Returns
    -------
    str
        The newline-terminated property lines, or an empty string when
        ``data`` is empty.
    """
    # A single property line may hold at most eight pairs; longer data is
    # spread over several lines that each repeat the prefix.
    max_pairs_per_line = 8

    lines = []
    for start in range(0, len(data), max_pairs_per_line):
        chunk = data[start : start + max_pairs_per_line]
        entries = ''.join(
            f' {i_atom:3} {value:3}' for i_atom, value in chunk
        )
        # Two spaces after 'M' are required by the fixed-width format.
        lines.append(f'M  {prop:3}{len(chunk):3}{entries}\n')

    return ''.join(lines)
def get_num_atoms_sdf_v2000(first_line: str) -> int:
    """Return the atom count stored at the start of a V2000 counts line.

    The V2000 dialect stores its counts in fixed three-character fields.
    With 100 or more atoms there is consequently no whitespace between
    the atom count and the field that follows it, so the value is taken
    by position rather than by splitting. Counts of 1000 and above
    cannot be represented by the format.

    http://biotech.fyicenter.com/1000024_SDF_File_Format_Specification.html
    """
    atom_count_field = first_line[:3]  # leading three-character field
    return int(atom_count_field)
@writer
def write_sdf(
    file_obj: TextIO,
    atoms: Atoms,
    title: str = '',
    comment: str = '',
    connectivity: np.ndarray | None = None,
    record_separator: str = '$$$$\n',
) -> None:
    r"""Write Atoms object to SDF file in MDL Molfile V2000 format.

    The record consists of the three-line header, the counts line, one
    line per atom, one line per bond, an optional ``M  ISO`` property
    block for atoms whose mass differs from the tabulated standard mass,
    the ``M  END`` terminator and finally ``record_separator``.
    All numeric columns are fixed-width, as the V2000 format requires.

    Parameters
    ----------
    file_obj : path or file object
        A file path or writable file-like object.

    atoms : Atoms
        An ASE Atoms object with the atomic structure.

    title: str
        Optional line for molecule name.

    comment: str
        Optional comments.

    connectivity: np.ndarray
        Adjacency matrix for connectivity of atoms
        (0 not connected, 1 connected). If omitted, no bonds are written.

    record_separator: str
        Separator line used between records (default ``'$$$$\n'``).

    Raises
    ------
    ValueError
        If there are more than 999 atoms or more than 999 bonds, since
        the three-character count fields cannot hold larger numbers.
    """
    title = validate_comment_line(title, name='Title')
    comment = validate_comment_line(comment)

    num_atoms = len(atoms)
    if num_atoms > 999:
        raise ValueError('Cannot write more than 999 atoms.')

    bonds = [] if connectivity is None else connectivity2bonds(connectivity)
    num_bonds = len(bonds)
    if num_bonds > 999:
        raise ValueError('Cannot write more than 999 bonds.')

    # Header block. The second line starts with a two-character (blank)
    # user-initials field, followed by the eight-character program name,
    # the MMDDYYHHmm timestamp and the dimensionality code.
    timestamp = datetime.now().strftime('%m%d%y%H%M')
    file_obj.write(f'{title}\n  {"ASE":>8}{timestamp}3D\n{comment}\n')

    # Counts line: atom and bond counts, eight unused three-character
    # fields, the default property-line count (999) and the version tag.
    file_obj.write(f'{num_atoms:3}{num_bonds:3}')
    file_obj.write(8 * '  0' + '999 V2000\n')

    # Atom block. Atoms whose mass deviates from the tabulated value are
    # remembered (with their 1-based index) for the isotope property block.
    isotope_data = []
    for i, atom in enumerate(atoms, start=1):
        expected_mass = atomic_masses_iupac2016[atom.number]
        if not np.isclose(atom.mass, expected_mass, rtol=0, atol=1e-3):
            isotope_data.append((i, int(round(atom.mass))))

        coordinates = ''.join(f'{coord:10.4f}' for coord in atom.position)
        # After the symbol: a two-character mass-difference field and
        # eleven three-character fields, all zero.
        file_obj.write(
            f'{coordinates} {atom.symbol:3} 0' + 11 * '  0' + '\n'
        )

    # Bond block: 1-based atom indices, bond type 1 (single) and four
    # unused three-character fields.
    for i, j in bonds:
        file_obj.write(f'{i + 1:3}{j + 1:3}  1' + 4 * '  0' + '\n')

    if isotope_data:
        file_obj.write(serialize_property_v2000('ISO', isotope_data))

    file_obj.write(f'M  END\n{record_separator}')
@reader
def read_sdf(file_obj: TextIO) -> Atoms:
    """Build an Atoms object from the atom block of an SDF record.

    The three header lines are skipped, the atom count is taken from the
    counts line that follows them, and that many atom lines are parsed.
    From each atom line only the first four whitespace-separated fields
    (x, y, z, element symbol) are used; nothing after the atom block is
    read into the result.
    """
    all_lines = file_obj.readlines()

    # Lines 0-2 form the header; line 3 is the counts line.
    counts_line = all_lines[3]
    remaining = all_lines[4:]
    num_atoms = get_num_atoms_sdf_v2000(counts_line)

    symbols = []
    positions = []
    for row in remaining[:num_atoms]:
        fields = row.split()[:4]
        x, y, z, element = fields
        symbols.append(element)
        positions.append(tuple(float(value) for value in (x, y, z)))

    return Atoms(symbols=symbols, positions=positions)