# Source code for ase.io.sdf
"""Reads chemical data in SDF format (wraps the MDL Molfile V2000 format).
See https://en.wikipedia.org/wiki/Chemical_table_file#SDF
"""
from datetime import datetime
from typing import TextIO
import numpy as np
from ase.atoms import Atoms
from ase.data import atomic_masses_iupac2016
from ase.io.utils import connectivity2bonds, validate_comment_line
from ase.utils import reader, writer
def serialize_property_v2000(prop: str, data: list[tuple[int, int]]) -> str:
    """Serialize atom-index-value pairs to a V2000 property block.

    Each emitted line follows the fixed-width V2000 layout
    ``M  XXXnn8 aaa vvv ...``: the letter ``M``, two spaces, the
    three-character property code, a three-column entry count, and then
    one `` aaa vvv`` pair per entry.

    Parameters
    ----------
    prop : str
        Property code, e.g. ``'ISO'``; padded to three columns.
    data : list[tuple[int, int]]
        ``(atom_index, value)`` pairs to serialize.

    Returns
    -------
    str
        Zero or more newline-terminated property lines.  An empty
        ``data`` yields an empty string.
    """
    # A single V2000 property line carries at most eight entries.
    entries_per_line = 8
    lines = []
    for start in range(0, len(data), entries_per_line):
        chunk = data[start : start + entries_per_line]
        entries = ''.join(
            f' {i_atom:3} {value:3}' for i_atom, value in chunk
        )
        # Two spaces after 'M' keep the property code in columns 4-6.
        lines.append(f'M  {prop:3}{len(chunk):3}{entries}\n')
    return ''.join(lines)
def get_num_atoms_sdf_v2000(first_line: str) -> int:
    """Return the atom count stored at the start of a V2000 counts line.

    V2000 uses fixed three-character fields, so the atom count is read
    from the first three characters rather than by splitting on
    whitespace: with 100 or more atoms the count runs straight into the
    following field, and counts of 1000 or more cannot be represented
    in this dialect at all.

    http://biotech.fyicenter.com/1000024_SDF_File_Format_Specification.html
    """
    atom_count_field = first_line[:3]
    return int(atom_count_field)
# [docs]
@writer
def write_sdf(
    file_obj: TextIO,
    atoms: Atoms,
    title: str = '',
    comment: str = '',
    connectivity: np.ndarray | None = None,
    record_separator: str = '$$$$\n',
) -> None:
    r"""Write Atoms object to SDF file in MDL Molfile V2000 format.

    All records are written with the fixed column widths of the V2000
    layout (three-column integer fields, ``10.4f`` coordinates).

    Parameters
    ----------
    file_obj : path or file object
        A file path or writable file-like object.
    atoms : Atoms
        An ASE Atoms object with the atomic structure.
    title: str
        Optional line for molecule name.
    comment: str
        Optional comments.
    connectivity: np.ndarray
        Adjacency matrix for connectivity of atoms
        (0 not connected, 1 connected).
    record_separator: str
        Separator line used between records.

    Raises
    ------
    ValueError
        If there are more than 999 atoms or more than 999 bonds, which
        the three-column count fields cannot represent.
    """
    title = validate_comment_line(title, name='Title')
    comment = validate_comment_line(comment)

    num_atoms = len(atoms)
    if num_atoms > 999:
        raise ValueError('Cannot write more than 999 atoms.')

    if connectivity is not None:
        bonds = connectivity2bonds(connectivity)
    else:
        bonds = []
    num_bonds = len(bonds)
    if num_bonds > 999:
        raise ValueError('Cannot write more than 999 bonds.')

    # Header block.  Line 2 is 'IIPPPPPPPPMMDDYYHHmmdd...': two (blank)
    # user-initial columns, an 8-column program name, the timestamp and
    # the dimensional code.
    timestamp = datetime.now().strftime('%m%d%y%H%M')
    file_obj.write(f'{title}\n  {"ASE":>8}{timestamp}3D\n{comment}\n')

    # Counts line 'aaabbblllfffcccsssxxxrrrpppiiimmmvvvvvv': atom and bond
    # counts, eight unused three-column fields, then '999' and ' V2000'.
    file_obj.write(f'{num_atoms:3}{num_bonds:3}')
    file_obj.write(8 * '  0' + '999 V2000\n')

    # Atom block.  Atoms whose mass differs from the standard mass are
    # recorded as isotopes and emitted later in an 'M  ISO' property.
    isotope_data = []
    for i, atom in enumerate(atoms, start=1):
        expected_mass = atomic_masses_iupac2016[atom.number]
        if not np.isclose(atom.mass, expected_mass, rtol=0, atol=1e-3):
            isotope_data.append((i, int(round(atom.mass))))

        coords = ''.join(f'{coord:10.4f}' for coord in atom.position)
        # After the symbol: a two-column mass-difference field followed
        # by eleven three-column fields, all zero.
        file_obj.write(
            f'{coords} {atom.symbol:3} 0' + 11 * '  0' + '\n'
        )

    # Bond block '111222tttsssxxxrrrccc': 1-based atom indices, bond type
    # 1 and four zeroed three-column fields.
    for i, j in bonds:
        file_obj.write(f'{i + 1:3}{j + 1:3}  1' + 4 * '  0' + '\n')

    if isotope_data:
        file_obj.write(serialize_property_v2000('ISO', isotope_data))

    file_obj.write(f'M  END\n{record_separator}')
# [docs]
def _parse_atom_line_v2000(line: str) -> tuple[float, float, float, str]:
    """Split one V2000 atom-block line into ``(x, y, z, symbol)``.

    Whitespace splitting is tried first so loosely formatted files keep
    parsing as before.  If that fails (for example because two adjacent
    ten-column coordinates touch, as in ``-1000.0000-1000.0000``), the
    fixed V2000 columns are used instead.

    Raises
    ------
    ValueError
        If the line can be parsed neither way.
    """
    fields = line.split()
    try:
        x, y, z = (float(field) for field in fields[:3])
        symbol = fields[3]
    except (ValueError, IndexError):
        # Fixed-width layout: three 10-column floats, one blank column,
        # then a 3-column element symbol.
        x, y, z = (float(line[start : start + 10]) for start in (0, 10, 20))
        symbol = line[31:34].strip()
        if not symbol:
            raise ValueError(f'Cannot parse SDF atom line: {line!r}')
    return x, y, z, symbol


@reader
def read_sdf(file_obj: TextIO) -> Atoms:
    """Read the sdf data and compose the corresponding Atoms object.

    Only the atom block of the first record is used: the three header
    lines are skipped, the atom count is taken from the counts line and
    that many atom lines are parsed for positions and element symbols.
    Bonds and property lines are not read.
    """
    lines = file_obj.readlines()
    # first three lines header
    del lines[:3]
    num_atoms = get_num_atoms_sdf_v2000(lines.pop(0))
    positions = []
    symbols = []
    for line in lines[:num_atoms]:
        x, y, z, symbol = _parse_atom_line_v2000(line)
        symbols.append(symbol)
        positions.append((x, y, z))
    return Atoms(symbols=symbols, positions=positions)