Coverage for ase / io / sdf.py: 96.67%

60 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-30 08:22 +0000

1"""Reads chemical data in SDF format (wraps the MDL Molfile V2000 format). 

2 

3See https://en.wikipedia.org/wiki/Chemical_table_file#SDF 

4""" 

5 

6from datetime import datetime 

7from typing import TextIO 

8 

9import numpy as np 

10 

11from ase.atoms import Atoms 

12from ase.data import atomic_masses_iupac2016 

13from ase.io.utils import connectivity2bonds, validate_comment_line 

14from ase.utils import reader, writer 

15 

16 

def serialize_property_v2000(prop: str, data: list[tuple[int, int]]) -> str:
    """Serialize atom-index-value pairs to a V2000 property block.

    Each output line has the fixed-width layout ``M  PPPnn8 aaa vvv ...``:
    the letter ``M``, two spaces, the three-character property code, the
    number of pairs on the line (width 3), and then up to eight
    ``' aaa vvv'`` pairs.

    Parameters
    ----------
    prop: str
        Property code, e.g. ``'ISO'``; padded to three characters.

    data: list of (int, int)
        Pairs of atom index and property value, in output order.

    Returns
    -------
    str
        Zero or more newline-terminated property lines; the empty string
        if ``data`` is empty.
    """
    max_pairs_per_line = 8  # a V2000 property line holds at most 8 entries
    lines = []

    for start in range(0, len(data), max_pairs_per_line):
        chunk = data[start : start + max_pairs_per_line]
        pairs = ''.join(f' {i_atom:3} {value:3}' for i_atom, value in chunk)
        # Two spaces after 'M' are required by the fixed-column layout.
        lines.append(f'M  {prop:3}{len(chunk):3}{pairs}\n')

    return ''.join(lines)

32 

33 

def get_num_atoms_sdf_v2000(first_line: str) -> int:
    """Return the atom count stored at the start of a V2000 counts line.

    V2000 is a fixed-column format with three-character fields, so
    adjacent numbers run together without whitespace once there are
    100 or more atoms, and counts of 1000 or more cannot be represented.
    The count is therefore taken from the column range rather than by
    splitting on whitespace.

    http://biotech.fyicenter.com/1000024_SDF_File_Format_Specification.html
    """
    atom_count_field = first_line[:3]  # columns 1-3 of the counts line
    return int(atom_count_field)

44 

45 

@writer
def write_sdf(
    file_obj: TextIO,
    atoms: Atoms,
    title: str = '',
    comment: str = '',
    connectivity: np.ndarray | None = None,
    record_separator: str = '$$$$\n',
) -> None:
    r"""Write Atoms object to SDF file in MDL Molfile V2000 format.

    Parameters
    ----------
    file_obj : path or file object
        A file path or writable file-like object.

    atoms : Atoms
        An ASE Atoms object with the atomic structure.

    title: str
        Optional line for molecule name.

    comment: str
        Optional comments.

    connectivity: np.ndarray
        Adjacency matrix for connectivity of atoms
        (0 not connected, 1 connected). If omitted, no bonds are written.

    record_separator: str
        Separator line used between records.

    Raises
    ------
    ValueError
        If there are more than 999 atoms or more than 999 bonds, which
        do not fit into the three-character count fields of V2000.
    """
    title = validate_comment_line(title, name='Title')
    comment = validate_comment_line(comment)

    num_atoms = len(atoms)
    if num_atoms > 999:
        raise ValueError('Cannot write more than 999 atoms.')

    bonds = [] if connectivity is None else connectivity2bonds(connectivity)
    num_bonds = len(bonds)
    if num_bonds > 999:
        raise ValueError('Cannot write more than 999 bonds.')

    # Header block: title line, program/timestamp line, comment line.
    # The second line starts with two (blank) user-initial columns followed
    # by the 8-column program name, the MMDDYYHHmm timestamp and the
    # dimensionality code.
    timestamp = datetime.now().strftime('%m%d%y%H%M')
    file_obj.write(f'{title}\n  {"ASE":>8}{timestamp}3D\n{comment}\n')

    # Counts line: atom and bond counts, eight unused three-character
    # fields, the fixed '999' property-line count and the version tag.
    file_obj.write(f'{num_atoms:3}{num_bonds:3}' + 8 * '  0' + '999 V2000\n')

    isotope_data = []

    for index, atom in enumerate(atoms, start=1):
        # Atoms whose mass deviates from the standard atomic mass are
        # recorded as isotopes, using the mass rounded to an integer.
        expected_mass = atomic_masses_iupac2016[atom.number]
        if not np.isclose(atom.mass, expected_mass, rtol=0, atol=1e-3):
            isotope_data.append((index, int(round(atom.mass))))

        coords = ''.join(f'{coord:10.4f}' for coord in atom.position)
        # Symbol (3 columns), mass difference (2 columns), then eleven
        # three-character fields that are all left at zero.
        file_obj.write(f'{coords} {atom.symbol:3} 0' + 11 * '  0' + '\n')

    for i, j in bonds:
        # Indices are shifted by one for the 1-based SDF numbering; every
        # bond is written with bond type 1 and four zeroed trailing fields.
        file_obj.write(f'{i + 1:3}{j + 1:3}  1' + 4 * '  0' + '\n')

    if isotope_data:
        file_obj.write(serialize_property_v2000('ISO', isotope_data))

    file_obj.write(f'M  END\n{record_separator}')

119 

120 

@reader
def read_sdf(file_obj: TextIO) -> Atoms:
    """Build an Atoms object from the first molecule of an SDF file.

    The three header lines are skipped, the atom count is taken from the
    counts line, and that many atom lines are parsed for Cartesian
    coordinates and element symbols. Anything after the atom block
    is ignored.
    """
    num_header_lines = 3
    remaining = file_obj.readlines()[num_header_lines:]

    # The counts line directly follows the header.
    num_atoms = get_num_atoms_sdf_v2000(remaining.pop(0))

    symbols = []
    positions = []
    for atom_line in remaining[:num_atoms]:
        # Only the first four whitespace-separated fields are used.
        x, y, z, symbol = atom_line.split()[:4]
        symbols.append(symbol)
        positions.append(tuple(float(value) for value in (x, y, z)))

    return Atoms(symbols=symbols, positions=positions)