3
3
from rdkit .Chem .rdchem import Atom , Bond , Mol , BondType
4
4
import os
5
5
import time
6
+
6
7
# openbabel
7
8
try :
8
9
from openbabel import openbabel
10
+
9
11
USE_OBABEL = True
10
12
except ModuleNotFoundError as e :
11
13
USE_OBABEL = False
12
14
15
+
13
16
def get_explicit_valence (atom , verbose = False ):
14
17
exp_val_calculated_from_bonds = int (sum ([bond .GetBondTypeAsDouble () for bond in atom .GetBonds ()]))
15
18
try :
16
19
exp_val = atom .GetExplicitValence ()
17
20
if exp_val != exp_val_calculated_from_bonds :
18
21
if verbose :
19
- print (f"Explicit valence given by GetExplicitValence() and sum of bond order are inconsistent on { atom .GetSymbol ()} { atom .GetIdx ()+ 1 } , using sum of bond order." )
22
+ print (
23
+ f"Explicit valence given by GetExplicitValence() and sum of bond order are inconsistent on { atom .GetSymbol ()} { atom .GetIdx () + 1 } , using sum of bond order." )
20
24
return exp_val_calculated_from_bonds
21
25
except :
22
26
return exp_val_calculated_from_bonds
23
27
28
+
24
29
def regularize_formal_charges (mol , sanitize = True , verbose = False ):
25
30
"""
26
31
Regularize formal charges of atoms
@@ -37,6 +42,7 @@ def regularize_formal_charges(mol, sanitize=True, verbose=False):
37
42
else :
38
43
return mol
39
44
45
+
40
46
def assign_formal_charge_for_atom (atom , verbose = False ):
41
47
"""
42
48
assigen formal charge according to 8-electron rule for element B,C,N,O,S,P,As
@@ -48,12 +54,13 @@ def assign_formal_charge_for_atom(atom, verbose=False):
48
54
elif atom .GetSymbol () == "C" :
49
55
atom .SetFormalCharge (valence - 4 )
50
56
if valence == 3 :
51
- print (f"Detect a valence of 3 on #C{ atom .GetIdx ()+ 1 } , the formal charge of this atom will be assigned to -1" )
57
+ print (
58
+ f"Detect a valence of 3 on #C{ atom .GetIdx () + 1 } , the formal charge of this atom will be assigned to -1" )
52
59
elif valence > 4 :
53
- raise ValueError (f"#C{ atom .GetIdx ()+ 1 } has a valence larger than 4" )
60
+ raise ValueError (f"#C{ atom .GetIdx () + 1 } has a valence larger than 4" )
54
61
elif atom .GetSymbol () == "N" :
55
62
if valence > 4 :
56
- raise ValueError (f"#N{ atom .GetIdx ()+ 1 } has a valence larger than 4" )
63
+ raise ValueError (f"#N{ atom .GetIdx () + 1 } has a valence larger than 4" )
57
64
else :
58
65
atom .SetFormalCharge (valence - 3 )
59
66
elif atom .GetSymbol () == "O" :
@@ -64,34 +71,35 @@ def assign_formal_charge_for_atom(atom, verbose=False):
64
71
elif valence == 3 :
65
72
atom .SetFormalCharge (1 )
66
73
elif valence > 6 :
67
- raise ValueError (f"#S{ atom .GetIdx ()+ 1 } has a valence larger than 6" )
74
+ raise ValueError (f"#S{ atom .GetIdx () + 1 } has a valence larger than 6" )
68
75
else :
69
76
atom .SetFormalCharge (0 )
70
77
elif atom .GetSymbol () == "P" or atom .GetSymbol () == "As" :
71
78
if valence == 5 :
72
79
atom .SetFormalCharge (0 )
73
80
elif valence > 5 :
74
- raise ValueError (f"#{ atom .GetSymbol ()} { atom .GetIdx ()+ 1 } has a valence larger than 5" )
81
+ raise ValueError (f"#{ atom .GetSymbol ()} { atom .GetIdx () + 1 } has a valence larger than 5" )
75
82
else :
76
83
atom .SetFormalCharge (valence - 3 )
77
84
85
+
78
86
# print bond and atom information (for debugger)
79
87
def print_bonds (mol ):
80
88
for bond in mol .GetBonds ():
81
89
begin_atom = bond .GetBeginAtom ()
82
90
end_atom = bond .GetEndAtom ()
83
- print (f'{ begin_atom .GetSymbol ()} { begin_atom .GetIdx ()+ 1 } { end_atom .GetSymbol ()} { end_atom .GetIdx ()+ 1 } { bond .GetBondType ()} ' )
91
+ print (
92
+ f'{ begin_atom .GetSymbol ()} { begin_atom .GetIdx () + 1 } { end_atom .GetSymbol ()} { end_atom .GetIdx () + 1 } { bond .GetBondType ()} ' )
93
+
84
94
85
95
def print_atoms (mol ):
86
96
for atom in mol .GetAtoms ():
87
- print (f'{ atom .GetSymbol ()} { atom .GetIdx ()+ 1 } { atom .GetFormalCharge ()} { get_explicit_valence (atom )} ' )
97
+ print (f'{ atom .GetSymbol ()} { atom .GetIdx () + 1 } { atom .GetFormalCharge ()} { get_explicit_valence (atom )} ' )
88
98
89
99
90
100
def is_terminal_oxygen (O_atom ):
91
- if len (O_atom .GetNeighbors ()) == 1 :
92
- return True
93
- else :
94
- return False
101
+ return len (O_atom .GetNeighbors ()) == 1
102
+
95
103
96
104
def get_terminal_oxygens (atom ):
97
105
terminal_oxygens = []
@@ -101,6 +109,21 @@ def get_terminal_oxygens(atom):
101
109
terminal_oxygens .append (nei )
102
110
return terminal_oxygens
103
111
112
+
113
+ def is_terminal_NR2 (N_atom ):
114
+ return len (N_atom .GetNeighbors ()) == 3
115
+
116
+
117
+ def get_terminal_NR2s (atom ):
118
+ terminal_NR2s = []
119
+ for nei in atom .GetNeighbors ():
120
+ if nei .GetSymbol () == "N" :
121
+ if is_terminal_NR2 (nei ):
122
+ terminal_NR2s .append (nei )
123
+ terminal_NR2s .sort (key = lambda N_atom : len ([atom for atom in N_atom .GetNeighbors () if atom .GetSymbol () == 'H' ]))
124
+ return terminal_NR2s
125
+
126
+
104
127
def sanitize_phosphate_Patom (P_atom , verbose = True ):
105
128
if P_atom .GetSymbol () == "P" :
106
129
terminal_oxygens = get_terminal_oxygens (P_atom )
@@ -116,11 +139,13 @@ def sanitize_phosphate_Patom(P_atom, verbose=True):
116
139
bond .SetBondType (Chem .rdchem .BondType .SINGLE )
117
140
terminal_oxygens [ii ].SetFormalCharge (- 1 )
118
141
142
+
119
143
def sanitize_phosphate (mol ):
120
144
for atom in mol .GetAtoms ():
121
145
sanitize_phosphate_Patom (atom )
122
146
return mol
123
147
148
+
124
149
def sanitize_sulfate_Satom (S_atom , verbose = True ):
125
150
if S_atom .GetSymbol () == "S" :
126
151
terminal_oxygens = get_terminal_oxygens (S_atom )
@@ -136,11 +161,13 @@ def sanitize_sulfate_Satom(S_atom, verbose=True):
136
161
bond = mol .GetBondBetweenAtoms (S_atom .GetIdx (), terminal_oxygens [ii ].GetIdx ())
137
162
bond .SetBondType (Chem .rdchem .BondType .DOUBLE )
138
163
164
+
139
165
def sanitize_sulfate (mol ):
140
166
for atom in mol .GetAtoms ():
141
167
sanitize_sulfate_Satom (atom )
142
168
return mol
143
169
170
+
144
171
def sanitize_carboxyl_Catom (C_atom , verbose = True ):
145
172
if C_atom .GetSymbol () == "C" :
146
173
terminal_oxygens = get_terminal_oxygens (C_atom )
@@ -157,11 +184,40 @@ def sanitize_carboxyl_Catom(C_atom, verbose=True):
157
184
bond2 .SetBondType (Chem .rdchem .BondType .DOUBLE )
158
185
terminal_oxygens [1 ].SetFormalCharge (0 )
159
186
187
+
160
188
def sanitize_carboxyl (mol ):
161
189
for atom in mol .GetAtoms ():
162
190
sanitize_carboxyl_Catom (atom )
163
191
return mol
164
192
193
+
194
+ def sanitize_guanidine_Catom (C_atom , verbose = True ):
195
+ if C_atom .GetSymbol () == "C" :
196
+ terminal_NR2s = get_terminal_NR2s (C_atom )
197
+ mol = C_atom .GetOwningMol ()
198
+ if len (terminal_NR2s ) == 3 :
199
+ if verbose :
200
+ print ("Guanidyl group detected, sanitizing it..." )
201
+ # set two C-N and one C=N+
202
+ bond1 = mol .GetBondBetweenAtoms (C_atom .GetIdx (), terminal_NR2s [0 ].GetIdx ())
203
+ bond1 .SetBondType (Chem .rdchem .BondType .SINGLE )
204
+ terminal_NR2s [0 ].SetFormalCharge (- 1 )
205
+
206
+ bond2 = mol .GetBondBetweenAtoms (C_atom .GetIdx (), terminal_NR2s [1 ].GetIdx ())
207
+ bond2 .SetBondType (Chem .rdchem .BondType .SINGLE )
208
+ terminal_NR2s [1 ].SetFormalCharge (0 )
209
+
210
+ bond3 = mol .GetBondBetweenAtoms (C_atom .GetIdx (), terminal_NR2s [2 ].GetIdx ())
211
+ bond3 .SetBondType (Chem .rdchem .BondType .DOUBLE )
212
+ terminal_NR2s [2 ].SetFormalCharge (1 )
213
+
214
+
215
+ def sanitize_guanidine (mol ):
216
+ for atom in mol .GetAtoms ():
217
+ sanitize_guanidine_Catom (atom )
218
+ return mol
219
+
220
+
165
221
def sanitize_nitro_Natom (N_atom , verbose = True ):
166
222
if N_atom .GetSymbol () == "N" :
167
223
terminal_oxygens = get_terminal_oxygens (N_atom )
@@ -178,17 +234,20 @@ def sanitize_nitro_Natom(N_atom, verbose=True):
178
234
bond2 .SetBondType (Chem .rdchem .BondType .DOUBLE )
179
235
terminal_oxygens [1 ].SetFormalCharge (0 )
180
236
237
+
181
238
def sanitize_nitro (mol ):
182
239
for atom in mol .GetAtoms ():
183
240
sanitize_nitro_Natom (atom )
184
241
return mol
185
242
243
+
186
244
def is_terminal_nitrogen (N_atom ):
187
245
if N_atom .GetSymbol () == 'N' and len (N_atom .GetNeighbors ()) == 1 :
188
246
return True
189
247
else :
190
248
return False
191
249
250
+
192
251
def sanitize_nitrine_Natom (atom , verbose = True ):
193
252
if atom .GetSymbol () == "N" and len (atom .GetNeighbors ()) == 2 :
194
253
mol = atom .GetOwningMol ()
@@ -213,7 +272,8 @@ def sanitize_nitrine_Natom(atom, verbose=True):
213
272
214
273
bond = mol .GetBondBetweenAtoms (atom .GetIdx (), N_non_terminal .GetIdx ())
215
274
bond .SetBondType (Chem .rdchem .BondType .DOUBLE )
216
- atom .SetFormalCharge (1 )
275
+ atom .SetFormalCharge (1 )
276
+
217
277
218
278
def contain_hetero_aromatic (mol ):
219
279
flag = False
@@ -223,6 +283,7 @@ def contain_hetero_aromatic(mol):
223
283
break
224
284
return flag
225
285
286
+
226
287
# for carbon with explicit valence > 4
227
288
def regularize_carbon_bond_order (atom , verbose = True ):
228
289
if atom .GetSymbol () == "C" and get_explicit_valence (atom ) > 4 :
@@ -240,6 +301,7 @@ def regularize_carbon_bond_order(atom, verbose=True):
240
301
if bond .GetIdx () != double_bond_idx :
241
302
bond .SetBondType (Chem .rdchem .BondType .SINGLE )
242
303
304
+
243
305
# for nitrogen with explicit valence > 4
244
306
def regularize_nitrogen_bond_order (atom , verbose = True ):
245
307
mol = atom .GetOwningMol ()
@@ -255,6 +317,7 @@ def regularize_nitrogen_bond_order(atom, verbose=True):
255
317
def sanitize_mol (mol , verbose = False ):
256
318
for atom in mol .GetAtoms ():
257
319
sanitize_carboxyl_Catom (atom , verbose )
320
+ sanitize_guanidine_Catom (atom , verbose )
258
321
sanitize_phosphate_Patom (atom , verbose )
259
322
sanitize_sulfate_Satom (atom , verbose )
260
323
sanitize_nitro_Natom (atom , verbose )
@@ -272,6 +335,7 @@ def mol_edit_log(mol, i, j):
272
335
edited = mol .GetProp ("edit" )
273
336
mol .SetProp ("edit" , edited + ",%d_%d" % (i , j ))
274
337
338
+
275
339
def kekulize_aromatic_heterocycles (mol_in , assign_formal_charge = True , sanitize = True ):
276
340
mol = Chem .RWMol (mol_in )
277
341
rings = Chem .rdmolops .GetSymmSSSR (mol )
@@ -345,7 +409,7 @@ def hetero_priority(idx, mol):
345
409
elif bAllAr and not bAllC :
346
410
HAr .append (ring )
347
411
348
- if len (HAr ) == 0 :
412
+ if len (HAr ) == 0 :
349
413
# no hetrerocycles
350
414
return mol_in
351
415
else :
@@ -364,7 +428,7 @@ def hetero_priority(idx, mol):
364
428
fuseCAr [i ] = j
365
429
break
366
430
if i > 1 :
367
- if (fuseCAr [i ] == fuseCAr [i - 1 ]) & (fuseCAr [i ] >= 0 ):
431
+ if (fuseCAr [i ] == fuseCAr [i - 1 ]) & (fuseCAr [i ] >= 0 ):
368
432
fuseDouble .append (i )
369
433
atom = mol .GetAtomWithIdx (ring [i ])
370
434
if atom .GetSymbol () != 'C' :
@@ -376,7 +440,7 @@ def hetero_priority(idx, mol):
376
440
hasDouble .append (i )
377
441
bond = mol .GetBondBetweenAtoms (ring [i ], ring [(i + 1 ) % lring ])
378
442
379
- if (fuseCAr [0 ] == fuseCAr [lring - 1 ]) & (fuseCAr [0 ] >= 0 ):
443
+ if (fuseCAr [0 ] == fuseCAr [lring - 1 ]) & (fuseCAr [0 ] >= 0 ):
380
444
fuseDouble .append (0 )
381
445
382
446
if (len (hetero ) > 0 ) | (len (hasDouble ) > 0 ):
@@ -446,6 +510,7 @@ def hetero_priority(idx, mol):
446
510
except Exception as e :
447
511
raise RuntimeError (f"Manual kekulization for aromatic heterocycles failed, below are errors:\n \t { e } " )
448
512
513
+
449
514
def convert_by_obabel (mol , cache_dir = os .path .join (os .getcwd (), '.cache' ), obabel_path = "obabel" ):
450
515
if not os .path .exists (cache_dir ):
451
516
os .mkdir (cache_dir )
@@ -464,6 +529,7 @@ def convert_by_obabel(mol, cache_dir=os.path.join(os.getcwd(), '.cache'), obabel
464
529
mol_obabel = Chem .MolFromMolFile (mol_file_out , removeHs = False , sanitize = False )
465
530
return mol_obabel
466
531
532
+
467
533
def super_sanitize_mol (mol , name = None , verbose = True ):
468
534
if name is None :
469
535
if mol .HasProp ("_Name" ):
@@ -484,11 +550,13 @@ def super_sanitize_mol(mol, name=None, verbose=True):
484
550
except Exception as e :
485
551
try :
486
552
if verbose :
487
- print ("Hermite procedure failed, maybe due to unsupported representation of hetero aromatic rings, re-try with obabel" )
553
+ print (
554
+ "Hermite procedure failed, maybe due to unsupported representation of hetero aromatic rings, re-try with obabel" )
488
555
print ("=====Stage 2: re-try with obabel=====" )
489
556
mol = convert_by_obabel (mol )
490
557
mol = sanitize_mol (mol , verbose )
491
- mol = kekulize_aromatic_heterocycles (mol , assign_formal_charge = False , sanitize = False ) # aromatic heterocycles
558
+ mol = kekulize_aromatic_heterocycles (mol , assign_formal_charge = False ,
559
+ sanitize = False ) # aromatic heterocycles
492
560
mol = regularize_formal_charges (mol , sanitize = False )
493
561
mol_copy = deepcopy (mol )
494
562
Chem .SanitizeMol (mol_copy )
@@ -501,6 +569,7 @@ def super_sanitize_mol(mol, name=None, verbose=True):
501
569
print (name , "Failed!" )
502
570
return None
503
571
572
+
504
573
class Sanitizer (object ):
505
574
def __init__ (self , level = 'medium' , raise_errors = True , verbose = False ):
506
575
'''
@@ -526,7 +595,7 @@ def _check_level(self, level):
526
595
else :
527
596
if level == 'high' and not USE_OBABEL :
528
597
raise ModuleNotFoundError ("obabel not installed, high level sanitizer cannot work" )
529
-
598
+
530
599
def _handle_exception (self , error_info ):
531
600
if self .raise_errors :
532
601
raise SanitizeError (error_info )
@@ -561,6 +630,7 @@ def sanitize(self, mol):
561
630
self ._handle_exception (error_info )
562
631
return mol
563
632
633
+
564
634
class SanitizeError (Exception ):
565
635
def __init__ (self , content = "Sanitization Failed." ):
566
636
self .content = content
0 commit comments