@@ -101,85 +101,111 @@ def getFeatureMapScore(small_m, large_m, score_mode=FeatMaps.FeatMapScoreMode.Al
101
101
102
102
103
103
# This is the main XCOS function
104
- def getReverseScores (mols , frags , COS_threshold , writer ):
104
+ def getReverseScores (mols , frags , score_threshold , writer ):
105
105
106
106
for mol in mols :
107
-
107
+
108
108
# Get the bits
109
109
compound_bits = getBits (mol )
110
110
111
111
all_scores = []
112
112
113
113
for bit in compound_bits :
114
+
115
+ # Let's remove wildcard atoms
116
+ # Removing wildcard atoms does not impact feat score but does lower shape overlay
117
+ # For scoring should multiply feat score by number of non-wildcard atoms and use
118
+ # all atoms including wildcard for shape overlay
119
+ bit_without_wildcard_atoms = Chem .DeleteSubstructs (bit , Chem .MolFromSmarts ('[#0]' ))
120
+
121
+ # Let's only score bits that have more than one atom (do not count wildcard atoms)
122
+ # Get number of bit atoms without wildcard atoms
123
+ no_bit_atoms_without_wild_card = bit_without_wildcard_atoms .GetNumAtoms ()
114
124
115
125
# Get number of bit atoms
116
126
no_bit_atoms = bit .GetNumAtoms ()
117
127
118
- scores = []
119
-
120
- for frag_mol in frags :
121
-
122
- # NB reverse SuCOS scoring
123
- fm_score = getFeatureMapScore (bit , frag_mol )
124
- fm_score = np .clip (fm_score , 0 , 1 )
125
- # Change van der Waals radius scale for stricter overlay
126
- protrude_dist = rdShapeHelpers .ShapeProtrudeDist (bit , frag_mol , allowReordering = False , vdwScale = 0.2 )
127
- protrude_dist = np .clip (protrude_dist , 0 , 1 )
128
-
129
- # Get frag name for linking to score
130
- frag_name = frag_mol .GetProp ('_Name' ).strip ('Mpro-' )
131
-
132
- # Check if MCS yield > 0 atoms
133
- mcs_match = rdFMCS .FindMCS ([bit ,frag_mol ],ringMatchesRingOnly = True ,matchValences = True )
134
-
135
- # Get number of atoms in MCS match found
136
- no_mcs_atoms = Chem .MolFromSmarts (mcs_match .smartsString ).GetNumAtoms ()
137
-
138
- if no_mcs_atoms == 0 :
139
-
140
- scores .append ((frag_name , 0 , no_bit_atoms ))
141
-
142
- if no_mcs_atoms > 0 :
143
-
144
- # NB reverse SuCOS scoring
145
- fm_score = getFeatureMapScore (bit , frag_mol )
146
- fm_score = np .clip (fm_score , 0 , 1 )
147
-
148
- # Change van der Waals radius scale for stricter overlay
149
- protrude_dist = rdShapeHelpers .ShapeProtrudeDist (bit , frag_mol ,
150
- allowReordering = False ,
151
- vdwScale = 0.2 )
152
- protrude_dist = np .clip (protrude_dist , 0 , 1 )
153
-
154
- reverse_SuCOS_score = 0.5 * fm_score + 0.5 * (1 - protrude_dist )
155
-
156
- scores .append ((frag_name , reverse_SuCOS_score , no_bit_atoms ))
157
-
158
- all_scores .append (scores )
159
-
160
- list_dfs = []
161
-
162
- for score in all_scores :
163
-
164
- df = pd .DataFrame (data = score , columns = ['Fragment' , 'Score' , 'No_bit_atoms' ])
128
+ # Only score if enough info in bit to describe a vector - this will bias against
129
+ # cases where frag has long aliphatic chain
130
+
131
+ if no_bit_atoms_without_wild_card > 1 :
165
132
166
- # Get maximum scoring fragment for bit match
167
- df = df [df ['Score' ] == df ['Score' ].max ()]
168
- list_dfs .append (df )
169
-
170
- final_df = pd .concat (list_dfs )
171
-
172
- # Score 1: the score is scaled by the number of bit atoms
173
- score_1 = (final_df .No_bit_atoms * final_df .Score ).sum ()
174
-
175
- # Let's only get frags above a threshold
176
- final_df = final_df [final_df .Score > COS_threshold ]
177
-
178
- # Let#s sort the df by increasing score
179
- final_df = final_df .sort_values (by = ['Score' ], ascending = False )
180
-
181
- # Get the unique fragments above threshold
182
- all_frags = pd .unique (final_df .Fragment )
133
+ scores = []
134
+
135
+ for frag_mol in frags :
136
+
137
+ # Get frag name for linking to score
138
+ frag_name = frag_mol .GetProp ('_Name' ).strip ('Mpro-' )
139
+
140
+ # Score only if some common structure shared between bit and fragment.
141
+ # Check if MCS yield > 0 atoms
142
+ mcs_match = rdFMCS .FindMCS ([bit ,frag_mol ], ringMatchesRingOnly = True , matchValences = True )
143
+
144
+ # Get mcs_mol from mcs_match
145
+ mcs_mol = Chem .MolFromSmarts (mcs_match .smartsString )
146
+
147
+ # check if frag has MCS mol
148
+ mcs_test = frag_mol .HasSubstructMatch (mcs_mol )
149
+
150
+ if mcs_test :
151
+
152
+ # Change van der Waals radius scale for stricter overlay
153
+ protrude_dist = rdShapeHelpers .ShapeProtrudeDist (bit , frag_mol , allowReordering = False , vdwScale = 0.2 )
154
+ protrude_dist = np .clip (protrude_dist , 0 , 1 )
155
+
156
+ protrude_score = 1 - protrude_dist
157
+
158
+ # We are comparing small bits relative to large frags
159
+ # If overlay poor then assign score of 0
160
+ # NB reverse SuCOS scoring. Feat map is also computationally
161
+ # more expensive
162
+
163
+ if protrude_score > score_threshold :
164
+
165
+ fm_score = getFeatureMapScore (bit , frag_mol )
166
+ fm_score = np .clip (fm_score , 0 , 1 )
167
+
168
+ # What about good shape overlay but poor feat match?
169
+ # Let's add a cutoff here to prevent good overlays with
170
+ # poor feat match - eg. 3 mem ring 2 x C atoms overlay well
171
+ # with 2 x aromatic ring Cs
172
+
173
+ if fm_score > score_threshold :
174
+ # Use modified SuCOS score where feat_score scaled by number of bit atoms
175
+ # without wildcard atoms and the shape overlay score by the number of bit atoms
176
+ # including wildcard atoms
177
+ scores .append ((frag_name , protrude_score ,no_bit_atoms ,fm_score ,no_bit_atoms_without_wild_card ))
178
+ else :
179
+ scores .append ((frag_name ,0 ,no_bit_atoms ,0 ,no_bit_atoms_without_wild_card ))
180
+ else :
181
+ scores .append ((frag_name ,0 ,no_bit_atoms ,0 ,no_bit_atoms_without_wild_card ))
182
+ else :
183
+ scores .append ((frag_name ,0 ,no_bit_atoms ,0 ,no_bit_atoms_without_wild_card ))
184
+
185
+ all_scores .append (scores )
186
+
187
+ list_dfs = []
188
+
189
+ for score in all_scores :
190
+
191
+ df = pd .DataFrame (data = score , columns = ['Fragment' ,'Shape_score' ,'no_bit_atoms' ,'Feat_score' ,'no_bit_atoms_without_wild_card' ])
192
+
193
+ # Get maximum scoring fragment for bit match
194
+ df ['Modified_SuCOS_score' ] = 0.5 * (df .Feat_score * df .no_bit_atoms_without_wild_card ) + 0.5 * (df .Shape_score * df .no_bit_atoms )
195
+ df = df [df ['Modified_SuCOS_score' ] == df ['Modified_SuCOS_score' ].max ()]
196
+ list_dfs .append (df )
197
+
198
+ final_df = pd .concat (list_dfs )
199
+
200
+ # Score 1: the score is scaled by the number of bit atoms
201
+ score_1 = final_df .Modified_SuCOS_score .sum ()
202
+
203
+ # Let's only get frags with a score > 0
204
+ #final_df['SuCOS_score'] = 0.5 * final_df.Feat_score + 0.5 * final_df.Shape_score
205
+ final_df = final_df [final_df .Modified_SuCOS_score > 0 ]
206
+
207
+ # Get the unique fragments above threshold
208
+ all_frags = pd .unique (final_df .Fragment )
183
209
184
210
# Add props we want
185
211
mol .SetProp (field_XCosRefMols , ',' .join (all_frags ))
@@ -204,8 +230,8 @@ def process(molecules, fragments, writer):
204
230
else :
205
231
utils .log ('Using' , len (frag_mol_list ), 'fragments. No errors' )
206
232
207
- #mols, frags, COS_threshold , writer
208
- getReverseScores (molecules , frag_mol_list , 0.40 , writer )
233
+ #mols, frags, score_threshold , writer
234
+ getReverseScores (molecules , frag_mol_list , 0.5 , writer )
209
235
210
236
211
237
def main ():
0 commit comments