Skip to content

Commit 11c9d2d

Browse files
authored
Merge pull request #41 from Waztom/XCOS3
XCOS scoring version 3
2 parents 9238dd8 + 4672f12 commit 11c9d2d

File tree

1 file changed

+94
-68
lines changed

1 file changed

+94
-68
lines changed

src/python/pipelines/xchem/xcos.py

Lines changed: 94 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -101,85 +101,111 @@ def getFeatureMapScore(small_m, large_m, score_mode=FeatMaps.FeatMapScoreMode.Al
101101

102102

103103
# This is the main XCOS function
104-
def getReverseScores(mols, frags, COS_threshold, writer):
104+
def getReverseScores(mols, frags, score_threshold, writer):
105105

106106
for mol in mols:
107-
107+
108108
# Get the bits
109109
compound_bits = getBits(mol)
110110

111111
all_scores = []
112112

113113
for bit in compound_bits:
114+
115+
# Let's remove wildcard atoms
116+
# Removing wildcard atoms does not impact feat score but does lower shape overlay
117+
# For scoring should multiply feat score by number of non-wildcard atoms and use
118+
# all atoms including wildcard for shape overlay
119+
bit_without_wildcard_atoms = Chem.DeleteSubstructs(bit, Chem.MolFromSmarts('[#0]'))
120+
121+
# Let's only score bits that have more than one atom (do not count wildcard atoms)
122+
# Get number of bit atoms without wildcard atoms
123+
no_bit_atoms_without_wild_card = bit_without_wildcard_atoms.GetNumAtoms()
114124

115125
# Get number of bit atoms
116126
no_bit_atoms = bit.GetNumAtoms()
117127

118-
scores = []
119-
120-
for frag_mol in frags:
121-
122-
# NB reverse SuCOS scoring
123-
fm_score = getFeatureMapScore(bit, frag_mol)
124-
fm_score = np.clip(fm_score, 0, 1)
125-
# Change van der Waals radius scale for stricter overlay
126-
protrude_dist = rdShapeHelpers.ShapeProtrudeDist(bit, frag_mol, allowReordering=False, vdwScale=0.2)
127-
protrude_dist = np.clip(protrude_dist, 0, 1)
128-
129-
# Get frag name for linking to score
130-
frag_name = frag_mol.GetProp('_Name').strip('Mpro-')
131-
132-
# Check if MCS yield > 0 atoms
133-
mcs_match = rdFMCS.FindMCS([bit,frag_mol],ringMatchesRingOnly=True,matchValences=True)
134-
135-
# Get number of atoms in MCS match found
136-
no_mcs_atoms = Chem.MolFromSmarts(mcs_match.smartsString).GetNumAtoms()
137-
138-
if no_mcs_atoms == 0:
139-
140-
scores.append((frag_name, 0, no_bit_atoms))
141-
142-
if no_mcs_atoms > 0:
143-
144-
# NB reverse SuCOS scoring
145-
fm_score = getFeatureMapScore(bit, frag_mol)
146-
fm_score = np.clip(fm_score, 0, 1)
147-
148-
# Change van der Waals radius scale for stricter overlay
149-
protrude_dist = rdShapeHelpers.ShapeProtrudeDist(bit, frag_mol,
150-
allowReordering=False,
151-
vdwScale=0.2)
152-
protrude_dist = np.clip(protrude_dist, 0, 1)
153-
154-
reverse_SuCOS_score = 0.5 * fm_score + 0.5 * (1 - protrude_dist)
155-
156-
scores.append((frag_name, reverse_SuCOS_score, no_bit_atoms))
157-
158-
all_scores.append(scores)
159-
160-
list_dfs = []
161-
162-
for score in all_scores:
163-
164-
df = pd.DataFrame(data=score, columns=['Fragment', 'Score', 'No_bit_atoms'])
128+
# Only score if enough info in bit to describe a vector - this will bias against
129+
# cases where frag has long aliphatic chain
130+
131+
if no_bit_atoms_without_wild_card > 1:
165132

166-
# Get maximum scoring fragment for bit match
167-
df = df[df['Score'] == df['Score'].max()]
168-
list_dfs.append(df)
169-
170-
final_df = pd.concat(list_dfs)
171-
172-
# Score 1: the score is scaled by the number of bit atoms
173-
score_1 = (final_df.No_bit_atoms * final_df.Score).sum()
174-
175-
# Let's only get frags above a threshold
176-
final_df = final_df[final_df.Score > COS_threshold]
177-
178-
# Let's sort the df by decreasing score
179-
final_df = final_df.sort_values(by=['Score'], ascending=False)
180-
181-
# Get the unique fragments above threshold
182-
all_frags = pd.unique(final_df.Fragment)
133+
scores = []
134+
135+
for frag_mol in frags:
136+
137+
# Get frag name for linking to score
138+
frag_name = frag_mol.GetProp('_Name').strip('Mpro-')
139+
140+
# Score only if some common structure shared between bit and fragment.
141+
# Check if MCS yield > 0 atoms
142+
mcs_match = rdFMCS.FindMCS([bit,frag_mol], ringMatchesRingOnly=True, matchValences=True)
143+
144+
# Get mcs_mol from mcs_match
145+
mcs_mol = Chem.MolFromSmarts(mcs_match.smartsString)
146+
147+
# check if frag has MCS mol
148+
mcs_test = frag_mol.HasSubstructMatch(mcs_mol)
149+
150+
if mcs_test:
151+
152+
# Change van der Waals radius scale for stricter overlay
153+
protrude_dist = rdShapeHelpers.ShapeProtrudeDist(bit, frag_mol, allowReordering=False, vdwScale=0.2)
154+
protrude_dist = np.clip(protrude_dist, 0, 1)
155+
156+
protrude_score = 1 - protrude_dist
157+
158+
# We are comparing small bits relative to large frags
159+
# If overlay poor then assign score of 0
160+
# NB reverse SuCOS scoring. Feat map is also computationally
161+
# more expensive
162+
163+
if protrude_score > score_threshold:
164+
165+
fm_score = getFeatureMapScore(bit, frag_mol)
166+
fm_score = np.clip(fm_score, 0, 1)
167+
168+
# What about good shape overlay but poor feat match?
169+
# Let's add a cutoff here to prevent good overlays with
170+
# poor feat match - eg. 3 mem ring 2 x C atoms overlay well
171+
# with 2 x aromatic ring Cs
172+
173+
if fm_score > score_threshold:
174+
# Use modified SuCOS score where feat_score scaled by number of bit atoms
175+
# without wildcard atoms and the shape overlay score by the number of bit atoms
176+
# including wildcard atoms
177+
scores.append((frag_name, protrude_score,no_bit_atoms,fm_score,no_bit_atoms_without_wild_card))
178+
else:
179+
scores.append((frag_name,0,no_bit_atoms,0,no_bit_atoms_without_wild_card ))
180+
else:
181+
scores.append((frag_name,0,no_bit_atoms,0,no_bit_atoms_without_wild_card ))
182+
else:
183+
scores.append((frag_name,0,no_bit_atoms,0,no_bit_atoms_without_wild_card ))
184+
185+
all_scores.append(scores)
186+
187+
list_dfs = []
188+
189+
for score in all_scores:
190+
191+
df = pd.DataFrame(data=score, columns = ['Fragment','Shape_score','no_bit_atoms','Feat_score','no_bit_atoms_without_wild_card'])
192+
193+
# Get maximum scoring fragment for bit match
194+
df['Modified_SuCOS_score'] = 0.5 * (df.Feat_score * df.no_bit_atoms_without_wild_card) + 0.5 * (df.Shape_score * df.no_bit_atoms)
195+
df = df[df['Modified_SuCOS_score'] == df['Modified_SuCOS_score'].max()]
196+
list_dfs.append(df)
197+
198+
final_df = pd.concat(list_dfs)
199+
200+
# Score 1: the score is scaled by the number of bit atoms
201+
score_1 = final_df.Modified_SuCOS_score.sum()
202+
203+
# Let's only get frags with a score > 0
204+
#final_df['SuCOS_score'] = 0.5 * final_df.Feat_score + 0.5 * final_df.Shape_score
205+
final_df = final_df[final_df.Modified_SuCOS_score > 0]
206+
207+
# Get the unique fragments with a score > 0
208+
all_frags = pd.unique(final_df.Fragment)
183209

184210
# Add props we want
185211
mol.SetProp(field_XCosRefMols, ','.join(all_frags))
@@ -204,8 +230,8 @@ def process(molecules, fragments, writer):
204230
else:
205231
utils.log('Using', len(frag_mol_list), 'fragments. No errors')
206232

207-
#mols, frags, COS_threshold, writer
208-
getReverseScores(molecules, frag_mol_list, 0.40, writer)
233+
#mols, frags, score_threshold, writer
234+
getReverseScores(molecules, frag_mol_list, 0.5, writer)
209235

210236

211237
def main():

0 commit comments

Comments
 (0)