Skip to content

Commit de015e3

Browse files
committed
initial SuCOS module
1 parent dfbcfae commit de015e3

File tree

5 files changed

+268
-0
lines changed

5 files changed

+268
-0
lines changed

data/sucos/4e3g_lig.sdf

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
2+
RDKit 3D
3+
4+
10 10 0 0 0 0 0 0 0 0999 V2000
5+
17.3410 1.4040 15.6300 O 0 0 0 0 0 0 0 0 0 0 0 0
6+
16.4400 2.1870 15.2350 C 0 0 0 0 0 0 0 0 0 0 0 0
7+
15.2530 1.8470 15.5410 O 0 0 0 0 0 0 0 0 0 0 0 0
8+
16.7060 3.4620 14.6760 C 0 0 0 0 0 0 0 0 0 0 0 0
9+
18.0480 3.9210 14.6600 C 0 0 0 0 0 0 0 0 0 0 0 0
10+
18.4420 5.2100 14.1920 C 0 0 0 0 0 0 0 0 0 0 0 0
11+
17.3440 5.9500 13.6530 C 0 0 0 0 0 0 0 0 0 0 0 0
12+
17.5460 7.2150 13.0840 O 0 0 0 0 0 0 0 0 0 0 0 0
13+
16.0150 5.4960 13.6580 C 0 0 0 0 0 0 0 0 0 0 0 0
14+
15.6810 4.2780 14.2290 C 0 0 0 0 0 0 0 0 0 0 0 0
15+
2 1 2 0
16+
3 2 1 0
17+
4 2 1 0
18+
5 4 2 0
19+
6 5 1 0
20+
7 6 2 0
21+
8 7 1 0
22+
9 7 1 0
23+
10 9 2 0
24+
10 4 1 0
25+
M END

data/sucos/benzene.sdf

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
2+
PyMOL2.1 3D 0
3+
4+
6 6 0 0 0 0 0 0 0 0999 V2000
5+
16.7060 3.4620 14.6760 C 0 0 0 0 0 0 0 0 0 0 0 0
6+
18.0480 3.9210 14.6600 C 0 0 0 0 0 0 0 0 0 0 0 0
7+
18.4420 5.2100 14.1920 C 0 0 0 0 0 0 0 0 0 0 0 0
8+
17.3440 5.9500 13.6530 C 0 0 0 0 0 0 0 0 0 0 0 0
9+
16.0150 5.4960 13.6580 C 0 0 0 0 0 0 0 0 0 0 0 0
10+
15.6810 4.2780 14.2290 C 0 0 0 0 0 0 0 0 0 0 0 0
11+
1 2 2 0 0 0 0
12+
1 6 1 0 0 0 0
13+
2 3 1 0 0 0 0
14+
3 4 2 0 0 0 0
15+
4 5 1 0 0 0 0
16+
5 6 2 0 0 0 0
17+
M END
18+
$$$$
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
{
2+
"@class":"org.squonk.core.DockerServiceDescriptor",
3+
"serviceConfig": {
4+
"id": "pipelines.rdkit.sucos.basic",
5+
"name": "RDKitSuCOS",
6+
"description": "Generate 3D overlay using SuCOS in RDKit",
7+
"tags": [
8+
"rdkit",
9+
"alignment",
10+
"sucos",
11+
"3d",
12+
"docker"
13+
],
14+
"resourceUrl": null,
15+
"icon": "icons/filter_molecules.png",
16+
"inputDescriptors": [
17+
{
18+
"primaryType": "org.squonk.dataset.Dataset",
19+
"secondaryType": "org.squonk.types.MoleculeObject",
20+
"mediaType": "application/x-squonk-dataset-molecule+json",
21+
"name": "input"
22+
},
23+
{
24+
"primaryType": "org.squonk.dataset.Dataset",
25+
"secondaryType": "org.squonk.types.MoleculeObject",
26+
"mediaType": "application/x-squonk-dataset-molecule+json",
27+
"name": "target"
28+
}
29+
],
30+
"outputDescriptors": [
31+
{
32+
"primaryType": "org.squonk.dataset.Dataset",
33+
"secondaryType": "org.squonk.types.MoleculeObject",
34+
"mediaType": "application/x-squonk-dataset-molecule+json",
35+
"name": "output"
36+
}
37+
],
38+
"optionDescriptors": [
39+
{
40+
"modes": [
41+
"User"
42+
],
43+
"editable": true,
44+
"visible": true,
45+
"description": "Target molecule index (default is the first)",
46+
"label": "Target mol index",
47+
"key": "arg.target",
48+
"minValues": 0,
49+
"maxValues": 1,
50+
"typeDescriptor": {
51+
"type": "java.lang.Integer",
52+
"@class": "org.squonk.options.SimpleTypeDescriptor"
53+
},
54+
"@class": "org.squonk.options.OptionDescriptor"
55+
}
56+
],
57+
"executorClassName": "org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep"
58+
},
59+
"thinDescriptors": [
60+
{
61+
"input": "input"
62+
}
63+
],
64+
"inputRoutes": [
65+
{
66+
"route": "FILE"
67+
},
68+
{
69+
"route": "FILE"
70+
}
71+
],
72+
"outputRoutes": [
73+
{
74+
"route": "FILE"
75+
}
76+
],
77+
"imageName": "informaticsmatters/rdkit_pipelines",
78+
"command": "python -m pipelines.rdkit.sucos --target target.data.gz -i ${PIN}input.data.gz -o ${POUT}output -of json${binding.variables.containsKey('target') ? ' --targetidx ' + binding.variables.get('targetidx') : ''} --meta"
79+
}
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2018 Informatics Matters Ltd.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
import argparse, os
18+
19+
from rdkit import Chem, rdBase, RDConfig
20+
from rdkit.Chem import AllChem, rdShapeHelpers
21+
from rdkit.Chem.FeatMaps import FeatMaps
22+
23+
from pipelines_utils import parameter_utils, utils
24+
from pipelines_utils_rdkit import rdkit_utils
25+
26+
### start field name definitions #########################################
27+
28+
# Name of the molecule property that receives the computed SuCOS score.
field_SuCOS_Score = "SuCOS_Score"

### start function definitions #########################################

#################################################
#### Setting up the features to use in FeatureMap
# Feature factory built from the BaseFeatures.fdef file in RDKit's data directory.
fdef = AllChem.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
# keep = ('Donor','Acceptor','NegIonizable','PosIonizable','Aromatic')

# One default-constructed FeatMapParams per feature family known to the factory.
fmParams = {family: FeatMaps.FeatMapParams() for family in fdef.GetFeatureFamilies()}

# Feature families that take part in the feature map score.
keep = (
    'Donor',
    'Acceptor',
    'NegIonizable',
    'PosIonizable',
    'ZnBinder',
    'Aromatic',
    'Hydrophobe',
    'LumpedHydrophobe',
)
44+
45+
def get_FeatureMapScore(ref_mol, query_mol):
    """Score how well the query's features match those of the reference.

    Features of both molecules are generated with the module-level ``fdef``
    factory and restricted to the families listed in ``keep``. A feature map
    is built from the reference features (unit weights, ``fmParams``), the
    query features are scored against it, and the raw score is divided by the
    smaller of the two feature counts.

    :param ref_mol: reference molecule the feature map is built from
    :param query_mol: molecule whose features are scored against that map
    :return: the normalised feature map score, or 0.0 when either molecule
        has no features in ``keep``
    """
    ref_feats, query_feats = [
        [feat for feat in fdef.GetFeaturesForMol(mol) if feat.GetFamily() in keep]
        for mol in (ref_mol, query_mol)
    ]

    # With no features on one side nothing can match, and the normalisation
    # below would divide by zero and abort the whole run.
    if not ref_feats or not query_feats:
        return 0.0

    # Only the reference map is needed; the query features are scored as a list.
    ref_map = FeatMaps.FeatMap(feats=ref_feats, weights=[1] * len(ref_feats), params=fmParams)
    normaliser = float(min(ref_map.GetNumFeatures(), len(query_feats)))
    return ref_map.ScoreFeats(query_feats) / normaliser
55+
56+
def get_SucosScore(ref_mol, query_mol, field_name):
    """Compute the SuCOS score of ``query_mol`` against ``ref_mol``.

    The score is half the feature map score plus half of
    (1 - shape protrude distance). It is stored on ``query_mol`` as a double
    property named ``field_name`` and also returned.

    :param ref_mol: reference molecule
    :param query_mol: molecule being scored; receives the score property
    :param field_name: name of the property to store the score under
    :return: the SuCOS score
    """
    feature_score = get_FeatureMapScore(ref_mol, query_mol)
    protrusion = rdShapeHelpers.ShapeProtrudeDist(ref_mol, query_mol, allowReordering=False)

    feature_term = 0.5 * feature_score
    shape_term = 0.5 * (1.0 - protrusion)
    sucos = feature_term + shape_term

    query_mol.SetDoubleProp(field_name, sucos)
    return sucos
66+
67+
### start main execution #########################################
68+
69+
def main():
    """Command line entry point: score each input molecule against a reference.

    Parses the command line, reads the reference molecule from ``--target``
    (at ``--targetidx``), then computes the SuCOS score for every molecule of
    the input, stores it in the ``field_SuCOS_Score`` property and writes the
    molecule out. Unreadable (``None``) molecules are skipped; molecules whose
    scoring raises ``ValueError`` are logged and counted as errors. When
    ``--meta`` is set, input/output/error counts are written as metrics.
    """
    parser = argparse.ArgumentParser(description='SuCOS with RDKit')
    parser.add_argument('--target', help='molecule to compare against')
    parser.add_argument('--targetidx', help="Target molecule index in SD file if not the first", type=int, default=1)
    parameter_utils.add_default_io_args(parser)

    args = parser.parse_args()
    utils.log("SuCOS Args: ", args)

    # TODO - handle molecules with multiple fragments

    ref_mol = rdkit_utils.read_single_molecule(args.target, index=args.targetidx)
    utils.log("Reference mol has", str(ref_mol.GetNumHeavyAtoms()), "heavy atoms")

    source = "sucos.py"
    datasetMetaProps = {"source": source, "description": "SuCOS using RDKit " + rdBase.rdkitVersion}
    # Keyed by the field constant so the type mapping always matches the
    # property that is actually written (the literal previously differed in case).
    clsMappings = {field_SuCOS_Score: "java.lang.Float"}
    fieldMetaProps = [
        {"fieldName": field_SuCOS_Score, "values": {"source": source, "description": "SuCOS score"}}
    ]

    in_stream, output, suppl, writer, output_base = rdkit_utils.default_open_input_output(
        args.input, args.informat, args.output,
        'sucos', args.outformat,
        valueClassMappings=clsMappings,
        datasetMetaProps=datasetMetaProps,
        fieldMetaProps=fieldMetaProps)

    count = 0   # molecules read, including unreadable ones
    total = 0   # molecules scored and written
    errors = 0  # molecules whose scoring failed
    try:
        for count, mol in enumerate(suppl, 1):
            if mol is None:
                continue
            try:
                score = get_SucosScore(ref_mol, mol, field_SuCOS_Score)
                utils.log("Score:", str(score))
                writer.write(mol)
                total += 1
            except ValueError as e:
                errors += 1
                # str(e) rather than e.message: the attribute does not exist on Python 3
                utils.log("Molecule", count, "failed to score:", str(e))
    finally:
        # Release the streams even if an unexpected exception escapes the loop.
        in_stream.close()
        writer.flush()
        writer.close()
        output.close()

    if args.meta:
        utils.write_metrics(output_base, {'__InputCount__': count, '__OutputCount__': total, '__ErrorCount__': errors, 'RDKitSuCOS': total})
122+
123+
if __name__ == "__main__":
124+
main()
125+
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Automated pipeline test specification.
2+
3+
[
4+
5+
version = 1,
6+
7+
// Testing sucos.py reading from file and writing to file
8+
test_raw_sucos_sdf_to_sdf = [
9+
10+
command: '''python -m pipelines.rdkit.sucos
11+
--target ${PIN}/sucos/4e3g_lig.sdf
12+
-i ${PIN}/sucos/benzene.sdf -if sdf
13+
-o ${POUT}output -of sdf''',
14+
15+
stderr: [ 'Score: 0.843' ],
16+
17+
creates: [ 'output.sdf.gz' ],
18+
19+
]
20+
21+
]

0 commit comments

Comments
 (0)