52
52
field_SuCOSCum_ProtrudeScore = "SuCOS_Cum_Protrude_Score"
53
53
54
54
55
- def process (inputs_supplr , targets_supplr , writer , field_name ):
56
-
55
+ def process (inputs_supplr , targets_supplr , writer , field_name , filter_value , filter_field ):
57
56
cluster = []
58
57
mol_ids = []
59
58
i = 0
@@ -72,6 +71,7 @@ def process(inputs_supplr, targets_supplr, writer, field_name):
72
71
cluster .append ((mol , features ))
73
72
except :
74
73
utils .log ("WARNING: failed to generate features for molecule" , i , sys .exc_info ())
74
+ utils .log ("Generated features for" , len (cluster ), "molecules" )
75
75
76
76
comparisons = 0
77
77
mol_num = 0
@@ -90,51 +90,53 @@ def process(inputs_supplr, targets_supplr, writer, field_name):
90
90
errors += 1
91
91
continue
92
92
93
- max_scores = [0 , 0 , 0 ]
94
- cum_scores = [0 , 0 , 0 ]
95
- best_id = None
93
+ scores_max = [0 , 0 , 0 ]
94
+ scores_cum = [0 , 0 , 0 ]
96
95
97
96
index = 0
98
97
for entry in cluster :
99
98
hit = entry [0 ]
100
99
ref_features = entry [1 ]
101
-
102
100
comparisons += 1
103
101
sucos_score , fm_score , vol_score = sucos .get_SucosScore (hit , mol ,
104
- tani = False , ref_features = ref_features , query_features = query_features )
102
+ tani = False , ref_features = ref_features ,
103
+ query_features = query_features )
105
104
106
- if sucos_score > max_scores [0 ]:
107
- max_scores [0 ] = sucos_score
108
- max_scores [1 ] = fm_score
109
- max_scores [2 ] = vol_score
105
+ if sucos_score > scores_max [0 ]:
106
+ scores_max [0 ] = sucos_score
107
+ scores_max [1 ] = fm_score
108
+ scores_max [2 ] = vol_score
110
109
cluster_index = index
111
110
best_id = mol_ids [index ]
112
111
113
- cum_scores [0 ] += sucos_score
114
- cum_scores [1 ] += fm_score
115
- cum_scores [2 ] += vol_score
112
+ scores_cum [0 ] += sucos_score
113
+ scores_cum [1 ] += fm_score
114
+ scores_cum [2 ] += vol_score
116
115
117
116
index += 1
118
117
119
- if max_scores [0 ] > 0 :
118
+ # utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index)
119
+ mol .SetDoubleProp (field_SuCOSMax_Score , scores_max [0 ] if scores_max [0 ] > 0 else 0 )
120
+ mol .SetDoubleProp (field_SuCOSMax_FMScore , scores_max [1 ] if scores_max [1 ] > 0 else 0 )
121
+ mol .SetDoubleProp (field_SuCOSMax_ProtrudeScore , scores_max [2 ] if scores_max [2 ] > 0 else 0 )
120
122
121
- # cluster_file_name_only = cluster_name.split(os.sep)[-1]
122
- #utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index)
123
- mol .SetDoubleProp (field_SuCOSMax_Score , max_scores [0 ])
124
- mol .SetDoubleProp (field_SuCOSMax_FMScore , max_scores [1 ])
125
- mol .SetDoubleProp (field_SuCOSMax_ProtrudeScore , max_scores [2 ])
123
+ if best_id :
124
+ mol .SetProp (field_SuCOSMax_Target , best_id )
126
125
mol .SetIntProp (field_SuCOSMax_Index , cluster_index )
127
- if best_id :
128
- mol .SetProp (field_SuCOSMax_Target , best_id )
129
126
130
- if cum_scores [0 ] > 0 :
131
- #utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2])
132
- mol .SetDoubleProp (field_SuCOSCum_Score , cum_scores [0 ])
133
- mol .SetDoubleProp (field_SuCOSCum_FMScore , cum_scores [1 ])
134
- mol .SetDoubleProp (field_SuCOSCum_ProtrudeScore , cum_scores [2 ])
127
+ # utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2])
128
+ mol .SetDoubleProp (field_SuCOSCum_Score , scores_cum [0 ] if scores_cum [0 ] > 0 else 0 )
129
+ mol .SetDoubleProp (field_SuCOSCum_FMScore , scores_cum [1 ] if scores_cum [1 ] > 0 else 0 )
130
+ mol .SetDoubleProp (field_SuCOSCum_ProtrudeScore , scores_cum [2 ] if scores_cum [2 ] > 0 else 0 )
135
131
136
- writer .write (mol )
137
132
133
+ if filter_value and filter_field :
134
+ if mol .HasProp (filter_field ):
135
+ val = mol .GetDoubleProp (filter_field )
136
+ if val > filter_value :
137
+ writer .write (mol )
138
+ else :
139
+ writer .write (mol )
138
140
139
141
utils .log ("Completed" , comparisons , "comparisons" )
140
142
return mol_num , comparisons , errors
@@ -148,12 +150,15 @@ def main():
148
150
parser .add_argument ('-tm' , '--target-molecules' , help = 'Target molecules to compare against' )
149
151
parser .add_argument ('-tf' , '--targets-format' , help = 'Target molecules format' )
150
152
parser .add_argument ('-n' , '--name-field' , help = 'Name of field with molecule name' )
153
+ parser .add_argument ('--no-gzip' , action = 'store_true' , help = 'Do not compress the output (STDOUT is never compressed' )
154
+ parser .add_argument ('--filter-value' , type = float , help = 'Filter out values with scores less than this.' )
155
+ parser .add_argument ('--filter-field' , help = 'Field to use to filter values.' )
151
156
152
157
args = parser .parse_args ()
153
158
utils .log ("Max SuCOSMax Args: " , args )
154
159
155
160
source = "sucos_max.py"
156
- datasetMetaProps = {"source" :source , "description" : "SuCOSMax using RDKit " + rdBase .rdkitVersion }
161
+ datasetMetaProps = {"source" : source , "description" : "SuCOSMax using RDKit " + rdBase .rdkitVersion }
157
162
clsMappings = {}
158
163
fieldMetaProps = []
159
164
@@ -165,29 +170,37 @@ def main():
165
170
clsMappings [field_SuCOSCum_FMScore ] = "java.lang.Float"
166
171
clsMappings [field_SuCOSCum_ProtrudeScore ] = "java.lang.Float"
167
172
168
- fieldMetaProps .append ({"fieldName" :field_SuCOSMax_Score , "values" : {"source" :source , "description" :"SuCOS Max score" }})
169
- fieldMetaProps .append ({"fieldName" :field_SuCOSMax_FMScore , "values" : {"source" :source , "description" :"SuCOS Max Feature Map score" }})
170
- fieldMetaProps .append ({"fieldName" :field_SuCOSMax_ProtrudeScore , "values" : {"source" :source , "description" :"SuCOS Max Protrude score" }})
171
- fieldMetaProps .append ({"fieldName" :field_SuCOSMax_Index , "values" : {"source" :source , "description" :"SuCOS Max target index" }})
172
- fieldMetaProps .append ({"fieldName" :field_SuCOSCum_Score , "values" : {"source" :source , "description" :"SuCOS Cumulative score" }})
173
- fieldMetaProps .append ({"fieldName" :field_SuCOSCum_FMScore , "values" : {"source" :source , "description" :"SuCOS Cumulative Feature Map score" }})
174
- fieldMetaProps .append ({"fieldName" :field_SuCOSCum_ProtrudeScore , "values" : {"source" :source , "description" :"SuCOS Cumulative Protrude score" }})
173
+ fieldMetaProps .append (
174
+ {"fieldName" : field_SuCOSMax_Score , "values" : {"source" : source , "description" : "SuCOS Max score" }})
175
+ fieldMetaProps .append ({"fieldName" : field_SuCOSMax_FMScore ,
176
+ "values" : {"source" : source , "description" : "SuCOS Max Feature Map score" }})
177
+ fieldMetaProps .append ({"fieldName" : field_SuCOSMax_ProtrudeScore ,
178
+ "values" : {"source" : source , "description" : "SuCOS Max Protrude score" }})
179
+ fieldMetaProps .append (
180
+ {"fieldName" : field_SuCOSMax_Index , "values" : {"source" : source , "description" : "SuCOS Max target index" }})
181
+ fieldMetaProps .append (
182
+ {"fieldName" : field_SuCOSCum_Score , "values" : {"source" : source , "description" : "SuCOS Cumulative score" }})
183
+ fieldMetaProps .append ({"fieldName" : field_SuCOSCum_FMScore ,
184
+ "values" : {"source" : source , "description" : "SuCOS Cumulative Feature Map score" }})
185
+ fieldMetaProps .append ({"fieldName" : field_SuCOSCum_ProtrudeScore ,
186
+ "values" : {"source" : source , "description" : "SuCOS Cumulative Protrude score" }})
175
187
176
188
if args .name_field :
177
189
clsMappings [field_SuCOSMax_Target ] = "java.lang.String"
178
- fieldMetaProps .append ({ "fieldName" : field_SuCOSMax_Target , "values" : { "source" : source , "description" : "SuCOS Max target name" }})
179
-
190
+ fieldMetaProps .append (
191
+ { "fieldName" : field_SuCOSMax_Target , "values" : { "source" : source , "description" : "SuCOS Max target name" }})
180
192
181
- inputs_file ,output ,inputs_supplr ,writer ,output_base = rdkit_utils . \
182
- default_open_input_output (args .input , args .informat , args .output ,
183
- 'sucos-max' , args .outformat ,
184
- valueClassMappings = clsMappings ,
185
- datasetMetaProps = datasetMetaProps ,
186
- fieldMetaProps = fieldMetaProps )
193
+ inputs_file , inputs_supplr = rdkit_utils .default_open_input (args .input , args .informat )
194
+ output , writer , output_base = rdkit_utils .default_open_output (args .output ,
195
+ 'sucos-max' , args .outformat ,
196
+ valueClassMappings = clsMappings ,
197
+ datasetMetaProps = datasetMetaProps ,
198
+ fieldMetaProps = fieldMetaProps ,
199
+ compress = not args .no_gzip )
187
200
188
201
targets_file , targets_supplr = rdkit_utils .default_open_input (args .target_molecules , args .targets_format )
189
202
190
- count , total , errors = process (inputs_supplr , targets_supplr , writer , args .name_field )
203
+ count , total , errors = process (inputs_supplr , targets_supplr , writer , args .name_field , args . filter_value , args . filter_field )
191
204
192
205
inputs_file .close ()
193
206
targets_file .close ()
@@ -196,8 +209,9 @@ def main():
196
209
output .close ()
197
210
198
211
if args .meta :
199
- utils .write_metrics (output_base , {'__InputCount__' :count , '__OutputCount__' :total , '__ErrorCount__' :errors , 'RDKitSuCOS' :total })
212
+ utils .write_metrics (output_base , {'__InputCount__' : count , '__OutputCount__' : total , '__ErrorCount__' : errors ,
213
+ 'RDKitSuCOS' : total })
200
214
201
215
202
216
if __name__ == "__main__" :
203
- main ()
217
+ main ()
0 commit comments