1
1
#!/usr/bin/env python
2
2
3
- # Copyright 2017 Informatics Matters Ltd.
3
+ # Copyright 2019 Informatics Matters Ltd.
4
4
#
5
5
# Licensed under the Apache License, Version 2.0 (the "License");
6
6
# you may not use this file except in compliance with the License.
14
14
# See the License for the specific language governing permissions and
15
15
# limitations under the License.
16
16
17
- ### Use MolVS to do tautomer enumeration, sterochemistry enumeration, charge neutralisation.
17
+ ### Use MolVS to do tautomer enumeration, stereochemistry enumeration, charge neutralisation.
18
18
19
19
import sys , argparse
20
20
@@ -73,7 +73,28 @@ def main():
73
73
if args .standardize :
74
74
getStandardMolecule = STANDARD_MOL_METHODS [args .standardize_method ]
75
75
76
- input ,output ,suppl ,writer ,output_base = rdkit_utils .default_open_input_output (args .input , args .informat , args .output , 'sanify' , args .outformat )
76
+ # build the dataset-level and field-level metadata describing the output of this script
77
+ source = "sanifier.py"
78
+ datasetMetaProps = {"source" :source , "description" : "Enumerate tautomers and stereoisomers" }
79
+ clsMappings = {
80
+ "EnumTautIsoSourceMolUUID" : "java.lang.String" ,
81
+ "EnumTautIsoSourceMolIdx" : "java.lang.Integer"
82
+ }
83
+ fieldMetaProps = [
84
+ {"fieldName" :"EnumTautIsoSourceMolUUID" , "values" : {"source" :source , "description" :"UUID of source molecule" }},
85
+ {"fieldName" :"EnumTautIsoSourceMolIdx" , "values" : {"source" :source , "description" :"Index of source molecule" }}
86
+ ]
87
+
88
+ oformat = utils .determine_output_format (args .outformat )
89
+
90
+ input ,output ,suppl ,writer ,output_base = rdkit_utils . \
91
+ default_open_input_output (args .input , args .informat , args .output ,
92
+ 'sanifier' , args .outformat ,
93
+ thinOutput = False , valueClassMappings = clsMappings ,
94
+ datasetMetaProps = datasetMetaProps ,
95
+ fieldMetaProps = fieldMetaProps )
96
+
97
+
77
98
i = 0
78
99
count = 0
79
100
errors = 0
@@ -110,11 +131,13 @@ def main():
110
131
parentUuid = None
111
132
112
133
results = []
113
- results . append ( mol )
134
+
114
135
115
136
if args .enumerate_tauts :
116
137
utils .log ("Enumerating tautomers" )
117
138
results = enumerateTautomers (mol )
139
+ else :
140
+ results .append (mol )
118
141
119
142
if args .enumerate_stereo :
120
143
utils .log ("Enumerating steroisomers" )
@@ -125,10 +148,14 @@ def main():
125
148
results .extend (enumerated )
126
149
127
150
for m in results :
151
+ # copy every property of the source molecule onto this result molecule
152
+ for name in mol .GetPropNames ():
153
+ m .SetProp (name , mol .GetProp (name ))
154
+ # drop the copied uuid, then tag the result with the index (and UUID, if any) of its source molecule
128
155
m .ClearProp ("uuid" )
129
- m .SetIntProp ("SourceMolNum " , i )
156
+ m .SetIntProp ("EnumTautIsoSourceMolIdx " , i )
130
157
if parentUuid :
131
- m .SetProp ("SourceMolUUID " , parentUuid )
158
+ m .SetProp ("EnumTautIsoSourceMolUUID " , parentUuid )
132
159
133
160
count = write_out (results ,count ,writer ,args .mol_format ,args .outformat )
134
161
@@ -139,6 +166,11 @@ def main():
139
166
input .close ()
140
167
output .close ()
141
168
169
+ # rewrite the dataset metadata now that the final output count is known
170
+ if oformat == 'json' :
171
+ utils .write_squonk_datasetmetadata (output_base , False , clsMappings , datasetMetaProps , fieldMetaProps , size = count )
172
+
173
+
142
174
if args .meta :
143
175
utils .write_metrics (output_base , {'__InputCount__' :i , '__OutputCount__' :count , '__ErrorCount__' :errors , 'RDKitSanify' :count })
144
176
0 commit comments