Skip to content

Commit dfbcfae

Browse files
committed
added standardize module
1 parent 9066d85 commit dfbcfae

File tree

2 files changed

+102
-1
lines changed

2 files changed

+102
-1
lines changed

Dockerfile-rdkit

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM informaticsmatters/rdkit-python-debian:Release_2018_03_01
1+
FROM informaticsmatters/rdkit-python-debian:Release_2018_09_1
22
LABEL maintainer="Tim Dudgeon<[email protected]>"
33

44
USER root
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2018 Informatics Matters Ltd.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
import argparse
18+
19+
from rdkit import DataStructs, rdBase
20+
from rdkit.Chem.MolStandardize import rdMolStandardize
21+
22+
from pipelines_utils import parameter_utils, utils
23+
from pipelines_utils_rdkit import rdkit_utils, mol_utils
24+
25+
26+
### functions #########################################
27+
28+
#lfc = rdMolStandardize.LargestFragmentChooser()
29+
# Single Uncharger instance created at import time and reused by standardize()
# for every molecule, rather than constructing a new one per call.
uncharger = rdMolStandardize.Uncharger()
30+
31+
32+
def standardize(mol, neutralise, fragment):
    """
    Clean up a molecule, then optionally pick one fragment and neutralise it.

    The steps always run in this order: RDKit cleanup, fragment selection
    (only if requested), uncharging (only if requested).

    :param mol: The molecule to standardize
    :param neutralise: When true, the molecule is passed through the module-level uncharger as the final step
    :param fragment: The fragment selection method handed to mol_utils.fragment, either 'hac' or 'mw'.
        When falsy (e.g. None) no fragment selection is done and the whole molecule is kept.
    :return: The standardized molecule
    """
    cleaned = rdMolStandardize.Cleanup(mol)
    # Our own largest-fragment picker is used here because the RDKit one
    # (LargestFragmentChooser) behaves slightly differently.
    chosen = mol_utils.fragment(cleaned, fragment) if fragment else cleaned
    return uncharger.uncharge(chosen) if neutralise else chosen
49+
50+
51+
### start main execution #########################################
52+
53+
def main():
    """
    Command-line entry point: standardize every molecule read from the input.

    Parses the command-line arguments, opens the input and output through
    rdkit_utils.default_open_input_output, runs standardize() on each molecule
    the supplier yields, writes the result and, when args.meta is set, writes
    the usage metrics.

    Metrics written:
      __InputCount__    number of records read from the supplier (including
                        the None entries that are skipped)
      __OutputCount__   number of molecules standardized and written
      RDKitStandardize  number of molecules standardized and written
    """

    ### command line args definitions #########################################

    parser = argparse.ArgumentParser(description='RDKit Standardize')
    parser.add_argument('--fragment-method', choices=['hac', 'mw'], help='Approach to find biggest fragment if more than one (hac = biggest by heavy atom count, mw = biggest by mol weight)')
    parser.add_argument('--neutralise', action='store_true', help='Neutralise the molecule')

    parameter_utils.add_default_io_args(parser)
    # NOTE(review): --quiet and --thin are accepted but not used anywhere in
    # this function; the output is always opened with thinOutput=False below.
    # Presumably that is deliberate because standardization changes the
    # structure - confirm before wiring args.thin through.
    parser.add_argument('-q', '--quiet', action='store_true', help='Quiet mode')
    parser.add_argument('--thin', action='store_true', help='Thin output mode')

    args = parser.parse_args()
    utils.log("Standardize Args: ", args)

    # handle metadata
    source = "standardize.py"
    datasetMetaProps = {"source": source, "description": "Standardize using RDKit " + rdBase.rdkitVersion}
    clsMappings = {}
    fieldMetaProps = []

    # 'input_stream' rather than 'input' so the builtin is not shadowed.
    input_stream, output, suppl, writer, output_base = rdkit_utils.default_open_input_output(
        args.input, args.informat, args.output,
        'standardize', args.outformat,
        thinOutput=False, valueClassMappings=clsMappings,
        datasetMetaProps=datasetMetaProps,
        fieldMetaProps=fieldMetaProps)

    input_count = 0   # every record the supplier yields, usable or not
    output_count = 0  # molecules standardized and written
    for mol in suppl:
        # Count the record before the None check so that __InputCount__ is the
        # number of inputs seen, not the number of inputs skipped.
        input_count += 1
        if mol is None:
            # Nothing to standardize for this record (presumably one the
            # reader could not parse); skip it.
            continue
        m = standardize(mol, args.neutralise, args.fragment_method)
        writer.write(m)
        output_count += 1

    input_stream.close()
    writer.flush()
    writer.close()
    output.close()

    if args.meta:
        # The usage metric reports the molecules actually standardized, which
        # is the same number as the output count.
        utils.write_metrics(output_base, {'__InputCount__': input_count,
                                          '__OutputCount__': output_count,
                                          'RDKitStandardize': output_count})
98+
99+
if __name__ == "__main__":
100+
main()
101+

0 commit comments

Comments
 (0)