Skip to content

Commit 9376cb2

Browse files
committed
Add post-process script
1 parent 3e32866 commit 9376cb2

File tree

3 files changed

+102
-1
lines changed

3 files changed

+102
-1
lines changed

sampledock/SnD/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from .docking import dock, sort_pose, save_pose
22
from .pocket_prepare import prep_prm
3-
from .sampler_util import hyperparam_loader, create_wd, smiles_to_sdfile
3+
from .sampler_util import hyperparam_loader, create_wd, smiles_to_sdfile
4+
from .post_process import mkdf, combine_designs

sampledock/SnD/post_process.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import pandas as pd
2+
from rdkit import Chem
3+
from rdkit.Chem import AllChem, Draw
4+
import os
5+
6+
from rdkit.Chem import RDConfig # Allow Contrib packages to be used
7+
from rdkit.Chem.Crippen import MolLogP as LogP
8+
from rdkit.Chem.QED import default as QED
9+
from rdkit.Chem.Descriptors import MolWt
10+
import sys
11+
sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score'))
12+
from sascorer import calculateScore as SAS
13+
14+
def mkdf(directory, output):
    """Summarise the ranked designs of every cycle as two CSV tables.

    Every sub-folder of ``directory`` whose name starts with ``cycle_`` is
    scanned for a ``ranked_designs.sd`` file.  One row is produced per
    readable molecule, holding its ``Name``, ``SCORE.INTER`` and ``SMILES``
    SD properties plus the computed LogP, QED, molecular weight and SA score.

    Args:
        directory: Folder that contains the ``cycle_*`` sub-folders.
        output: Existing folder that receives ``all_design.csv`` and
            ``best_designs.csv``.

    Returns:
        A ``(scores, minscores)`` tuple of DataFrames.  ``scores`` holds every
        design; ``minscores`` holds the first design of each cycle, sorted by
        ``Score`` with duplicate SMILES removed (lowest score kept).

    Raises:
        Exception: If ``directory`` contains no ``cycle_*`` entry.
    """
    prefix = 'cycle_'
    columns = ['Design', 'Cycle', 'Score', 'SMILES', 'Mol',
               'LogP', 'QED', 'MolWt', 'SAS']

    folders = [x for x in os.listdir(directory) if x.startswith(prefix)]
    if not folders:
        raise Exception('No "cycle_" folder found!')

    def cycle_label(name):
        # The real cycle id is the folder suffix; os.listdir() order is
        # arbitrary, so an enumerate() counter would mislabel the rows.
        suffix = name[len(prefix):]
        return int(suffix) if suffix.isdecimal() else suffix

    def folder_order(name):
        # Numeric cycles first, in numeric order; anything else by name.
        label = cycle_label(name)
        if isinstance(label, int):
            return (0, label, name)
        return (1, 0, name)

    frames = []
    for fd in sorted(folders, key=folder_order):
        sd = os.path.join(directory, fd, 'ranked_designs.sd')
        # Skip cycles without results, as combine_designs() does.
        if not os.path.exists(sd):
            continue
        # Parse the file once; the supplier yields None for records it
        # cannot read, and those cannot be queried for properties.
        mols = [m for m in Chem.SDMolSupplier(sd) if m is not None]
        if not mols:
            continue
        frames.append(pd.DataFrame({
            'Design': [m.GetProp('Name') for m in mols],
            'Cycle': cycle_label(fd),
            'Score': [float(m.GetProp('SCORE.INTER')) for m in mols],
            'SMILES': [m.GetProp('SMILES') for m in mols],
            'Mol': mols,
            'LogP': [LogP(m) for m in mols],
            'QED': [QED(m) for m in mols],
            'MolWt': [MolWt(m) for m in mols],
            'SAS': [SAS(m) for m in mols],
        }, columns=columns))

    # Keep the column layout even when no cycle produced any molecule, so
    # the sort below and the CSV headers stay valid.
    scores = pd.concat(frames) if frames else pd.DataFrame(columns=columns)

    # Each per-cycle frame carries its own 0..n-1 index, so index 0 selects
    # the first record of every cycle (presumably the top-ranked design,
    # given the file name -- confirm against the docking step).
    minscores = scores[scores.index == 0]
    minscores = minscores.sort_values('Score')
    # Non-inplace call: avoids mutating a slice of ``scores``.
    minscores = minscores.drop_duplicates('SMILES', keep='first')

    scores.to_csv(os.path.join(output, 'all_design.csv'))
    minscores.to_csv(os.path.join(output, 'best_designs.csv'))
    print("DataFrames Saved!")
    return scores, minscores
41+
42+
def combine_designs(directory, output):
    """Merge the ranked designs of every cycle into two SD files.

    Every sub-folder of ``directory`` whose name starts with ``cycle_`` is
    scanned for a ``ranked_designs.sd`` file.  Each readable molecule is
    tagged with the SD properties ``Cycle``, ``MolWeight``, ``LogP``, ``QED``
    and ``SAS`` and written to ``All_Designs.sdf``; the first readable
    molecule of each cycle is additionally written to ``Best_Designs.sdf``.

    Args:
        directory: Folder that contains the ``cycle_*`` sub-folders.
        output: Existing folder that receives the two ``.sdf`` files.

    Returns:
        A ``(mols, best_mols)`` tuple of lists holding all tagged molecules
        and the first molecule of each cycle, respectively.

    Raises:
        Exception: If ``directory`` contains no ``cycle_*`` entry.
    """
    prefix = 'cycle_'
    folders = [x for x in os.listdir(directory) if x.startswith(prefix)]
    if not folders:
        raise Exception('No "cycle_" folder found!')

    def folder_order(name):
        # Numeric cycles first, in numeric order; anything else by name.
        # os.listdir() order is arbitrary, which made the output order vary.
        suffix = name[len(prefix):]
        if suffix.isdecimal():
            return (0, int(suffix), name)
        return (1, 0, name)

    mols = []
    best_mols = []
    wa = Chem.SDWriter(os.path.join(output, 'All_Designs.sdf'))
    wb = None
    try:
        wb = Chem.SDWriter(os.path.join(output, 'Best_Designs.sdf'))
        for fd in sorted(folders, key=folder_order):
            # Slice the prefix off: str.strip("cycle_") removes any of the
            # characters c/y/l/e/_ from both ends rather than the prefix.
            cycle = fd[len(prefix):]
            sd = os.path.join(directory, fd, 'ranked_designs.sd')
            if os.path.exists(sd):
                best_pending = True
                for m in Chem.SDMolSupplier(sd):
                    # Unreadable records come back as None; skip them.
                    if m is None:
                        continue
                    m.SetProp('Cycle', cycle)
                    m.SetProp('MolWeight', str(MolWt(m)))
                    m.SetProp('LogP', str(LogP(m)))
                    m.SetProp('QED', str(QED(m)))
                    m.SetProp('SAS', str(SAS(m)))
                    mols.append(m)
                    wa.write(m)
                    if best_pending:
                        # First readable record of the cycle -- per the
                        # original comment, the highest score design.
                        best_mols.append(m)
                        wb.write(m)
                        best_pending = False
            # Periodically push buffered records to disk on long runs.
            if cycle.isdecimal() and int(cycle) % 5000 == 0:
                wa.flush()
                wb.flush()
    finally:
        # Always release the writers so partial output is not lost.
        wa.close()
        if wb is not None:
            wb.close()

    print(len(mols), "total molecules combined from", len(folders),"cycles in\n", directory)
    print(len(best_mols), "selected")
    sys.stdout.flush()
    return mols, best_mols
76+
77+
if __name__ == "__main__":
    # Command-line entry point: combine the per-cycle SD files and then
    # write the CSV summaries into the chosen output folder.
    import argparse
    parser = argparse.ArgumentParser(description="combine and the ranked_designs.sd in each "+
    "'cycle_*' folder from Sample and Dock and calculate MolWeight, SAS, LogP, and QED.")
    # The input folder has no sensible default; requiring it gives a clear
    # usage error instead of a TypeError from os.path.abspath(None).
    parser.add_argument("-i","--input", required=True,
                        help="input directory that contain folder by cycles")
    parser.add_argument("-o","--outpath", help="output directory for the combined sdf file",
                        default='./')
    a = parser.parse_args()
    directory = os.path.abspath(a.input)
    out = os.path.abspath(a.outpath)
    if not os.path.exists(out):
        # exist_ok guards against the folder appearing between the check
        # above and the creation here.
        os.makedirs(out, exist_ok=True)
        print(out, "Made")
    combine_designs(directory, out)
    mkdf(directory, out)

sampledock/__main__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from .SnD import prep_prm
2626
from .SnD import dock, sort_pose, save_pose
2727
from .SnD import hyperparam_loader, create_wd, smiles_to_sdfile
28+
from .SnD import combine_designs, mkdf
2829

2930
# Load hyper parameters
3031
p = hyperparam_loader(a.params)
@@ -99,3 +100,11 @@
99100

100101
print("[INFO]: Cycle %s: %s %s kcal/mol"%(j, smi, energy)+'\t'*6)
101102

103+
# Create post-process working directory
104+
postproc_wd = os.path.join(wd, "All_Designs_Processed")
105+
os.makedirs(postproc_wd)
106+
# Extract all ranked designs from each cycle and combine in one sdf file
107+
combine_designs(wd, postproc_wd)
108+
# Create pandas dataframe for summary
109+
mkdf(wd, postproc_wd)
110+

0 commit comments

Comments
 (0)