Skip to content

Commit e59943b

Browse files
committed
Merge branch 'master' of https://github.com/atfrank/SampleDock with command option changes
2 parents 967284c + 2bc0e88 commit e59943b

File tree

5 files changed

+154
-8
lines changed

5 files changed

+154
-8
lines changed

hyper.param

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
## All relative path anchors on the location of working directory not this file
1+
## All relative paths start from the working directory, not from the location of this file
22

33
############### Parameters for SnD #####################
4-
receptor_name = CDK2-5IEV # name the receptor to create dir
5-
receptor_file = ../targets/CDK2_5IEV/5IEV.mol2 # must be mol2 format
6-
ligand_file = ../targets/CDK2_5IEV/Roniciclib.sd # must be sd format
4+
receptor_name = nCoV_6LU7 # name the receptor to create dir
5+
receptor_file = ./targets/nCoV_6LU7/6LU7.mol2 # must be mol2 format
6+
ligand_file = ./targets/nCoV_6LU7/N3.sdf # must be sd format
77
ncycle = 1000 # number of cycles to be run
88
ndesign = 20 # number of designs to be generated per cycle
99
seed_smi = C1=CC=CC=C1 # initial seeding SMILES for the first cycle, default to benzene

sampledock/SnD/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from .docking import dock, sort_pose, save_pose
22
from .pocket_prepare import prep_prm
3-
from .sampler_util import hyperparam_loader, create_wd, smiles_to_sdfile
3+
from .sampler_util import hyperparam_loader, create_wd, smiles_to_sdfile
4+
from .post_process import mkdf, combine_designs

sampledock/SnD/docking.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
def dock(ligs, dock_dir, prmfile, docking_prm, npose, prefix = 'docked'):
99
# ligs must be a list of file path
10-
print('Docking in Progress\t', end = '\r')
10+
print('[INFO]: Docking in Progress\t', end = '\r')
1111
sys.stdout.flush()
1212
procs = []
1313
for i,lig in enumerate(ligs):
@@ -21,7 +21,7 @@ def dock(ligs, dock_dir, prmfile, docking_prm, npose, prefix = 'docked'):
2121
for proc in procs:
2222
# makes sure the docking has completed before this function ends
2323
proc.wait()
24-
print('Docking Complete! \t', end = '\r')
24+
print('[INFO]: Docking Complete! \t', end = '\r')
2525
sys.stdout.flush()
2626

2727
def sort_pose(dock_dir, sort_by, prefix = None):
@@ -46,7 +46,7 @@ def sort_pose(dock_dir, sort_by, prefix = None):
4646
# retrieve the best pose mol for each design
4747
best_pose = sorted_poses[0]
4848
best_poses.append((float(best_pose.GetProp(sort_by)),best_pose.GetProp('Name'),best_pose))
49-
print('Docked Poses Sorted \t', end = '\r')
49+
print('[INFO]: Docked Poses Sorted \t', end = '\r')
5050
sys.stdout.flush()
5151
# return the sorted tuple (ranked design based on the score of the best pose)
5252
return sorted(best_poses)

sampledock/SnD/post_process.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
# Post processing script for sample and dock generated molecules
2+
3+
import pandas as pd
4+
from rdkit import Chem
5+
from rdkit.Chem import AllChem, Draw
6+
import os
7+
from multiprocessing import Pool
8+
from itertools import repeat
9+
10+
from rdkit.Chem.PropertyMol import PropertyMol # Allow pickle on mol props for multiprocessing
11+
from rdkit.Chem import RDConfig # Allow Contrib packages to be used
12+
from rdkit.Chem.Crippen import MolLogP as LogP # Lipophilicity
13+
from rdkit.Chem.QED import default as QED # Quantitative Estimate of Drug-likeness
14+
from rdkit.Chem.Descriptors import MolWt # Mol Weight
15+
import sys
16+
sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score'))
17+
# add path for rdkit Contrib packages
18+
from sascorer import calculateScore as SAS # Synthetic Accessibility Score
19+
20+
# Calculate mol properties for the sd file in one cycle folder (called per folder via multiprocessing)
21+
def process_by_folder(fd, inpath):
    """Annotate the ranked designs of one cycle folder with computed properties.

    Reads ``<inpath>/<fd>/ranked_designs.sd``, wraps every molecule in a
    ``PropertyMol`` and stores the cycle label, MolWeight, LogP, QED and SAS
    on each molecule as string properties.

    Args:
        fd: Name of the cycle folder, e.g. ``"cycle_12"``.
        inpath: Directory that contains the cycle folders.

    Returns:
        A tuple ``(cir_mols, best_mol)``: the list of annotated molecules and
        the first molecule of that list. ``([], None)`` is returned when the
        sd file does not exist or holds no readable molecule, so callers
        always receive a 2-tuple instead of hitting an UnboundLocalError.
    """
    prefix = "cycle_"
    # str.strip() treats its argument as a set of characters, not a prefix,
    # so cut the prefix off explicitly.
    cycle = fd[len(prefix):] if fd.startswith(prefix) else fd
    sd = os.path.join(inpath, fd, 'ranked_designs.sd')

    if not os.path.exists(sd):
        return [], None

    # SDMolSupplier yields None for records it cannot parse; skip those.
    cir_mols = [PropertyMol(m) for m in Chem.SDMolSupplier(sd) if m is not None]
    for m in cir_mols:
        # Calculate properties for each mol
        m.SetProp('Cycle', cycle)
        m.SetProp('MolWeight', str(MolWt(m)))
        m.SetProp('LogP', str(LogP(m)))
        m.SetProp('QED', str(QED(m)))
        m.SetProp('SAS', str(SAS(m)))

    # The file is expected to be ranked, so the first entry is taken as the
    # best design of the cycle.
    best_mol = cir_mols[0] if cir_mols else None
    return cir_mols, best_mol
37+
38+
# calculated mol properties from each cycle and combine mols in one sdf file
39+
def _write_sdf(mols, path):
    """Write every molecule in *mols* to the sd file at *path*."""
    with open(path, 'w') as outfile:
        w = Chem.SDWriter(outfile)
        for m in mols:
            w.write(m)
        w.close()


def combine_designs(inpath, outpath):
    """Collect the designs of every cycle folder and save them as sdf files.

    Every ``cycle_<number>`` folder in *inpath* is processed in parallel with
    ``process_by_folder``. All molecules are written to
    ``<outpath>/All_Designs.sdf`` and the best design of each cycle to
    ``<outpath>/Best_Designs.sdf``.

    Args:
        inpath: Directory that contains the ``cycle_*`` folders.
        outpath: Existing directory the two sdf files are written to.

    Returns:
        A tuple ``(all_mols, best_mols)`` of lists of molecules.

    Raises:
        Exception: If *inpath* contains no ``cycle_<number>`` folder.
    """
    prefix = 'cycle_'
    # Keep only folders with a numeric suffix: anything else cannot be
    # ordered by cycle number (int() would raise on it).
    folders = [x for x in os.listdir(inpath)
               if x.startswith(prefix) and x[len(prefix):].isdecimal()]
    # Sort numerically so that cycle_10 comes after cycle_9
    folders.sort(key=lambda x: int(x[len(prefix):]))

    if not folders:
        raise Exception('No "cycle_" folder found!')

    # Leave one core free, but never ask for fewer than one worker:
    # os.cpu_count() may return 1 or None.
    n_workers = max(1, (os.cpu_count() or 1) - 1)
    with Pool(processes = n_workers) as pool:
        results = pool.starmap(process_by_folder, zip(folders, repeat(inpath)))

    # Flatten the per-cycle results
    all_mols = []
    best_mols = []
    for mols, best in results:
        all_mols.extend(mols)
        # Defensive: skip a cycle that reported no best design
        if best is not None:
            best_mols.append(best)

    print(len(all_mols), "total molecules combined from", len(folders),"cycles in\n", inpath)
    print(len(best_mols), "best designs extracted.\n")
    sys.stdout.flush()

    # Save as sdf
    _write_sdf(all_mols, outpath+'/All_Designs.sdf')
    _write_sdf(best_mols, outpath+'/Best_Designs.sdf')
    print('Mols saved!')
    sys.stdout.flush()

    return all_mols, best_mols
81+
82+
# Create dataframe with all the properties
83+
def create_df(mol_list):
    """Build a dataframe with one row per molecule and one column per property.

    Each column is read from the molecules with ``GetProp``; numeric columns
    are converted from their string form. The ``Mol`` column holds the
    molecule objects themselves.

    Args:
        mol_list: Molecules carrying the properties ``Name``, ``Cycle``,
            ``SCORE.INTER``, ``SMILES``, ``LogP``, ``QED``, ``MolWeight``
            and ``SAS``.

    Returns:
        A pandas DataFrame with the columns Design, Cycle, Score, SMILES,
        Mol, LogP, QED, MolWt and SAS, in that order.
    """
    def prop_column(prop_name, convert=None):
        # Read one property from every molecule, optionally converting it
        raw = [mol.GetProp(prop_name) for mol in mol_list]
        if convert is None:
            return raw
        return [convert(value) for value in raw]

    # Column name -> values, in the order the columns appear in the frame
    columns = {
        'Design': prop_column('Name'),
        'Cycle': prop_column('Cycle', int),
        'Score': prop_column('SCORE.INTER', float),
        'SMILES': prop_column('SMILES'),
        'Mol': list(mol_list),
        'LogP': prop_column('LogP', float),
        'QED': prop_column('QED', float),
        'MolWt': prop_column('MolWeight', float),
        'SAS': prop_column('SAS', float),
    }

    table = pd.DataFrame()
    for column_name, values in columns.items():
        table[column_name] = values
    return table
97+
98+
def mkdf(all_mols, best_mols, outpath):
    """Summarise the designs as dataframes and save them as csv files.

    Writes ``allscores.csv`` (every molecule) and ``sortedscores.csv`` (best
    designs sorted by ``Score``, keeping the first row per ``SMILES``) into
    *outpath*. The ``Mol`` column is left out of both files.

    Args:
        all_mols: Molecules of all cycles.
        best_mols: Best molecule of each cycle.
        outpath: Directory the csv files are written to.

    Returns:
        A tuple ``(allscores, minscores)``: the dataframe of all molecules and
        the dataframe of best designs. Note that the second frame is the
        unsorted one, duplicates included.
    """
    # Create dataframes from the two lists
    allscores = create_df(all_mols)
    minscores = create_df(best_mols)

    # Rank the best designs by docking score and drop duplicated SMILES,
    # keeping the first occurrence of each
    sortedscores = minscores.sort_values('Score').drop_duplicates('SMILES', keep = 'first')

    # Save as csv without the molecule objects
    for frame, csv_name in ((allscores, '/allscores.csv'), (sortedscores, '/sortedscores.csv')):
        frame.drop(columns=['Mol']).to_csv(outpath+csv_name, index = False)

    print('Dataframes saved!')
    sys.stdout.flush()
    return allscores, minscores
114+
115+
if __name__ == "__main__":
    # Command line entry point: combine the designs of all cycles found in
    # the input directory and write the sdf and csv summaries.
    import argparse
    parser = argparse.ArgumentParser(description="Combine the ranked_designs.sd in each "+
        "'cycle_*' folder from Sample and Dock and calculate MolWeight, SAS, LogP, and QED.")
    # The input directory is mandatory: without it there is nothing to process
    # and os.path.abspath(None) would fail with a TypeError.
    parser.add_argument("-i","--input", required=True,
                        help="input directory that contains the folders for each cycle")
    parser.add_argument("-o","--outpath", help="output directory for the combined sdf file, "+
        "default to <input>/All_Designs_Processed")
    a = parser.parse_args()
    inpath = os.path.abspath(a.input)

    if a.outpath:
        outpath = os.path.abspath(a.outpath)
    else:
        outpath = os.path.join(inpath, "All_Designs_Processed")

    if not os.path.exists(outpath):
        os.makedirs(outpath)
        print("Directory Made:")
        print(outpath)
        sys.stdout.flush()

    allmols, bestmols = combine_designs(inpath, outpath)
    mkdf(allmols, bestmols, outpath)

sampledock/__main__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from .SnD import prep_prm
2626
from .SnD import dock, sort_pose, save_pose
2727
from .SnD import hyperparam_loader, create_wd, smiles_to_sdfile
28+
from .SnD import combine_designs, mkdf
2829

2930
# Load hyper parameters
3031
p = hyperparam_loader(a.params)
@@ -99,3 +100,12 @@
99100

100101
print("[INFO]: Cycle %s: %s %s kcal/mol"%(j, smi, energy)+'\t'*6)
101102

103+
print("\n", p.ncycle, "cycles of design finished. Starting post-processing.")
104+
# Create post-process working directory
105+
postproc_wd = os.path.join(wd, "All_Designs_Processed")
106+
os.makedirs(postproc_wd)
107+
# Extract all ranked designs from each cycle and combine in one sdf file
108+
allmols, bestmols = combine_designs(wd, postproc_wd)
109+
# Create pandas dataframe for summary
110+
mkdf(allmols, bestmols, postproc_wd)
111+

0 commit comments

Comments
 (0)