Skip to content

Commit eb39739

Browse files
committed
Bug fix on tmap plotter
1 parent 438599f commit eb39739

File tree

3 files changed

+46
-34
lines changed

3 files changed

+46
-34
lines changed

environment.yml

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,19 @@
11
name: sampledock
22
channels:
33
- pytorch
4-
- rdkit
54
- conda-forge
65
- bioconda
7-
6+
- tmap
7+
- pip
8+
- pip:
9+
- faerun
10+
- mhfp
11+
812
dependencies:
9-
- python=3.7
13+
- python>=3.7
1014
- pytorch
1115
- rdkit>=2020.03.3.0
1216
- scipy
13-
- rxdock
17+
- rxdock
18+
- tmap
19+
- tqdm

sampledock/SnD/post_process.py

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import os
1212
from multiprocessing import Pool
1313
from itertools import repeat
14+
from collections import namedtuple
1415

1516
from rdkit.Chem.PropertyMol import PropertyMol # Allow pickle on mol props for multiprocessing
1617
from rdkit.Chem import RDConfig # Allow Contrib packages to be used
@@ -84,31 +85,32 @@ def combine_designs(inpath, outpath):
8485

8586
return all_mols, best_mols
8687

87-
def create_df(mol_list):
88-
# Create a dataframe with all these mol properties
89-
# These props should exist if the designs are post-processed by the functions above
90-
mol_props = ['Name','Cycle','Score','SMILES','LogP','QED','MolWeight','SAS']
91-
df = pd.DataFrame()
92-
93-
# Fill df with lists
94-
# (append by entry using dicts from each mol increases data overhead and is slow)
95-
for prop in mol_props:
96-
df[prop] = [m.GetProp(prop) for m in mol_list]
97-
# Convert strings to possible numeric dtypes
98-
try:
99-
inferred_type = pd.to_numeric(df[prop]).dtype
100-
df[prop] = df[prop].astype(inferred_type)
101-
except ValueError:
102-
pass
103-
return df
88+
def df_from_molProps(mol_list):
    """Build a pandas DataFrame of summary properties from a list of molecules.

    Each molecule must expose ``GetProp(name)`` for the properties
    'Name', 'Cycle', 'SCORE.INTER', 'SMILES', 'LogP', 'QED', 'MolWeight'
    and 'SAS'. 'Cycle' is converted with ``int`` and the numeric
    descriptors with ``float``; 'Name' and 'SMILES' are kept as returned.

    Parameters
    ----------
    mol_list : iterable
        Molecule objects providing ``GetProp`` (presumably RDKit mols
        produced by the post-processing functions in this module --
        verify against callers).

    Returns
    -------
    pandas.DataFrame
        One row per molecule with columns
        ['Name', 'Cycle', 'Score', 'SMILES', 'LogP', 'QED', 'MolWeight', 'SAS'].
        The columns are present even when ``mol_list`` is empty, so that
        downstream ``sort_values('Score')`` / ``drop_duplicates('SMILES')``
        calls do not fail with ``KeyError``.
    """
    columns = ['Name', 'Cycle', 'Score', 'SMILES',
               'LogP', 'QED', 'MolWeight', 'SAS']
    rows = [
        (
            mol.GetProp('Name'),
            int(mol.GetProp('Cycle')),
            ## The score option is hard coded for now, will change everything to OOP later
            float(mol.GetProp('SCORE.INTER')),
            mol.GetProp('SMILES'),
            float(mol.GetProp('LogP')),
            float(mol.GetProp('QED')),
            float(mol.GetProp('MolWeight')),
            float(mol.GetProp('SAS')),
        )
        for mol in mol_list
    ]
    # Passing the column names explicitly (instead of relying on namedtuple
    # field inference) keeps the schema stable for an empty input list.
    return pd.DataFrame(rows, columns=columns)
104106

105107
def mkdf(all_mols, best_mols, outpath):
106108
# Create dataframe from the lists
107-
allscores = create_df(all_mols)
108-
minscores = create_df(best_mols)
109+
allscores = df_from_molProps(all_mols)
110+
minscores = df_from_molProps(best_mols)
109111

110112
# sort the dataframe based on docking scores
111-
sortedscores = minscores.sort_values('SCORE.INTER')
113+
sortedscores = minscores.sort_values('Score')
112114
# Drop duplicated entries
113115
sortedscores.drop_duplicates('SMILES', inplace = True, keep = 'first')
114116

sampledock/__main__.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import os
1313
import sys
1414
import subprocess
15-
15+
import pickle
1616
from rdkit import rdBase
1717
## Disable rdkit Logs
1818
rdBase.DisableLog('rdApp.error')
@@ -105,14 +105,18 @@
105105
# Create pandas dataframe for summary
106106
allscores, _ = mkdf(allmols, bestmols, postproc_wd)
107107
# Make LSH Forest
108-
lf = LSH_Convert(allmols, outpath)
108+
lf = LSH_Convert(allmols, postproc_wd, num_workers = os.cpu_count()-1)
109109
# Get LSH Tree Coords
110-
x, y, s, t = tree_coords(lf)
111-
allscores['x'] = x
112-
allscores['y'] = y
113-
allscores['s'] = s
114-
allscores['t'] = t
115-
# Save dataframe again
116-
allscores.to_csv(os.path.join(postproc_wd,"allscores.csv"),index = False)
110+
x, y, s, t = tree_coords(lf,
111+
node_size = float(eval(p.node_size)),
112+
k = int(p.k),
113+
mmm_rps = int(p.mmm_repeats))
114+
115+
# Save coords
116+
with open(os.path.join(postproc_wd,"coords.pickle"),'wb') as f:
117+
pickle.dump((x,y,s,t),f)
117118
# Create tmap on faerun
118-
df_to_faerun(allscores)
119+
f = df_to_faerun(allscores,x,y,s,t)
120+
121+
with open(os.path.join(postproc_wd,'SampleDock.faerun'), 'wb') as handle:
122+
pickle.dump(f.create_python_data(), handle, protocol=pickle.HIGHEST_PROTOCOL)

0 commit comments

Comments
 (0)