Bug fix on tmap plotter

Truman-Xu · Truman-Xu · commit eb3973914252 · 2021-04-21T01:27:42.000-04:00
diff --git a/environment.yml b/environment.yml
@@ -1,13 +1,19 @@
 name: sampledock
 channels:
   - pytorch
-  - rdkit
   - conda-forge
   - bioconda
-
+  - tmap
+
 dependencies:
-  - python=3.7
+  - python>=3.7
   - pytorch
   - rdkit>=2020.03.3.0
   - scipy
-  - rxdock
+  - rxdock
+  - tmap
+  - tqdm
+  - pip
+  - pip:
+      - faerun
+      - mhfp
diff --git a/sampledock/SnD/post_process.py b/sampledock/SnD/post_process.py
@@ -11,6 +11,7 @@
 import os
 from multiprocessing import Pool
 from itertools import repeat
+from collections import namedtuple
 
 from rdkit.Chem.PropertyMol import PropertyMol # Allow pickle on mol props for multiprocessing
 from rdkit.Chem import RDConfig # Allow Contrib packages to be used
@@ -84,31 +85,32 @@ def combine_designs(inpath, outpath):
 
     return all_mols, best_mols
 
-def create_df(mol_list):
-    # Create a dataframe with all these mol properties
-    # These props should exist if the designs are post-processed by funtions above 
-    mol_props = ['Name','Cycle','Score','SMILES','LogP','QED','MolWeight','SAS']
-    df = pd.DataFrame()
-
-    # Fill df with lists 
-    # (append by entry using dicts from each mol increases data overhead and is slow)
-    for prop in mol_props:
-        df[prop] = [m.GetProp(prop) for m in mol_list]
-        # Convert strings to possible numeric dtypes
-        try:
-            inferred_type = pd.to_numeric(df[prop]).dtype
-            df[prop] = df[prop].astype(inferred_type)
-        except ValueError:
-            pass
-    return df
+def df_from_molProps(mol_list):
+    """Return a DataFrame with one row of typed properties per mol in mol_list."""
+    # Field names double as the DataFrame column names
+    Prop = namedtuple('Prop',['Name','Cycle','Score','SMILES','LogP','QED','MolWeight','SAS'])
+    props = [
+              Prop(
+                  mol.GetProp('Name'),
+                  int(mol.GetProp('Cycle')),
+                  ## NOTE: the score property is hard-coded to 'SCORE.INTER' for now
+                  float(mol.GetProp('SCORE.INTER')),
+                  mol.GetProp('SMILES'),
+                  float(mol.GetProp('LogP')),
+                  float(mol.GetProp('QED')),
+                  float(mol.GetProp('MolWeight')),
+                  float(mol.GetProp('SAS'))
+              ) for mol in mol_list]
+    # Explicit columns keep the schema even when mol_list is empty
+    return pd.DataFrame(props, columns = list(Prop._fields))
 
 def mkdf(all_mols, best_mols, outpath):
     # Create dataframe from the lists
-    allscores = create_df(all_mols)
-    minscores = create_df(best_mols)
+    allscores = df_from_molProps(all_mols)
+    minscores = df_from_molProps(best_mols)
 
     # sort the dataframe based on docking scores
-    sortedscores = minscores.sort_values('SCORE.INTER')
+    sortedscores = minscores.sort_values('Score')
     # Drop dulicated entries
     sortedscores.drop_duplicates('SMILES', inplace = True, keep = 'first')
 
diff --git a/sampledock/__main__.py b/sampledock/__main__.py
@@ -12,7 +12,7 @@
 import os
 import sys
 import subprocess
-
+import pickle
 from rdkit import rdBase
 ## Disable rdkit Logs
 rdBase.DisableLog('rdApp.error')
@@ -105,14 +105,18 @@
 # Create pandas dataframe for summary
 allscores, _ = mkdf(allmols, bestmols, postproc_wd)
 # Make LSH Forest 
-lf = LSH_Convert(allmols, outpath)
+lf = LSH_Convert(allmols, postproc_wd, num_workers = max(1, (os.cpu_count() or 2) - 1)) # cpu_count() can be None or 1; always keep at least one worker
 # Get LSH Tree Coords
-x, y, s, t = tree_coords(lf)
-allscores['x'] = x
-allscores['y'] = y
-allscores['s'] = s
-allscores['t'] = t
-# Save dataframe again
-allscores.to_csv(os.path.join(postproc_wd,"allscores.csv"),index = False)
+x, y, s, t = tree_coords(lf, 
+                         node_size = float(eval(p.node_size)), # NOTE(review): eval() executes p.node_size as Python code; confirm p only comes from a trusted config
+                         k = int(p.k), 
+                         mmm_rps = int(p.mmm_repeats))
+
+# Save the (x, y, s, t) tuple returned by tree_coords to coords.pickle
+with open(os.path.join(postproc_wd,"coords.pickle"),'wb') as f:
+    pickle.dump((x,y,s,t),f)
 # Create tmap on faerun
-df_to_faerun(allscores)
+f = df_to_faerun(allscores,x,y,s,t)
+# Pickle the output of create_python_data() to SampleDock.faerun
+with open(os.path.join(postproc_wd,'SampleDock.faerun'), 'wb') as handle:
+    pickle.dump(f.create_python_data(), handle, protocol=pickle.HIGHEST_PROTOCOL)