|
11 | 11 | import os
|
12 | 12 | from multiprocessing import Pool
|
13 | 13 | from itertools import repeat
|
| 14 | +from collections import namedtuple |
14 | 15 |
|
15 | 16 | from rdkit.Chem.PropertyMol import PropertyMol # Allow pickle on mol props for multiprocessing
|
16 | 17 | from rdkit.Chem import RDConfig # Allow Contrib packages to be used
|
@@ -84,31 +85,32 @@ def combine_designs(inpath, outpath):
|
84 | 85 |
|
85 | 86 | return all_mols, best_mols
|
86 | 87 |
|
87 |
| -def create_df(mol_list): |
88 |
| - # Create a dataframe with all these mol properties |
89 |
| - # These props should exist if the designs are post-processed by functions above |
90 |
| - mol_props = ['Name','Cycle','Score','SMILES','LogP','QED','MolWeight','SAS'] |
91 |
| - df = pd.DataFrame() |
92 |
| - |
93 |
| - # Fill df with lists |
94 |
| - # (append by entry using dicts from each mol increases data overhead and is slow) |
95 |
| - for prop in mol_props: |
96 |
| - df[prop] = [m.GetProp(prop) for m in mol_list] |
97 |
| - # Convert strings to possible numeric dtypes |
98 |
| - try: |
99 |
| - inferred_type = pd.to_numeric(df[prop]).dtype |
100 |
| - df[prop] = df[prop].astype(inferred_type) |
101 |
| - except ValueError: |
102 |
| - pass |
103 |
| - return df |
def df_from_molProps(mol_list, score_prop='SCORE.INTER'):
    """Collect per-molecule properties into a pandas DataFrame.

    Each molecule contributes one row with the columns
    ``Name, Cycle, Score, SMILES, LogP, QED, MolWeight, SAS`` (in that order).
    Values are read through ``mol.GetProp(...)``; ``Cycle`` is converted with
    ``int()`` and the score/descriptor columns with ``float()``.

    Args:
        mol_list: iterable of objects exposing ``GetProp(name)``
            (presumably RDKit molecules carrying the properties listed
            above -- confirm against the caller).
        score_prop: name of the molecule property stored in the ``Score``
            column. Defaults to ``'SCORE.INTER'``, the value that used to be
            hard-coded here.

    Returns:
        pandas.DataFrame with one row per molecule. The eight columns are
        always present, even when ``mol_list`` is empty, so callers can
        sort or de-duplicate on ``Score`` / ``SMILES`` without a KeyError.

    Raises:
        ValueError: if a numeric property cannot be parsed by ``int()`` /
            ``float()``. Errors raised by ``GetProp`` for a missing property
            propagate unchanged.
    """
    Prop = namedtuple(
        'Prop',
        ['Name', 'Cycle', 'Score', 'SMILES', 'LogP', 'QED', 'MolWeight', 'SAS'],
    )
    # Build the named tuples directly instead of plain tuples followed by a
    # second conversion pass.
    rows = [
        Prop(
            Name=mol.GetProp('Name'),
            Cycle=int(mol.GetProp('Cycle')),
            Score=float(mol.GetProp(score_prop)),
            SMILES=mol.GetProp('SMILES'),
            LogP=float(mol.GetProp('LogP')),
            QED=float(mol.GetProp('QED')),
            MolWeight=float(mol.GetProp('MolWeight')),
            SAS=float(mol.GetProp('SAS')),
        )
        for mol in mol_list
    ]
    # Name the columns explicitly: with an empty row list pandas cannot infer
    # them from the namedtuple fields and would return a column-less frame.
    return pd.DataFrame(rows, columns=list(Prop._fields))
104 | 106 |
|
105 | 107 | def mkdf(all_mols, best_mols, outpath):
|
106 | 108 | # Create dataframe from the lists
|
107 |
| - allscores = create_df(all_mols) |
108 |
| - minscores = create_df(best_mols) |
| 109 | + allscores = df_from_molProps(all_mols) |
| 110 | + minscores = df_from_molProps(best_mols) |
109 | 111 |
|
110 | 112 | # sort the dataframe based on docking scores
|
111 |
| - sortedscores = minscores.sort_values('SCORE.INTER') |
| 113 | + sortedscores = minscores.sort_values('Score') |
112 | 114 | # Drop duplicated entries
|
113 | 115 | sortedscores.drop_duplicates('SMILES', inplace = True, keep = 'first')
|
114 | 116 |
|
|
0 commit comments