@@ -23,16 +23,16 @@ def process_by_folder(fd, inpath):
23
23
sd = inpath + '/' + fd + '/ranked_designs.sd'
24
24
if os .path .exists (sd ):
25
25
cir_mols = [PropertyMol (m ) for m in Chem .SDMolSupplier (sd )]
26
- for i , m in enumerate ( cir_mols ) :
26
+ for m in cir_mols :
27
27
# Calculate properties for each mol
28
28
m .SetProp ('Cycle' ,cycle )
29
29
m .SetProp ('MolWeight' , str (MolWt (m )))
30
30
m .SetProp ('LogP' , str (LogP (m )))
31
31
m .SetProp ('QED' , str (QED (m )))
32
32
m .SetProp ('SAS' , str (SAS (m )))
33
- if i == 0 :
34
- # Select the highest score design in the cycle
35
- best_mol = m
33
+ # Select the highest score design in the cycle
34
+ # ( the first one in the ranked sd file)
35
+ best_mol = cir_mols [ 0 ]
36
36
return cir_mols , best_mol
37
37
38
38
# Calculate mol properties from each cycle and combine mols into one sdf file
@@ -79,20 +79,25 @@ def combine_designs(inpath, outpath):
79
79
80
80
return all_mols , best_mols
81
81
82
- # Create dataframe with all the properties
83
82
def create_df (mol_list ):
83
+ # Create a dataframe with all these mol properties
84
+ # These props should exist if the designs are post-processed by functions above
85
+ mol_props = ['Name' ,'Cycle' ,'SCORE.INTER' ,'SMILES' ,'LogP' ,'QED' ,'MolWeight' ,'SAS' ]
84
86
df = pd .DataFrame ()
85
87
86
- df ['Design' ] = [m .GetProp ('Name' ) for m in mol_list ]
87
- df ['Cycle' ] = [int (m .GetProp ('Cycle' )) for m in mol_list ]
88
- df ['Score' ] = [float (m .GetProp ('SCORE.INTER' )) for m in mol_list ]
89
- df ['SMILES' ] = [m .GetProp ('SMILES' ) for m in mol_list ]
90
- df ['Mol' ] = [m for m in mol_list ]
91
- df ['LogP' ] = [float (m .GetProp ('LogP' )) for m in mol_list ]
92
- df ['QED' ] = [float (m .GetProp ('QED' )) for m in mol_list ]
93
- df ['MolWt' ] = [float (m .GetProp ('MolWeight' )) for m in mol_list ]
94
- df ['SAS' ] = [float (m .GetProp ('SAS' )) for m in mol_list ]
95
-
88
+ # Fill df with lists
89
+ # (appending entry by entry using dicts from each mol increases data overhead and is slow)
90
+ for prop in mol_props :
91
+ df [prop ] = [m .GetProp (prop ) for m in mol_list ]
92
+ # Convert strings to possible numeric dtypes
93
+ try :
94
+ inferred_type = pd .to_numeric (df [prop ]).dtype
95
+ df [prop ] = df [prop ].astype (inferred_type )
96
+ except ValueError :
97
+ pass
98
+
99
+ # Add mol objects to the last column
100
+ df ['Mol' ] = mol_list
96
101
return df
97
102
98
103
def mkdf (all_mols , best_mols , outpath ):
@@ -101,7 +106,7 @@ def mkdf(all_mols, best_mols, outpath):
101
106
minscores = create_df (best_mols )
102
107
103
108
# sort the dataframe based on docking scores
104
- sortedscores = minscores .sort_values ('Score ' )
109
+ sortedscores = minscores .sort_values ('SCORE.INTER ' )
105
110
# Drop duplicated entries
106
111
sortedscores .drop_duplicates ('SMILES' , inplace = True , keep = 'first' )
107
112
0 commit comments