Skip to content

Commit 9b62765

Browse files
committed
Update post process script; add jupyter notebook
1 parent 2bc0e88 commit 9b62765

File tree

3 files changed

+457
-17
lines changed

3 files changed

+457
-17
lines changed

process_scores.ipynb

Lines changed: 435 additions & 0 deletions
Large diffs are not rendered by default.

sampledock/SnD/post_process.py

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,16 @@ def process_by_folder(fd, inpath):
2323
sd = inpath+'/'+fd+'/ranked_designs.sd'
2424
if os.path.exists(sd):
2525
cir_mols = [PropertyMol(m) for m in Chem.SDMolSupplier(sd)]
26-
for i,m in enumerate(cir_mols):
26+
for m in cir_mols:
2727
# Calculate properties for each mol
2828
m.SetProp('Cycle',cycle)
2929
m.SetProp('MolWeight', str(MolWt(m)))
3030
m.SetProp('LogP', str(LogP(m)))
3131
m.SetProp('QED', str(QED(m)))
3232
m.SetProp('SAS', str(SAS(m)))
33-
if i == 0:
34-
# Select the highest score design in the cycle
35-
best_mol = m
33+
# Select the highest score design in the cycle
34+
# (the first one in the ranked sd file)
35+
best_mol = cir_mols[0]
3636
return cir_mols, best_mol
3737

3838
# Calculate mol properties for each cycle and combine all mols into one sdf file
@@ -79,20 +79,25 @@ def combine_designs(inpath, outpath):
7979

8080
return all_mols, best_mols
8181

82-
# Create dataframe with all the properties
8382
def create_df(mol_list):
83+
# Create a dataframe with all these mol properties
84+
# These props should exist if the designs are post-processed by the functions above
85+
mol_props = ['Name','Cycle','SCORE.INTER','SMILES','LogP','QED','MolWeight','SAS']
8486
df = pd.DataFrame()
8587

86-
df['Design'] = [m.GetProp('Name') for m in mol_list]
87-
df['Cycle'] = [int(m.GetProp('Cycle')) for m in mol_list]
88-
df['Score'] = [float(m.GetProp('SCORE.INTER')) for m in mol_list]
89-
df['SMILES'] = [m.GetProp('SMILES') for m in mol_list]
90-
df['Mol'] = [m for m in mol_list]
91-
df['LogP'] = [float(m.GetProp('LogP')) for m in mol_list]
92-
df['QED'] = [float(m.GetProp('QED')) for m in mol_list]
93-
df['MolWt'] = [float(m.GetProp('MolWeight')) for m in mol_list]
94-
df['SAS'] = [float(m.GetProp('SAS')) for m in mol_list]
95-
88+
# Fill df with lists
89+
# (appending entry by entry using dicts from each mol increases data overhead and is slow)
90+
for prop in mol_props:
91+
df[prop] = [m.GetProp(prop) for m in mol_list]
92+
# Convert strings to numeric dtypes where possible
93+
try:
94+
inferred_type = pd.to_numeric(df[prop]).dtype
95+
df[prop] = df[prop].astype(inferred_type)
96+
except ValueError:
97+
pass
98+
99+
# Add mol objects to the last column
100+
df['Mol'] = mol_list
96101
return df
97102

98103
def mkdf(all_mols, best_mols, outpath):
@@ -101,7 +106,7 @@ def mkdf(all_mols, best_mols, outpath):
101106
minscores = create_df(best_mols)
102107

103108
# sort the dataframe based on docking scores
104-
sortedscores = minscores.sort_values('Score')
109+
sortedscores = minscores.sort_values('SCORE.INTER')
105110
# Drop duplicated entries
106111
sortedscores.drop_duplicates('SMILES', inplace = True, keep = 'first')
107112

sampledock/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
print('Using CUDA device:',torch.cuda.get_device_name(torch.cuda.current_device()))
4040
else:
4141
device = torch.device("cpu")
42-
print("Using CPU for torch device")
42+
print("CUDA device not available. Using CPU for torch device.")
4343

4444
jtvae.load_state_dict(torch.load(p.model_loc, map_location=device))
4545

0 commit comments

Comments
 (0)