-
Notifications
You must be signed in to change notification settings - Fork 54
Open
Description
The current SDF files contain about 40 molecules that are non-neutral. Here's a script that regenerates correct ones.
import csv
import os
from rdkit import Chem
from rdkit.Chem import AllChem
def is_neutral(mol):
    """Return True if the formal charges of all atoms in *mol* sum to zero.

    Args:
        mol: An object exposing ``GetAtoms()``, whose items each expose
            ``GetFormalCharge()`` (e.g. an RDKit molecule).

    Returns:
        bool: True when the net formal charge is exactly 0. A molecule with
        no atoms is considered neutral. Zwitterions (balanced +/- charges)
        also count as neutral because only the net sum is checked.
    """
    return sum(atom.GetFormalCharge() for atom in mol.GetAtoms()) == 0
# Read every entry of database.txt, build a hydrogen-complete 3D conformer
# for it, attach the name / dG / dG_err properties, and write all molecules
# to freesolv.sdf. The run aborts on the first entry that cannot be parsed,
# cannot be embedded, or carries a non-zero net formal charge.
mols = []
mmff_fail_count = 0  # entries for which MMFF optimisation returned non-zero
with open('database.txt', newline='') as csvfile:
    spamreader = csv.reader(csvfile, delimiter=';', quotechar='|')
    for line, row in enumerate(spamreader):
        # The first three rows are skipped (presumably header/comment
        # lines -- confirm against database.txt). Blank rows are ignored
        # instead of failing with an IndexError.
        if line <= 2 or not row:
            continue

        name = row[0]
        smiles = row[1]

        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            # MolFromSmiles signals a parse failure by returning None.
            raise ValueError(
                "could not parse SMILES %r for %r (row %d)" % (smiles, name, line)
            )
        mol = Chem.AddHs(mol)
        print(smiles)

        # Explicit checks rather than `assert`, which is stripped under -O.
        res = AllChem.EmbedMolecule(mol)
        if res != 0:
            raise RuntimeError(
                "3D embedding failed for %r (%s), code %r" % (name, smiles, res)
            )

        # A non-zero MMFF result is tolerated: it is only counted, and the
        # embedded (possibly unoptimised) geometry is still written out.
        res = AllChem.MMFFOptimizeMolecule(mol)
        if res != 0:
            mmff_fail_count += 1

        # float() round-trip validates that columns 3 and 4 are numeric.
        exp_dG = float(row[3])
        exp_dG_err = float(row[4])
        mol.SetProp('_Name', name)
        mol.SetProp('dG', str(exp_dG))
        mol.SetProp('dG_err', str(exp_dG_err))

        if not is_neutral(mol):
            raise ValueError(
                "molecule %r (%s) has a non-zero net formal charge" % (name, smiles)
            )
        mols.append(mol)

print("mm_fail", mmff_fail_count)

w = Chem.SDWriter('freesolv.sdf')
try:
    for m in mols:
        w.write(m)
    w.flush()
finally:
    # Always release the output file, even if a write fails part-way.
    w.close()
print("wrote", len(mols), "mols")
Metadata
Assignees
Labels
No labels