forked from sijie-liu97/AI_teaching
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathphysicochem_properties_for_pca.py
More file actions
138 lines (100 loc) · 4.44 KB
/
physicochem_properties_for_pca.py
File metadata and controls
138 lines (100 loc) · 4.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import argparse
from rdkit import Chem
from rdkit.Chem import PandasTools
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import Lipinski
from rdkit.Chem import Crippen
import pandas as pd
'''
Script to calculate physicochemical properties of molecules
(adapted from a script used for the ring systems project; some names and annotations still need to be updated):
number of nitrogen atoms, number of oxygen atoms, number of chiral centers, molecular weight, number of heavy atoms,
number of hydrogen bond acceptors, number of hydrogen bond donors, logP, topological polar surface area, number of
aromatic atoms, formal charge, number of rings, number of bridgehead atoms, fraction of Csp3 atoms
'''
def get_physicochemical_properties(molDF, smiles_column):
    '''
    Builds an RDKit molecule for every row of the dataframe and stores each calculated property in a new column.
    The dataframe is modified in place and additionally returned, so the result can be assigned or chained.
    :param molDF: dataframe holding the molecules as SMILES
    :param smiles_column: name of the column of molDF that contains the SMILES
    :return: the same dataframe, extended by the 'Molecule' column and one column per property
    '''
    PandasTools.AddMoleculeColumnToFrame(molDF, smiles_column, 'Molecule')
    print('Start calculcating parameters.')

    # (target column, calculation function, extra positional arguments) in the order the columns are added
    property_calculators = (
        ('N', get_molecule_composition, (7,)),
        ('O', get_molecule_composition, (8,)),
        ('chiral', get_nof_chiral_centers, ()),
        ('MW', get_MW, ()),
        ('heavy_atoms', num_heavy_atoms, ()),
        ('h_acc', num_of_h_acceptors_and_donors, (True,)),
        ('h_don', num_of_h_acceptors_and_donors, (False,)),
        ('logP', get_logp, ()),
        ('TPSA', get_TPSA, ()),
        ('numAro', num_aromatic_atoms, ()),
        ('formalCharge', sum_formal_charge, ()),
        ('numRings', num_rings, ()),
        ('bridgeheadAtoms', num_bridgehead_atoms, ()),
        ('frac_csp3', fraction_csp3, ()),
    )
    for column, calculator, extra_args in property_calculators:
        molDF[column] = molDF['Molecule'].apply(calculator, args=extra_args)

    # The docstring always promised a dataframe; previously None was returned implicitly.
    return molDF
def get_further_physicochemical_properties(molDF):
    '''
    Removes the 'bridgeheadAtoms' column and adds the number of sulfur atoms ('S'), the number of
    halogens ('nHalogens') and the molar refractivity ('MR') as new columns.
    The dataframe is modified in place and additionally returned, so the result can be assigned or chained.
    :param molDF: dataframe that already contains the columns 'Molecule' and 'bridgeheadAtoms'
                  (both are created by get_physicochemical_properties); a missing 'bridgeheadAtoms'
                  column raises a KeyError
    :return: the same dataframe with the updated set of property columns
    '''
    print('Start calculcating further properties.')
    del molDF['bridgeheadAtoms']
    molDF['S'] = molDF['Molecule'].apply(get_molecule_composition, args=(16,))
    molDF['nHalogens'] = molDF['Molecule'].apply(num_halogens)
    molDF['MR'] = molDF['Molecule'].apply(get_mr)
    # The docstring always promised a dataframe; previously None was returned implicitly.
    return molDF
def get_molecule_composition(mol, requestedAtomicNum):
    '''
    Counts how many atoms of one element the molecule contains
    :param mol: the molecule whose atoms are inspected
    :param requestedAtomicNum: atomic number of the element to count
    :return: the number of atoms whose atomic number equals requestedAtomicNum
    '''
    return sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == requestedAtomicNum)
def get_nof_chiral_centers(mol):
    '''
    Counts the chiral centers reported by Chem.FindMolChiralCenters, unassigned ones included
    :param mol: the molecule
    :return: the number of chiral centers found
    '''
    chiral_centers = Chem.FindMolChiralCenters(mol, includeUnassigned=True)
    return len(chiral_centers)
def get_MW(mol):
    '''
    Calculates the molecular weight with Descriptors.MolWt
    :param mol: the molecule
    :return: the molecular weight rounded to three decimal places
    '''
    molecular_weight = Descriptors.MolWt(mol)
    return round(molecular_weight, 3)
def num_heavy_atoms(mol):
    '''
    Counts the heavy atoms with Lipinski.HeavyAtomCount
    :param mol: the molecule
    :return: the heavy atom count
    '''
    heavy_atom_count = Lipinski.HeavyAtomCount(mol)
    return heavy_atom_count
def num_of_h_acceptors_and_donors(mol, acc=True):
    '''
    Counts either the hydrogen bond acceptors or the hydrogen bond donors of the molecule
    :param mol: the molecule
    :param acc: if true the acceptors are counted (Lipinski.NumHAcceptors), otherwise the donors
                (Lipinski.NumHDonors)
    :return: the number of hydrogen bond acceptors or donors
    '''
    count_function = Lipinski.NumHAcceptors if acc else Lipinski.NumHDonors
    return count_function(mol)
def get_logp(mol):
    '''
    Calculates logP with Crippen.MolLogP
    :param mol: the molecule
    :return: the logP value rounded to three decimal places
    '''
    log_p = Crippen.MolLogP(mol)
    return round(log_p, 3)
def get_TPSA(mol):
    '''
    Calculates the topological polar surface area with Descriptors.TPSA
    :param mol: the molecule
    :return: the TPSA value rounded to three decimal places
    '''
    polar_surface_area = Descriptors.TPSA(mol)
    return round(polar_surface_area, 3)
def num_aromatic_atoms(mol):
    '''
    Counts the atoms of the molecule that are flagged as aromatic
    :param mol: the molecule whose atoms are inspected
    :return: the number of atoms for which GetIsAromatic() is true
    '''
    return sum(1 for atom in mol.GetAtoms() if atom.GetIsAromatic())
def sum_formal_charge(mol):
    '''
    Adds up the formal charges of all atoms of the molecule
    :param mol: the molecule whose atoms are inspected
    :return: the sum of the formal charges of the atoms (0 for a molecule without atoms)
    '''
    return sum(atom.GetFormalCharge() for atom in mol.GetAtoms())
def num_rings(mol):
    '''
    Counts the rings in the smallest set of smallest rings (SSSR) determined by Chem.GetSSSR
    :param mol: the molecule
    :return: the number of SSSR rings as an integer
    '''
    sssr = Chem.GetSSSR(mol)
    # Older RDKit releases return the ring count directly, newer ones return the rings
    # themselves; normalise both to a plain count so the resulting column stays numeric.
    if isinstance(sssr, int):
        return sssr
    return len(sssr)
def num_bridgehead_atoms(mol):
    '''
    Counts the bridgehead atoms with rdMolDescriptors.CalcNumBridgeheadAtoms
    :param mol: the molecule
    :return: the number of bridgehead atoms
    '''
    bridgehead_count = rdMolDescriptors.CalcNumBridgeheadAtoms(mol)
    return bridgehead_count
def fraction_csp3(mol):
    '''
    Calculates the fraction of Csp3 atoms with Descriptors.FractionCSP3
    :param mol: the molecule
    :return: the fraction of Csp3 atoms rounded to three decimal places
    '''
    csp3_fraction = Descriptors.FractionCSP3(mol)
    return round(csp3_fraction, 3)
def num_halogens(mol):
    '''
    Counts the halogens with the fr_halogen fragment descriptor of Chem.Fragments
    :param mol: the molecule
    :return: the value of fr_halogen for the molecule
    '''
    halogen_count = Chem.Fragments.fr_halogen(mol)
    return halogen_count
def get_mr(mol):
    '''
    Calculates the Wildman-Crippen MR value with Crippen.MolMR
    Uses an atom-based scheme based on the values in the paper:
    Wildman and G. M. Crippen JCICS 39 868-873 (1999)
    :param mol: the molecule
    :return: the MR value rounded to three decimal places
    '''
    molar_refractivity = Crippen.MolMR(mol)
    return round(molar_refractivity, 3)