Skip to content

Commit 6539626

Browse files
Brandon Duane WalkerBrandon Duane Walker
authored andcommitted
check pdb topology differences
1 parent a02e17d commit 6539626

File tree

3 files changed

+243
-0
lines changed

3 files changed

+243
-0
lines changed

cwl_adapters/topology_check.cwl

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#!/usr/bin/env cwl-runner
2+
cwlVersion: v1.0
3+
4+
class: CommandLineTool
5+
6+
label: Check for topology changes in PDB files
7+
8+
doc: |-
9+
Check for topology changes in PDB files
10+
11+
baseCommand: ["python", "/topology_check.py"]
12+
13+
hints:
14+
DockerRequirement:
15+
dockerPull: mrbrandonwalker/topology_check
16+
17+
requirements:
18+
InlineJavascriptRequirement: {}
19+
20+
inputs:
21+
22+
file1:
23+
type: File
24+
inputBinding:
25+
prefix: --file1
26+
format:
27+
- edam:format_1476
28+
29+
file2:
30+
type: File
31+
inputBinding:
32+
prefix: --file2
33+
format:
34+
- edam:format_1476
35+
36+
intended_changes:
37+
type: string?
38+
inputBinding:
39+
prefix: --intended_changes
40+
41+
topology_changed:
42+
type: string?
43+
44+
outputs:
45+
46+
topology_changed:
47+
type: boolean
48+
outputBinding:
49+
glob: valid.txt
50+
loadContents: true
51+
outputEval: |
52+
${
53+
// Read the contents of the file
54+
const lines = self[0].contents.split("\n");
55+
// Read boolean value from the first line
56+
const valid = lines[0].trim() === "True";
57+
return valid;
58+
}
59+
60+
stdout:
61+
type: File
62+
outputBinding:
63+
glob: stdout
64+
65+
stdout: stdout
66+
67+
$namespaces:
68+
edam: https://edamontology.org/
69+
70+
$schemas:
71+
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# docker build -f Dockerfile_topology_check -t mrbrandonwalker/topology_check .
2+
3+
FROM condaforge/mambaforge
4+
# NOT mambaforge-pypy3 (rdkit is incompatible with pypy)
5+
6+
RUN mamba install -c conda-forge rdkit --yes
7+
8+
RUN mamba init bash
9+
10+
COPY topology_check.py /.
11+
12+
RUN mamba clean --all --yes
13+
14+
ADD Dockerfile_topology_check .

examples/scripts/topology_check.py

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
import argparse
2+
from rdkit import Chem
3+
from rdkit.Chem.rdchem import Atom, Mol
4+
5+
6+
def map_atoms(mol1: Mol, mol2: Mol) -> dict:
7+
"""
8+
Map atoms from the first molecule to corresponding atoms in the second molecule.
9+
10+
Args:
11+
mol1 (Mol): RDKit molecule from the first PDB file.
12+
mol2 (Mol): RDKit molecule from the second PDB file.
13+
14+
Returns:
15+
dict: Mapping of atoms from mol1 to mol2.
16+
"""
17+
match_atoms = mol1.GetSubstructMatch(mol2)
18+
if len(match_atoms) == 0:
19+
raise ValueError("No atoms matched between the two molecules.")
20+
mol2_index_to_mol1_index = {mol2_index: mol1_index for mol2_index, mol1_index in enumerate(match_atoms)}
21+
mol1_index_to_mol2_index = {k: v for v, k in mol2_index_to_mol1_index.items()}
22+
return mol1_index_to_mol2_index
23+
24+
25+
def compare_atom_properties(atom1: Atom, atom2: Atom) -> dict:
26+
"""
27+
Compare properties of two atoms and return a dictionary of differences.
28+
29+
Args:
30+
atom1 (Atom): RDKit atom from the first molecule.
31+
atom2 (Atom): RDKit atom from the second molecule.
32+
33+
Returns:
34+
dict: Dictionary of property differences between the two atoms.
35+
"""
36+
properties_diff = {}
37+
atom1_properties = get_properties(atom1)
38+
atom2_properties = get_properties(atom2)
39+
for key, atom1_value in atom1_properties.items():
40+
if atom1_value != atom2_properties[key]:
41+
properties_diff[key] = (atom1_value, atom2_properties[key])
42+
return properties_diff
43+
44+
45+
def compare_atoms(mol1: Mol, mol2: Mol, atom_map: dict, intended_changes: list = []) -> dict:
46+
"""
47+
Compare properties of mapped atoms in two molecules and return a dictionary of differences.
48+
49+
Args:
50+
mol1 (Mol): RDKit molecule from the first PDB file.
51+
mol2 (Mol): RDKit molecule from the second PDB file.
52+
atom_map (dict): Mapping of atoms from mol1 to mol2.
53+
intended_changes (list): List of intended changes (e.g., "added_hydrogen").
54+
55+
Returns:
56+
dict: Dictionary of property differences between mapped atoms in the two molecules.
57+
"""
58+
properties_diff = {}
59+
deleted_atoms = set(atom.GetIdx() for atom in mol1.GetAtoms()) - set(atom_map.keys())
60+
for atom1_idx, atom2_idx in atom_map.items():
61+
atom1 = mol1.GetAtomWithIdx(atom1_idx)
62+
atom2 = mol2.GetAtomWithIdx(atom2_idx)
63+
64+
# Check if the difference is related to intended changes
65+
if "added_hydrogens" in intended_changes:
66+
diff_implicit_valence = atom2.GetImplicitValence() - atom1.GetImplicitValence()
67+
diff_num_implicit_hs = atom2.GetNumImplicitHs() - atom1.GetNumImplicitHs()
68+
69+
# If the difference in implicit valence and num implicit Hs is the same, ignore the difference
70+
if diff_implicit_valence == diff_num_implicit_hs:
71+
continue
72+
73+
atom_diff = compare_atom_properties(atom1, atom2)
74+
# check if the difference is related to changed residue label, then remove from difference
75+
if "GetResidueName" in atom_diff and "changed_residue_label" in intended_changes:
76+
atom_diff.pop("GetResidueName")
77+
78+
if atom_diff:
79+
properties_diff[atom1_idx] = atom_diff
80+
81+
if "ignore_deleted" not in intended_changes:
82+
# Add deleted atoms to the differences
83+
for atom_idx in deleted_atoms:
84+
properties_diff[atom_idx] = {"DeletedAtom": True}
85+
86+
return properties_diff
87+
88+
89+
def get_properties(atom: Atom) -> dict:
90+
"""Get properties of an RDKit atom."""
91+
res_dict = {}
92+
res = atom.GetPDBResidueInfo()
93+
res_dict['GetResidueName'] = res.GetResidueName()
94+
res_dict['GetResidueNumber'] = res.GetResidueNumber()
95+
res_dict['GetChainId'] = res.GetChainId()
96+
res_dict['GetName'] = res.GetName()
97+
prop_dict = {
98+
"GetSymbol": atom.GetSymbol(),
99+
"GetIdx": atom.GetIdx(),
100+
"GetAtomicNum": atom.GetAtomicNum(),
101+
"GetBonds": sorted([(sorted([bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()])) for bond in atom.GetBonds()]),
102+
"GetChiralTag": atom.GetChiralTag(),
103+
"GetDegree": atom.GetDegree(),
104+
"GetExplicitValence": atom.GetExplicitValence(),
105+
"GetFormalCharge": atom.GetFormalCharge(),
106+
"GetHybridization": atom.GetHybridization(),
107+
"GetImplicitValence": atom.GetImplicitValence(),
108+
"GetIsAromatic": atom.GetIsAromatic(),
109+
"GetIsotope": atom.GetIsotope(),
110+
"GetMass": atom.GetMass(),
111+
"GetNumExplicitHs": atom.GetNumExplicitHs(),
112+
"GetNumImplicitHs": atom.GetNumImplicitHs(),
113+
"IsInRing": atom.IsInRing()
114+
}
115+
prop_dict.update(res_dict)
116+
return prop_dict
117+
118+
119+
def main() -> None:
120+
"""Main function."""
121+
parser = argparse.ArgumentParser(description='Check if the topology has changed between two PDB files.')
122+
parser.add_argument('--file1', type=str, help='Path to the first PDB file')
123+
parser.add_argument('--file2', type=str, help='Path to the second PDB file')
124+
parser.add_argument('--intended_changes', nargs='+', type=str, default=[],
125+
help='List of intended changes', choices=['added_hydrogens', 'changed_residue_label', 'ignore_deleted', 'atom_order_change'])
126+
args = parser.parse_args()
127+
mol1 = Chem.MolFromPDBFile(args.file1, removeHs=False, sanitize=False)
128+
mol2 = Chem.MolFromPDBFile(args.file2, removeHs=False, sanitize=False)
129+
130+
mol1_index_to_mol2_index = map_atoms(mol1, mol2)
131+
atom_differences = compare_atoms(mol1, mol2, mol1_index_to_mol2_index, intended_changes=args.intended_changes)
132+
if atom_differences:
133+
for mol1_idx, diff in atom_differences.items():
134+
if mol1_idx in mol1_index_to_mol2_index: # otherwise was deleted
135+
mol2_idx = mol1_index_to_mol2_index[mol1_idx]
136+
print(f"File 1 Atom Idx {mol1_idx}, File 2 Atom Idx {mol2_idx}")
137+
for key, value in diff.items():
138+
file1_value, file2_value = value
139+
print(f"Property: {key} File 1: {file1_value}, File 2: {file2_value}")
140+
else:
141+
print(f"File 1 Atom Idx {mol1_idx} differences: {diff}")
142+
topology_change = True
143+
else:
144+
topology_change = False
145+
146+
# NOTE: MDAnalysis changes original atom label and also atom order!!
147+
if 'atom_order_change' not in args.intended_changes:
148+
# Check if the atom order has changed
149+
if list(mol1_index_to_mol2_index.keys()) != list(mol1_index_to_mol2_index.values()):
150+
topology_change = True
151+
152+
# Write the boolean value to the output file
153+
with open('valid.txt', 'w', encoding='utf-8') as file:
154+
file.write(str(topology_change))
155+
156+
157+
if __name__ == '__main__':
158+
main()

0 commit comments

Comments
 (0)