|
| 1 | +import argparse |
| 2 | +from rdkit import Chem |
| 3 | +from rdkit.Chem.rdchem import Atom, Mol |
| 4 | + |
| 5 | + |
| 6 | +def map_atoms(mol1: Mol, mol2: Mol) -> dict: |
| 7 | + """ |
| 8 | + Map atoms from the first molecule to corresponding atoms in the second molecule. |
| 9 | +
|
| 10 | + Args: |
| 11 | + mol1 (Mol): RDKit molecule from the first PDB file. |
| 12 | + mol2 (Mol): RDKit molecule from the second PDB file. |
| 13 | +
|
| 14 | + Returns: |
| 15 | + dict: Mapping of atoms from mol1 to mol2. |
| 16 | + """ |
| 17 | + match_atoms = mol1.GetSubstructMatch(mol2) |
| 18 | + if len(match_atoms) == 0: |
| 19 | + raise ValueError("No atoms matched between the two molecules.") |
| 20 | + mol2_index_to_mol1_index = {mol2_index: mol1_index for mol2_index, mol1_index in enumerate(match_atoms)} |
| 21 | + mol1_index_to_mol2_index = {k: v for v, k in mol2_index_to_mol1_index.items()} |
| 22 | + return mol1_index_to_mol2_index |
| 23 | + |
| 24 | + |
| 25 | +def compare_atom_properties(atom1: Atom, atom2: Atom) -> dict: |
| 26 | + """ |
| 27 | + Compare properties of two atoms and return a dictionary of differences. |
| 28 | +
|
| 29 | + Args: |
| 30 | + atom1 (Atom): RDKit atom from the first molecule. |
| 31 | + atom2 (Atom): RDKit atom from the second molecule. |
| 32 | +
|
| 33 | + Returns: |
| 34 | + dict: Dictionary of property differences between the two atoms. |
| 35 | + """ |
| 36 | + properties_diff = {} |
| 37 | + atom1_properties = get_properties(atom1) |
| 38 | + atom2_properties = get_properties(atom2) |
| 39 | + for key, atom1_value in atom1_properties.items(): |
| 40 | + if atom1_value != atom2_properties[key]: |
| 41 | + properties_diff[key] = (atom1_value, atom2_properties[key]) |
| 42 | + return properties_diff |
| 43 | + |
| 44 | + |
| 45 | +def compare_atoms(mol1: Mol, mol2: Mol, atom_map: dict, intended_changes: list = []) -> dict: |
| 46 | + """ |
| 47 | + Compare properties of mapped atoms in two molecules and return a dictionary of differences. |
| 48 | +
|
| 49 | + Args: |
| 50 | + mol1 (Mol): RDKit molecule from the first PDB file. |
| 51 | + mol2 (Mol): RDKit molecule from the second PDB file. |
| 52 | + atom_map (dict): Mapping of atoms from mol1 to mol2. |
| 53 | + intended_changes (list): List of intended changes (e.g., "added_hydrogen"). |
| 54 | +
|
| 55 | + Returns: |
| 56 | + dict: Dictionary of property differences between mapped atoms in the two molecules. |
| 57 | + """ |
| 58 | + properties_diff = {} |
| 59 | + deleted_atoms = set(atom.GetIdx() for atom in mol1.GetAtoms()) - set(atom_map.keys()) |
| 60 | + for atom1_idx, atom2_idx in atom_map.items(): |
| 61 | + atom1 = mol1.GetAtomWithIdx(atom1_idx) |
| 62 | + atom2 = mol2.GetAtomWithIdx(atom2_idx) |
| 63 | + |
| 64 | + # Check if the difference is related to intended changes |
| 65 | + if "added_hydrogens" in intended_changes: |
| 66 | + diff_implicit_valence = atom2.GetImplicitValence() - atom1.GetImplicitValence() |
| 67 | + diff_num_implicit_hs = atom2.GetNumImplicitHs() - atom1.GetNumImplicitHs() |
| 68 | + |
| 69 | + # If the difference in implicit valence and num implicit Hs is the same, ignore the difference |
| 70 | + if diff_implicit_valence == diff_num_implicit_hs: |
| 71 | + continue |
| 72 | + |
| 73 | + atom_diff = compare_atom_properties(atom1, atom2) |
| 74 | + # check if the difference is related to changed residue label, then remove from difference |
| 75 | + if "GetResidueName" in atom_diff and "changed_residue_label" in intended_changes: |
| 76 | + atom_diff.pop("GetResidueName") |
| 77 | + |
| 78 | + if atom_diff: |
| 79 | + properties_diff[atom1_idx] = atom_diff |
| 80 | + |
| 81 | + if "ignore_deleted" not in intended_changes: |
| 82 | + # Add deleted atoms to the differences |
| 83 | + for atom_idx in deleted_atoms: |
| 84 | + properties_diff[atom_idx] = {"DeletedAtom": True} |
| 85 | + |
| 86 | + return properties_diff |
| 87 | + |
| 88 | + |
| 89 | +def get_properties(atom: Atom) -> dict: |
| 90 | + """Get properties of an RDKit atom.""" |
| 91 | + res_dict = {} |
| 92 | + res = atom.GetPDBResidueInfo() |
| 93 | + res_dict['GetResidueName'] = res.GetResidueName() |
| 94 | + res_dict['GetResidueNumber'] = res.GetResidueNumber() |
| 95 | + res_dict['GetChainId'] = res.GetChainId() |
| 96 | + res_dict['GetName'] = res.GetName() |
| 97 | + prop_dict = { |
| 98 | + "GetSymbol": atom.GetSymbol(), |
| 99 | + "GetIdx": atom.GetIdx(), |
| 100 | + "GetAtomicNum": atom.GetAtomicNum(), |
| 101 | + "GetBonds": sorted([(sorted([bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()])) for bond in atom.GetBonds()]), |
| 102 | + "GetChiralTag": atom.GetChiralTag(), |
| 103 | + "GetDegree": atom.GetDegree(), |
| 104 | + "GetExplicitValence": atom.GetExplicitValence(), |
| 105 | + "GetFormalCharge": atom.GetFormalCharge(), |
| 106 | + "GetHybridization": atom.GetHybridization(), |
| 107 | + "GetImplicitValence": atom.GetImplicitValence(), |
| 108 | + "GetIsAromatic": atom.GetIsAromatic(), |
| 109 | + "GetIsotope": atom.GetIsotope(), |
| 110 | + "GetMass": atom.GetMass(), |
| 111 | + "GetNumExplicitHs": atom.GetNumExplicitHs(), |
| 112 | + "GetNumImplicitHs": atom.GetNumImplicitHs(), |
| 113 | + "IsInRing": atom.IsInRing() |
| 114 | + } |
| 115 | + prop_dict.update(res_dict) |
| 116 | + return prop_dict |
| 117 | + |
| 118 | + |
| 119 | +def main() -> None: |
| 120 | + """Main function.""" |
| 121 | + parser = argparse.ArgumentParser(description='Check if the topology has changed between two PDB files.') |
| 122 | + parser.add_argument('--file1', type=str, help='Path to the first PDB file') |
| 123 | + parser.add_argument('--file2', type=str, help='Path to the second PDB file') |
| 124 | + parser.add_argument('--intended_changes', nargs='+', type=str, default=[], |
| 125 | + help='List of intended changes', choices=['added_hydrogens', 'changed_residue_label', 'ignore_deleted', 'atom_order_change']) |
| 126 | + args = parser.parse_args() |
| 127 | + mol1 = Chem.MolFromPDBFile(args.file1, removeHs=False, sanitize=False) |
| 128 | + mol2 = Chem.MolFromPDBFile(args.file2, removeHs=False, sanitize=False) |
| 129 | + |
| 130 | + mol1_index_to_mol2_index = map_atoms(mol1, mol2) |
| 131 | + atom_differences = compare_atoms(mol1, mol2, mol1_index_to_mol2_index, intended_changes=args.intended_changes) |
| 132 | + if atom_differences: |
| 133 | + for mol1_idx, diff in atom_differences.items(): |
| 134 | + if mol1_idx in mol1_index_to_mol2_index: # otherwise was deleted |
| 135 | + mol2_idx = mol1_index_to_mol2_index[mol1_idx] |
| 136 | + print(f"File 1 Atom Idx {mol1_idx}, File 2 Atom Idx {mol2_idx}") |
| 137 | + for key, value in diff.items(): |
| 138 | + file1_value, file2_value = value |
| 139 | + print(f"Property: {key} File 1: {file1_value}, File 2: {file2_value}") |
| 140 | + else: |
| 141 | + print(f"File 1 Atom Idx {mol1_idx} differences: {diff}") |
| 142 | + topology_change = True |
| 143 | + else: |
| 144 | + topology_change = False |
| 145 | + |
| 146 | + # NOTE: MDAnalysis changes original atom label and also atom order!! |
| 147 | + if 'atom_order_change' not in args.intended_changes: |
| 148 | + # Check if the atom order has changed |
| 149 | + if list(mol1_index_to_mol2_index.keys()) != list(mol1_index_to_mol2_index.values()): |
| 150 | + topology_change = True |
| 151 | + |
| 152 | + # Write the boolean value to the output file |
| 153 | + with open('valid.txt', 'w', encoding='utf-8') as file: |
| 154 | + file.write(str(topology_change)) |
| 155 | + |
| 156 | + |
| 157 | +if __name__ == '__main__': |
| 158 | + main() |
0 commit comments