|
1 | | -"""Calculate the rotation axis for a pair of PDB structures.""" |
| 1 | +"""Calculate the rotation axis for a pair of PDB structures. |
2 | 2 |
|
| 3 | +This script calculates the rotation axis between two PDB structures using rigid |
| 4 | +point registration. Important note: |
| 5 | +
|
| 6 | +This script only works for PDB structures which match (same atoms, otherwise |
| 7 | + registration may be non-sensical). This requires |
| 8 | + the same number of atoms in both structures. |
| 9 | +
|
| 10 | +The script can filter for only CA atoms in case not all sidechains are modeled |
| 11 | + and/or additional ligands are in one model. Use the --filter-ca-only flag to |
| 12 | + enable this filtering. |
| 13 | +""" |
| 14 | + |
| 15 | +import argparse |
3 | 16 | import math |
4 | | -import sys |
| 17 | +import warnings |
5 | 18 |
|
6 | 19 | import mmdf |
7 | 20 | import roma |
@@ -98,18 +111,76 @@ def calculate_axis_euler_angles(axis: torch.Tensor) -> tuple[float, float]: |
98 | 111 |
|
99 | 112 | def main() -> None: |
100 | 113 | """Calculate rotation axis for a pair of PDB structures.""" |
101 | | - if len(sys.argv) != 4: |
102 | | - print(f"Usage: {sys.argv[0]} <pdb_file1> <pdb_file2> <output_file>") |
103 | | - sys.exit(1) |
| 114 | + parser = argparse.ArgumentParser( |
| 115 | + description="Calculate the rotation axis for a pair of PDB structures." |
| 116 | + ) |
| 117 | + parser.add_argument("pdb_file1", help="First PDB file") |
| 118 | + parser.add_argument("pdb_file2", help="Second PDB file") |
| 119 | + parser.add_argument("output_file", help="Output file for rotation analysis") |
| 120 | + parser.add_argument( |
| 121 | + "--filter-ca-only", |
| 122 | + action="store_true", |
| 123 | + help="Filter to only CA (alpha carbon) atoms if atom counts don't match", |
| 124 | + ) |
| 125 | + args = parser.parse_args() |
104 | 126 |
|
105 | | - pdb_file1 = sys.argv[1] |
106 | | - pdb_file2 = sys.argv[2] |
107 | | - output_file = sys.argv[3] |
| 127 | + pdb_file1 = args.pdb_file1 |
| 128 | + pdb_file2 = args.pdb_file2 |
| 129 | + output_file = args.output_file |
| 130 | + filter_ca_only = args.filter_ca_only |
108 | 131 |
|
109 | 132 | # Parse PDB files |
110 | 133 | df1 = mmdf.read(pdb_file1) |
111 | 134 | df2 = mmdf.read(pdb_file2) |
112 | 135 |
|
| 136 | + # Check if atom counts match |
| 137 | + if len(df1) != len(df2): |
| 138 | + error_msg = ( |
| 139 | + f"PDB files have different numbers of atoms:\n" |
| 140 | + f" {pdb_file1}: {len(df1)} atoms\n" |
| 141 | + f" {pdb_file2}: {len(df2)} atoms\n\n" |
| 142 | + "Rigid registration requires matching atom counts. " |
| 143 | + "If not all sidechains are modeled and/or additional ligands are " |
| 144 | + "present in one model, try rerunning with --filter-ca-only to " |
| 145 | + "filter to only CA (alpha carbon) atoms." |
| 146 | + ) |
| 147 | + |
| 148 | + if not filter_ca_only: |
| 149 | + raise ValueError(error_msg) |
| 150 | + |
| 151 | + # Filter to CA atoms if flag is set |
| 152 | + warnings.warn( |
| 153 | + f"PDB files have different numbers of atoms: " |
| 154 | + f"{pdb_file1} has {len(df1)} atoms, " |
| 155 | + f"{pdb_file2} has {len(df2)} atoms. " |
| 156 | + "Filtering to CA (alpha carbon) atoms for alignment.", |
| 157 | + UserWarning, |
| 158 | + stacklevel=2, |
| 159 | + ) |
| 160 | + |
| 161 | + if "atom_name" not in df1.columns or "atom_name" not in df2.columns: |
| 162 | + raise ValueError( |
| 163 | + "Cannot filter atoms - 'atom_name' column not found in PDB data." |
| 164 | + ) |
| 165 | + |
| 166 | + df1_ca = df1[df1["atom_name"] == "CA"].copy() |
| 167 | + df2_ca = df2[df2["atom_name"] == "CA"].copy() |
| 168 | + |
| 169 | + if len(df1_ca) == 0 or len(df2_ca) == 0: |
| 170 | + raise ValueError("No CA atoms found in one or both PDB files.") |
| 171 | + |
| 172 | + if len(df1_ca) != len(df2_ca): |
| 173 | + raise ValueError( |
| 174 | + f"Even after filtering to CA atoms, counts don't match:\n" |
| 175 | + f" {pdb_file1}: {len(df1_ca)} CA atoms\n" |
| 176 | + f" {pdb_file2}: {len(df2_ca)} CA atoms\n\n" |
| 177 | + "Cannot perform rigid registration with mismatched atom counts." |
| 178 | + ) |
| 179 | + |
| 180 | + print(f"Using {len(df1_ca)} CA atoms for alignment.") |
| 181 | + df1 = df1_ca |
| 182 | + df2 = df2_ca |
| 183 | + |
113 | 184 | # Extract coordinates |
114 | 185 | coords1 = torch.tensor(df1[["x", "y", "z"]].values, dtype=torch.float32) |
115 | 186 | coords2 = torch.tensor(df2[["x", "y", "z"]].values, dtype=torch.float32) |
|
0 commit comments