Skip to content

Commit e052bbe

Browse files
authored
Merge pull request #113 from Lucaslab-Berkeley/jd_update_get_rot_axis
get_rot_axis.py explicit warnings
2 parents 3883e5e + 07039fd commit e052bbe

File tree

2 files changed

+80
-9
lines changed

2 files changed

+80
-9
lines changed

docs/programs/constrained_search.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ particle_stack_constrained: # This is from the constrained particles
5555
5656
We must specify what angular space to perform the orientation search over.
5757
The first thing we must specify is the primary rotation axis, which we set as the Z axis.
58-
If unknown, this can be calculated using two PDB models (one rotated and one unrotated) using the script [get_rot_axis.py](https://raw.githubusercontent.com/Lucaslab-Berkeley/Leopard-EM/refs/heads/main/programs/constrained_search/utils/get_center_vector.py).
58+
If unknown, this can be calculated using two PDB models (one rotated and one unrotated) using the script [get_rot_axis.py](https://raw.githubusercontent.com/Lucaslab-Berkeley/Leopard-EM/refs/heads/main/programs/constrained_search/utils/get_rot_axis.py).
5959
6060
As well as searching over one rotation axis, we can search over a second axis, which by default is the y axis.
6161
This can be changed to any axis orthogonal to the primary axis by specifying a `roll_axis` and using the `base_grid_method: roll`.

programs/constrained_search/utils/get_rot_axis.py

Lines changed: 79 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,20 @@
1-
"""Calculate the rotation axis for a pair of PDB structures."""
1+
"""Calculate the rotation axis for a pair of PDB structures.
22
3+
This script calculates the rotation axis between two PDB structures using rigid
4+
point registration. Important note:
5+
6+
This script only works for PDB structures which match (same atoms, otherwise
7+
registration may be non-sensical). This requires
8+
the same number of atoms in both structures.
9+
10+
The script can filter for only CA atoms in case not all sidechains are modeled
11+
and/or additional ligands are in one model. Use the --filter-ca-only flag to
12+
enable this filtering.
13+
"""
14+
15+
import argparse
316
import math
4-
import sys
17+
import warnings
518

619
import mmdf
720
import roma
@@ -98,18 +111,76 @@ def calculate_axis_euler_angles(axis: torch.Tensor) -> tuple[float, float]:
98111

99112
def main() -> None:
100113
"""Calculate rotation axis for a pair of PDB structures."""
101-
if len(sys.argv) != 4:
102-
print(f"Usage: {sys.argv[0]} <pdb_file1> <pdb_file2> <output_file>")
103-
sys.exit(1)
114+
parser = argparse.ArgumentParser(
115+
description="Calculate the rotation axis for a pair of PDB structures."
116+
)
117+
parser.add_argument("pdb_file1", help="First PDB file")
118+
parser.add_argument("pdb_file2", help="Second PDB file")
119+
parser.add_argument("output_file", help="Output file for rotation analysis")
120+
parser.add_argument(
121+
"--filter-ca-only",
122+
action="store_true",
123+
help="Filter to only CA (alpha carbon) atoms if atom counts don't match",
124+
)
125+
args = parser.parse_args()
104126

105-
pdb_file1 = sys.argv[1]
106-
pdb_file2 = sys.argv[2]
107-
output_file = sys.argv[3]
127+
pdb_file1 = args.pdb_file1
128+
pdb_file2 = args.pdb_file2
129+
output_file = args.output_file
130+
filter_ca_only = args.filter_ca_only
108131

109132
# Parse PDB files
110133
df1 = mmdf.read(pdb_file1)
111134
df2 = mmdf.read(pdb_file2)
112135

136+
# Check if atom counts match
137+
if len(df1) != len(df2):
138+
error_msg = (
139+
f"PDB files have different numbers of atoms:\n"
140+
f" {pdb_file1}: {len(df1)} atoms\n"
141+
f" {pdb_file2}: {len(df2)} atoms\n\n"
142+
"Rigid registration requires matching atom counts. "
143+
"If not all sidechains are modeled and/or additional ligands are "
144+
"present in one model, try rerunning with --filter-ca-only to "
145+
"filter to only CA (alpha carbon) atoms."
146+
)
147+
148+
if not filter_ca_only:
149+
raise ValueError(error_msg)
150+
151+
# Filter to CA atoms if flag is set
152+
warnings.warn(
153+
f"PDB files have different numbers of atoms: "
154+
f"{pdb_file1} has {len(df1)} atoms, "
155+
f"{pdb_file2} has {len(df2)} atoms. "
156+
"Filtering to CA (alpha carbon) atoms for alignment.",
157+
UserWarning,
158+
stacklevel=2,
159+
)
160+
161+
if "atom_name" not in df1.columns or "atom_name" not in df2.columns:
162+
raise ValueError(
163+
"Cannot filter atoms - 'atom_name' column not found in PDB data."
164+
)
165+
166+
df1_ca = df1[df1["atom_name"] == "CA"].copy()
167+
df2_ca = df2[df2["atom_name"] == "CA"].copy()
168+
169+
if len(df1_ca) == 0 or len(df2_ca) == 0:
170+
raise ValueError("No CA atoms found in one or both PDB files.")
171+
172+
if len(df1_ca) != len(df2_ca):
173+
raise ValueError(
174+
f"Even after filtering to CA atoms, counts don't match:\n"
175+
f" {pdb_file1}: {len(df1_ca)} CA atoms\n"
176+
f" {pdb_file2}: {len(df2_ca)} CA atoms\n\n"
177+
"Cannot perform rigid registration with mismatched atom counts."
178+
)
179+
180+
print(f"Using {len(df1_ca)} CA atoms for alignment.")
181+
df1 = df1_ca
182+
df2 = df2_ca
183+
113184
# Extract coordinates
114185
coords1 = torch.tensor(df1[["x", "y", "z"]].values, dtype=torch.float32)
115186
coords2 = torch.tensor(df2[["x", "y", "z"]].values, dtype=torch.float32)

0 commit comments

Comments
 (0)