Make template unit vector computations match those of OpenFold (#235)

amorehead · web-flow · commit 8fbcfd100b2f · 2024-09-08T20:42:59.000-07:00
* Update __init__.py

* Update test_af3.py

* Update model_utils.py

* Update alphafold3.py

* Update template_parsing.py

* Update template_parsing.py

* Update alphafold3.py

* Update __init__.py

* Update template_parsing.py

* Update model_utils.py
diff --git a/alphafold3_pytorch/__init__.py b/alphafold3_pytorch/__init__.py
@@ -71,12 +71,15 @@
 from alphafold3_pytorch.utils.model_utils import (
     ExpressCoordinatesInFrame,
     RigidFrom3Points,
+    RigidFromReference3Points,
 )
 
 __all__ = [
     Attention,
     Attend,
     RelativePositionEncoding,
+    RigidFrom3Points,
+    RigidFromReference3Points,
     SmoothLDDTLoss,
     WeightedRigidAlign,
     MultiChainPermutationAlignment,
diff --git a/alphafold3_pytorch/alphafold3.py b/alphafold3_pytorch/alphafold3.py
@@ -78,6 +78,7 @@
 from alphafold3_pytorch.utils.model_utils import (
     ExpressCoordinatesInFrame,
     RigidFrom3Points,
+    RigidFromReference3Points,
     calculate_weighted_rigid_align_weights,
     package_available,
 )
diff --git a/alphafold3_pytorch/data/template_parsing.py b/alphafold3_pytorch/data/template_parsing.py
@@ -2,6 +2,7 @@
 from datetime import datetime
 from loguru import logger
 from beartype.typing import Any, Dict, List, Literal, Mapping, Tuple
+from einops import einsum
 
 import numpy as np
 import polars as pl
@@ -22,8 +23,7 @@
 )
 from alphafold3_pytorch.utils.data_utils import extract_mmcif_metadata_field
 from alphafold3_pytorch.utils.model_utils import (
-    ExpressCoordinatesInFrame,
-    RigidFrom3Points,
+    RigidFromReference3Points,
     distance_to_dgram,
     get_frames_from_atom_pos,
 )
@@ -150,6 +150,7 @@ def _extract_template_features(
     num_distogram_bins: int = 39,
     distance_bins: List[float] = torch.linspace(3.25, 50.75, 39).float(),
     verbose: bool = False,
+    eps: float = 1e-20,
 ) -> Dict[str, Any]:
     """Parse atom positions in the target structure and align with the query.
 
@@ -173,6 +174,7 @@ def _extract_template_features(
     :param distance_bins: List of floats representing the bins for the distance
         histogram (i.e., distogram).
     :param verbose: Whether to log verbose output.
+    :param eps: A small value to prevent division by zero.
 
     :return: A dictionary containing the extra features derived from the template
         structure.
@@ -380,17 +382,23 @@ def _extract_template_features(
         template_three_atom_indices_for_frame.unsqueeze(-1).expand(-1, -1, 3),
     )
 
-    rigid_from_three_points = RigidFrom3Points()
-    template_backbone_frames, _ = rigid_from_three_points(
+    rigid_from_reference_3_points = RigidFromReference3Points()
+    template_backbone_frames, template_backbone_points = rigid_from_reference_3_points(
         template_backbone_frame_atom_positions.unbind(-2)
     )
 
-    express_coordinates_in_frame = ExpressCoordinatesInFrame()
-    template_unit_vector = express_coordinates_in_frame(
-        template_token_center_atom_positions.unsqueeze(0),
-        template_backbone_frames.unsqueeze(0),
-        pairwise=True,
-    ).squeeze(0)
+    inv_template_backbone_frames = template_backbone_frames.transpose(-1, -2)
+    template_backbone_vec = einsum(
+        inv_template_backbone_frames,
+        template_backbone_points.unsqueeze(-2) - template_backbone_points.unsqueeze(-3),
+        "n i j, m n j -> m n i",
+    )
+    template_inv_distance_scalar = torch.rsqrt(eps + torch.sum(template_backbone_vec**2, dim=-1))
+    template_inv_distance_scalar = (
+        template_inv_distance_scalar * template_backbone_frame_mask.unsqueeze(-1)
+    )
+
+    template_unit_vector = template_backbone_vec * template_inv_distance_scalar.unsqueeze(-1)
 
     return {
         "template_restype": template_restype.float(),
diff --git a/alphafold3_pytorch/utils/model_utils.py b/alphafold3_pytorch/utils/model_utils.py
@@ -809,13 +809,12 @@ def forward(
         self,
         coords: Float["b m 3"],  # type: ignore
         frame: Float["b m 3 3"] | Float["b 3 3"] | Float["3 3"],  # type: ignore
-        pairwise: bool = False,
-    ) -> Float["b m 3"] | Float["b m m 3"]:  # type: ignore
+    ) -> Float["b m 3"]:  # type: ignore
         """Express coordinates in the given frame.
 
         :param coords: Coordinates to be expressed in the given frame.
         :param frame: Frames defined by three points.
-        :return: The transformed coordinates or pairwise coordinates.
+        :return: The transformed coordinates.
         """
 
         if frame.ndim == 2:
@@ -833,38 +832,19 @@ def forward(
         e2 = l2norm(w2 - w1, eps=self.eps)
         e3 = torch.cross(e1, e2, dim=-1)
 
-        if pairwise:
-            # Compute pairwise displacement vectors
-            pairwise_d = coords.unsqueeze(2) - coords.unsqueeze(1)
+        # Project onto frame basis
+        d = coords - b
 
-            # Project onto frame basis
-            pairwise_transformed_coords = torch.stack(
-                (
-                    einsum(pairwise_d, e1.unsqueeze(1), "... i, ... i -> ..."),
-                    einsum(pairwise_d, e2.unsqueeze(1), "... i, ... i -> ..."),
-                    einsum(pairwise_d, e3.unsqueeze(1), "... i, ... i -> ..."),
-                ),
-                dim=-1,
-            )
-
-            # Normalize to get unit vectors
-            pairwise_transformed_coords = l2norm(pairwise_transformed_coords, eps=self.eps)
-            return pairwise_transformed_coords
-
-        else:
-            # Project onto frame basis
-            d = coords - b
-
-            transformed_coords = torch.stack(
-                (
-                    einsum(d, e1, "... i, ... i -> ..."),
-                    einsum(d, e2, "... i, ... i -> ..."),
-                    einsum(d, e3, "... i, ... i -> ..."),
-                ),
-                dim=-1,
-            )
+        transformed_coords = torch.stack(
+            (
+                einsum(d, e1, "... i, ... i -> ..."),
+                einsum(d, e2, "... i, ... i -> ..."),
+                einsum(d, e3, "... i, ... i -> ..."),
+            ),
+            dim=-1,
+        )
 
-            return transformed_coords
+        return transformed_coords
 
 
 class RigidFrom3Points(Module):
@@ -906,3 +886,97 @@ def forward(
         t = unpack_one(t, "* c")
 
         return R, t
+
+
+class RigidFromReference3Points(Module):
+    """A modification of Algorithm 21 in Section 1.8.1 in AlphaFold 2 paper:
+
+    https://www.nature.com/articles/s41586-021-03819-2
+
+    Inspired by the implementation in the OpenFold codebase:
+    https://github.com/aqlaboratory/openfold/blob/6f63267114435f94ac0604b6d89e82ef45d94484/openfold/utils/feats.py#L143
+    """
+
+    @typecheck
+    def forward(
+        self,
+        three_points: Tuple[Float["... 3"], Float["... 3"], Float["... 3"]] | Float["3 ... 3"],  # type: ignore
+        eps: float = 1e-20,
+    ) -> Tuple[Float["... 3 3"], Float["... 3"]]:  # type: ignore
+        """Return a transformation object from reference coordinates.
+
+        NOTE: This method does not take care of symmetries. If you
+        provide the atom positions in a non-standard way, the
+        N atom of an amino acid residue, for example, will end up
+        not at [-0.527250, 1.359329, 0.0] but instead at
+        [-0.527250, -1.359329, 0.0]. You need to take care
+        of such cases in your code.
+
+        :param three_points: Three reference points to define the transformation.
+        :param eps: A small value to avoid division by zero.
+        :return: A transformation object. After applying the translation and
+            rotation to the reference backbone, the coordinates will
+            be approximately equal to the input coordinates.
+        """
+        if isinstance(three_points, tuple):
+            three_points = torch.stack(three_points)
+
+        # allow for any number of leading dimensions
+
+        (x1, x2, x3), unpack_one = pack_one(three_points, "three * d")
+
+        # main algorithm
+
+        t = -1 * x2
+        x1 = x1 + t
+        x3 = x3 + t
+
+        x3_x, x3_y, x3_z = [x3[..., i] for i in range(3)]
+        norm = torch.sqrt(eps + x3_x**2 + x3_y**2)
+        sin_x3_1 = -x3_y / norm
+        cos_x3_1 = x3_x / norm
+
+        x3_1_R = sin_x3_1.new_zeros((*sin_x3_1.shape, 3, 3))
+        x3_1_R[..., 0, 0] = cos_x3_1
+        x3_1_R[..., 0, 1] = -1 * sin_x3_1
+        x3_1_R[..., 1, 0] = sin_x3_1
+        x3_1_R[..., 1, 1] = cos_x3_1
+        x3_1_R[..., 2, 2] = 1
+
+        norm = torch.sqrt(eps + x3_x**2 + x3_y**2 + x3_z**2)
+        sin_x3_2 = x3_z / norm
+        cos_x3_2 = torch.sqrt(x3_x**2 + x3_y**2) / norm
+
+        x3_2_R = sin_x3_2.new_zeros((*sin_x3_2.shape, 3, 3))
+        x3_2_R[..., 0, 0] = cos_x3_2
+        x3_2_R[..., 0, 2] = sin_x3_2
+        x3_2_R[..., 1, 1] = 1
+        x3_2_R[..., 2, 0] = -1 * sin_x3_2
+        x3_2_R[..., 2, 2] = cos_x3_2
+
+        x3_R = einsum(x3_2_R, x3_1_R, "n i j, n j k -> n i k")
+        x1 = einsum(x3_R, x1, "n i j, n j -> n i")
+
+        _, x1_y, x1_z = [x1[..., i] for i in range(3)]
+        norm = torch.sqrt(eps + x1_y**2 + x1_z**2)
+        sin_x1 = -x1_z / norm
+        cos_x1 = x1_y / norm
+
+        x1_R = sin_x3_2.new_zeros((*sin_x3_2.shape, 3, 3))
+        x1_R[..., 0, 0] = 1
+        x1_R[..., 1, 1] = cos_x1
+        x1_R[..., 1, 2] = -1 * sin_x1
+        x1_R[..., 2, 1] = sin_x1
+        x1_R[..., 2, 2] = cos_x1
+
+        R = einsum(x1_R, x3_R, "n i j, n j k -> n i k")
+
+        R = R.transpose(-1, -2)
+        t = -1 * t
+
+        # unpack
+
+        R = unpack_one(R, "* r1 r2")
+        t = unpack_one(t, "* c")
+
+        return R, t
diff --git a/tests/test_af3.py b/tests/test_af3.py
@@ -19,6 +19,7 @@
     MultiChainPermutationAlignment,
     ExpressCoordinatesInFrame,
     RigidFrom3Points,
+    RigidFromReference3Points,
     ComputeAlignmentError,
     CentreRandomAugmentation,
     PairformerStack,
@@ -245,6 +246,13 @@ def test_rigid_from_three_points():
     rotation, _ = rigid_from_3_points((points, points, points))
     assert rotation.shape == (7, 11, 23, 3, 3)
 
+def test_rigid_from_reference_three_points():
+    rigid_from_reference_3_points = RigidFromReference3Points()
+
+    points = torch.randn(7, 11, 23, 3)
+    rotation, _ = rigid_from_reference_3_points((points, points, points))
+    assert rotation.shape == (7, 11, 23, 3, 3)
+
 def test_deriving_frames_for_ligands():
     points = torch.tensor([
         [1., 1., 1.],

Original file line number	Diff line number	Diff line change
`@@ -78,6 +78,7 @@`
`78`	`78`	`from alphafold3_pytorch.utils.model_utils import (`
`79`	`79`	`ExpressCoordinatesInFrame,`
`80`	`80`	`RigidFrom3Points,`
	`81`	`+ RigidFromReference3Points,`
`81`	`82`	`calculate_weighted_rigid_align_weights,`
`82`	`83`	`package_available,`
`83`	`84`	`)`