Skip to content

Commit 5341111

Browse files
committed
minor refactoring of contracter
1 parent f47ab66 commit 5341111

File tree

3 files changed

+14
-9
lines changed

3 files changed

+14
-9
lines changed

allegro/nn/_strided/_contract.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -202,15 +202,19 @@ def forward(
202202
dim=0,
203203
dim_size=scatter_dim_size,
204204
)
205-
x2 = torch.index_select(x2_scatter, 0, idxs)
206205

207206
# === perform TP ===
208207
# convert to strided shape
209208
x1 = x1.reshape(-1, self.mul, self.base_dim1)
210-
x2 = x2.reshape(-1, self.mul, self.base_dim2)
211-
return self._contract(x1, x2)
209+
x2_scatter = x2_scatter.reshape(-1, self.mul, self.base_dim2)
210+
return self._contract_conv(x1, x2_scatter, idxs)
211+
212+
def _contract_conv(
213+
self, x1: torch.Tensor, x2: torch.Tensor, idxs: torch.Tensor
214+
) -> torch.Tensor:
215+
# index select from scattered x2
216+
x2 = torch.index_select(x2, 0, idxs)
212217

213-
def _contract(self, x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:
214218
# for shared weights, we can precontract weights and w3j so they can be frozen together
215219
# this is usually advantageous for inference, since the weights would have to be
216220
# multiplied in anyway at some point

allegro/nn/_strided/_cueq_contracter.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,5 +138,4 @@ def forward(
138138
.contiguous()
139139
)
140140
else:
141-
x2 = torch.index_select(x2_scatter, 0, idxs)
142-
return self._contract(x1, x2)
141+
return self._contract_conv(x1, x2_scatter, idxs)

allegro/nn/_strided/_flashallegro.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -722,12 +722,14 @@ def __init__(self, **kwargs):
722722
"p_to_nnz_mapper_bwd2", p_to_nnz_mapper_bwd2, persistent=False
723723
)
724724

725-
def _contract(self, x1, x2):
725+
def _contract_conv(self, x1, x2, idxs):
726726
# runtime conditions for triggering kernel code path
727727
if x1.is_cuda and not self.training:
728+
# index select for triton kernel
729+
x2_indexed = torch.index_select(x2, 0, idxs)
728730
return torch.ops.triton.flashallegro_forward(
729731
x1,
730-
x2,
732+
x2_indexed,
731733
self.mode,
732734
self.indptr_fwd,
733735
self.indptr_bwd1,
@@ -752,4 +754,4 @@ def _contract(self, x1, x2):
752754
x1.dtype,
753755
)
754756
else:
755-
return super()._contract(x1, x2)
757+
return super()._contract_conv(x1, x2, idxs)

0 commit comments

Comments (0)