Commit f220fb9

clean up reshaping
Signed-off-by: Kyle Sayers <[email protected]>
1 parent 116b9f9

File tree

4 files changed: +73 −44 lines


src/compressed_tensors/transform/factory/hadamard.py

Lines changed: 6 additions & 2 deletions
@@ -60,7 +60,7 @@ def create_transform(self, module: Module, args: TransformArgs):
         factory_kwargs = {"construct_device": exec_device}
         weight = self.weights.get(size, dtype, device, factory_kwargs=factory_kwargs)
         perm = self.perms[weight] if self.scheme.randomize else None
-        return HadamardTransform(weight, perm, args)
+        return HadamardTransform(weight, perm, args, type(module))
 
     def _create_weight(
         self,
@@ -85,11 +85,13 @@ def __init__(
         weight: Parameter,
         perm: Optional[Parameter],
         args: TransformArgs,
+        module_type: type[torch.nn.Module],
     ):
         super().__init__()
         self.weight = weight
         self.perm = perm
         self.args = args
+        self.module_type = module_type
 
     def forward(self, value: Tensor) -> Tensor:
         weight = self.weight
@@ -100,4 +102,6 @@ def forward(self, value: Tensor) -> Tensor:
         if self.args.inverse:
             weight = weight.T
 
-        return apply_transform_weight(weight, value, self.args.location)
+        return apply_transform_weight(
+            weight, value, self.args.location, self.module_type
+        )
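A note on why `forward` can treat transposition as inversion: a normalized Hadamard matrix (entries ±1/√n) is orthogonal, so its transpose is exactly its inverse. A standalone check, not taken from the factory's construction code:

```python
import math
import torch

# a 2x2 normalized Hadamard matrix; rows are orthogonal and entries are
# +-1/sqrt(n), so H @ H.T == I and args.inverse can simply use weight.T
n = 2
H = torch.tensor([[1.0, 1.0], [1.0, -1.0]]) / math.sqrt(n)
assert torch.allclose(H @ H.T, torch.eye(n), atol=1e-6)
```

Passing `type(module)` at construction time also means the transform never keeps a reference to the parent module itself, only the one piece of information `apply_transform_weight` needs to choose a reshaping rule.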

src/compressed_tensors/transform/factory/matrix_multiply.py

Lines changed: 14 additions & 4 deletions
@@ -59,7 +59,7 @@ def create_transform(self, module: Module, args: TransformArgs):
         if args.inverse:
             weight = self.inverses[weight]
 
-        return RandomMatrixTransform(weight, args)
+        return RandomMatrixTransform(weight, args, type(module))
 
     def _create_weight(self, size: int, dtype: dtype, device: device) -> Parameter:
         # TODO: verify that weight is invertible (has non-zero determinant)
@@ -74,17 +74,27 @@ def _create_inverse(self, weight: Parameter) -> Parameter:
 
 
 class RandomMatrixTransform(TransformBase):
-    def __init__(self, weight: Tensor, args: TransformArgs):
+    def __init__(
+        self,
+        weight: Tensor,
+        args: TransformArgs,
+        module_type: type[torch.nn.Module],
+    ):
         super().__init__()
         self.weight = weight  # is an inverse if args.inverse
         self.args = args
+        self.module_type = module_type
 
     def forward(self, value: Tensor) -> Parameter:
-        return apply_transform_weight(self.weight, value, self.args.location)
+        return apply_transform_weight(
+            self.weight, value, self.args.location, self.module_type
+        )
 
     def right_inverse(self, value: Tensor) -> Tensor:
         inverse = high_precision_invert(self.weight)
-        return apply_transform_weight(inverse, value, self.args.location)
+        return apply_transform_weight(
+            inverse, value, self.args.location, self.module_type
+        )
 
 
 def high_precision_invert(weight: Tensor) -> Tensor:
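`right_inverse` inverts the stored weight on the fly via `high_precision_invert`, whose definition begins just below this hunk but is not shown in the diff. A minimal sketch of the usual approach, assuming the helper simply upcasts before inverting (the library's actual implementation may differ):

```python
import torch

def high_precision_invert(weight: torch.Tensor) -> torch.Tensor:
    # hedged sketch: invert in float64 to limit round-off error,
    # then cast back to the original dtype
    return torch.linalg.inv(weight.to(torch.float64)).to(weight.dtype)
```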

src/compressed_tensors/transform/utils/matrix.py

Lines changed: 51 additions & 36 deletions
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional
+from typing import Optional, Tuple, Callable
 
 import torch
 from compressed_tensors.transform import TransformLocation
@@ -42,7 +42,8 @@ def get_matrix_size(
         size = module.out_features
 
     if head_dim is not None:
-        assert size % head_dim == 0
+        if size % head_dim != 0:
+            raise ValueError("Cannot ")
         return head_dim
 
     else:
@@ -53,18 +54,35 @@ def apply_transform_weight(
     weight: torch.Tensor,
     value: torch.Tensor,
     location: TransformLocation,
+    module_type: type[torch.nn.Module],
 ) -> torch.Tensor:
-    return apply_transform_weight_linear(weight, value, location)
+    if module_type == torch.nn.Linear:
+        fn, axis = get_linear_transform_fn(module_type, location)
 
+    else:
+        raise NotImplementedError(
+            f"Applying transforms to {module_type} is not supported"
+        )
+
+    assert weight.shape[0] == weight.shape[1]
+    head_dim = weight.shape[0]
+    num_heads = value.shape[axis] // head_dim
 
-def apply_transform_weight_linear(
-    weight: torch.Tensor,
-    value: torch.Tensor,
+    value = value.unflatten(axis, (num_heads, head_dim))
+    value = fn(weight, value)
+    value = value.flatten(axis - 1, axis)
+
+    return value
+
+
+def get_linear_transform_fn(
+    module_type: type[torch.nn.Module],
     location: TransformLocation,
-):
+) -> Tuple[Callable[[torch.Tensor, torch.Tensor], torch.Tensor], int]:
     """
     Using the transform location, determine how to apply the transform weight to the
-    given value. For more info on input and output transforms, see `TransformLocation`
+    given value wrt linear weights. For more info on input and output transforms,
+    see `TransformLocation`
 
     The following explains how weights should be applied to values according to location
@@ -97,31 +115,28 @@ def apply_transform_weight_linear(
     :param location: determines how weight should be applied
     :return: value after transform weight has been applied
     """
-    value_shape = value.shape
-    weight_size = weight.shape[0]
-    assert weight.shape[0] == weight.shape[1]
-
-    if location == TransformLocation.INPUT:
-        num_heads = value_shape[1] // weight_size
-        value = value.reshape(value_shape[0], num_heads, weight_size)
-        ret = value @ weight
-
-    elif location == TransformLocation.WEIGHT_INPUT:
-        num_heads = value_shape[1] // weight_size
-        value = value.reshape(value_shape[0], num_heads, weight_size)
-        ret = value @ weight.T
-
-    elif location == TransformLocation.WEIGHT_OUTPUT:
-        num_heads = value_shape[0] // weight_size
-        value = value.reshape(num_heads, weight_size, value_shape[1])
-        ret = weight.T @ value
-
-    elif location == TransformLocation.OUTPUT:
-        num_heads = value_shape[1] // weight_size
-        value = value.reshape(value_shape[0], num_heads, weight_size)
-        ret = value @ weight
-
-    else:
-        raise NotImplementedError(f"{location} has not been implemented yet")
-
-    return ret.reshape(value_shape)
+    fn = axis = None
+
+    if module_type == torch.nn.Linear:
+        if location == TransformLocation.INPUT:
+            fn = lambda weight, value: value @ weight
+            axis = -1
+
+        elif location == TransformLocation.WEIGHT_INPUT:
+            fn = lambda weight, value: value @ weight.T
+            axis = -1
+
+        elif location == TransformLocation.WEIGHT_OUTPUT:
+            fn = lambda weight, value: weight.T @ value
+            axis = -2
+
+        elif location == TransformLocation.OUTPUT:
+            fn = lambda weight, value: value @ weight
+            axis = -1
+
+    if fn is None:
+        raise NotImplementedError(
+            f"Applying transforms to {module_type} {location} is not supported"
+        )
+
+    return fn, axis
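The substance of the cleanup: the old `apply_transform_weight_linear` reshaped around absolute dimensions (`value_shape[0]`, `value_shape[1]`), which silently assumed a 2-D value. The new code unflattens a negative axis into `(num_heads, head_dim)`, applies a per-head matmul, and flattens back, so any number of leading batch dimensions broadcast through. A standalone check that the per-head multiply matches the equivalent block-diagonal transform (sizes here are hypothetical):

```python
import torch

head_dim, num_heads = 4, 3
weight = torch.rand(head_dim, head_dim)           # square per-head transform
value = torch.rand(17, 5, num_heads * head_dim)   # (batch, seq, hidden)

# new path for TransformLocation.INPUT: axis == -1
axis = -1
out = value.unflatten(axis, (num_heads, head_dim)) @ weight
out = out.flatten(axis - 1, axis)

# reference: the same transform expressed as one block-diagonal matrix
full = torch.block_diag(*[weight] * num_heads)
assert torch.allclose(out, value @ full, atol=1e-5)
assert out.shape == value.shape
```

`WEIGHT_OUTPUT` is the one location that uses `axis = -2`: a `torch.nn.Linear` weight is stored as `(out_features, in_features)`, so the output dimension being transformed is the second-to-last axis and the weight must be applied from the left.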

tests/test_transform/factory/test_correctness.py

Lines changed: 2 additions & 2 deletions
@@ -47,7 +47,7 @@ def test_correctness_linear(type, randomized):
         module, TransformArgs(targets="Linear", location="output", inverse=True)
     )
 
-    input = torch.rand((17, size[0]))
+    input = torch.rand((17, 5, size[0]))
     true_output = input @ module.weight.T
     input_transformed = input_tfm(input)
     weight_transformed = w_out_tfm(w_in_tfm(module.weight))
@@ -64,7 +64,7 @@ def test_correctness_model(type, randomized, model_apply, offload=False):
         model = offloaded_dispatch(model, torch.device("cuda"))
 
     # get output
-    input = torch.rand((17, model.fcs[0].in_features))
+    input = torch.rand((17, 5, model.fcs[0].in_features))
     if offload:
         input = input.to(torch.device("cuda"))
     true_output = model(input)
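The extra middle dimension is what exercises the new reshaping: under the old 2-D assumption, a `(batch, seq, hidden)` input breaks immediately. A small repro of the old failure mode (sizes are hypothetical):

```python
import torch

value = torch.rand(17, 5, 12)  # 3-D activations: (batch, seq, hidden)
weight_size = 4

# old logic read num_heads from dim 1, which is the sequence dim here
num_heads = value.shape[1] // weight_size  # 5 // 4 == 1: already wrong

# and the 2-D reshape cannot view 17*5*12 elements as (17, 1, 4)
try:
    value.reshape(value.shape[0], num_heads, weight_size)
except RuntimeError as err:
    print("old reshape fails on 3-D input:", err)
```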
