Commit 9039eb5

code cleanup and simplification
Signed-off-by: Kyle Sayers <[email protected]>
1 parent f220fb9 commit 9039eb5

File tree

5 files changed, +42 −34 lines changed


src/compressed_tensors/transform/factory/hadamard.py

Lines changed: 2 additions & 2 deletions
@@ -20,7 +20,7 @@
 from compressed_tensors.transform.utils.hadamard import deterministic_hadamard_matrix
 from compressed_tensors.transform.utils.matrix import (
     apply_transform_weight,
-    get_matrix_size,
+    get_transform_size,
 )
 from compressed_tensors.utils import get_execution_device, get_offloaded_device
 from compressed_tensors.utils.helpers import ParameterizedDefaultDict
@@ -52,7 +52,7 @@ def create_transform(self, module: Module, args: TransformArgs):
         :param args: defines how the transform will be applied to the module
         """
         assert isinstance(module, Linear)
-        size = get_matrix_size(module, args.location, self.scheme.head_dim)
+        size = get_transform_size(module, args.location, self.scheme.head_dim)
         dtype = module.weight.dtype
         device = get_offloaded_device(module)
         exec_device = get_execution_device(module)

src/compressed_tensors/transform/factory/matrix_multiply.py

Lines changed: 2 additions & 2 deletions
@@ -19,7 +19,7 @@
 from compressed_tensors.transform.factory.base import TransformBase, TransformFactory
 from compressed_tensors.transform.utils.matrix import (
     apply_transform_weight,
-    get_matrix_size,
+    get_transform_size,
 )
 from compressed_tensors.utils import get_offloaded_device
 from compressed_tensors.utils.helpers import ParameterizedDefaultDict
@@ -51,7 +51,7 @@ def create_transform(self, module: Module, args: TransformArgs):
         :param args: defines how the transform will be applied to the module
         """
         assert isinstance(module, Linear)
-        size = get_matrix_size(module, args.location, self.scheme.head_dim)
+        size = get_transform_size(module, args.location, self.scheme.head_dim)
         dtype = module.weight.dtype
         device = get_offloaded_device(module)
 

src/compressed_tensors/transform/utils/matrix.py

Lines changed: 26 additions & 23 deletions
@@ -12,42 +12,45 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional, Tuple, Callable
+from typing import Callable, Optional, Tuple
 
 import torch
 from compressed_tensors.transform import TransformLocation
 
 
-__all__ = ["get_matrix_size", "apply_transform_weight"]
+__all__ = ["get_transform_size", "apply_transform_weight"]
 
 
-def get_matrix_size(
+def get_transform_size(
     module: torch.nn.Module,
     location: TransformLocation,
     head_dim: Optional[int] = None,
 ) -> int:
     """
-    Determine the size of a matrix given its location on the module
+    Determine the size of a transform matrix given its location on the module
 
     :param module: module that matrix will be applied to
     :param location: location on module
-    :TODO head_dim:
+    :param head_dim: size of head when transform is applied to mha
     :return: size of matrix
     """
-    assert isinstance(module, torch.nn.Linear)
-
-    if location in (TransformLocation.INPUT, TransformLocation.WEIGHT_INPUT):
-        size = module.in_features
+    if isinstance(module, torch.nn.Linear):
+        if location in (TransformLocation.INPUT, TransformLocation.WEIGHT_INPUT):
+            size = module.in_features
+        else:
+            size = module.out_features
     else:
-        size = module.out_features
+        raise NotImplementedError(f"Transforms on {type(module)} are not supported")
 
     if head_dim is not None:
         if size % head_dim != 0:
-            raise ValueError("Cannot ")
-        return head_dim
+            raise ValueError(
+                f"{head_dim} must divide {size} for {type(module)} at {location}"
+            )
 
-    else:
-        return size
+        size = head_dim
+
+    return size
 
 
 def apply_transform_weight(
@@ -56,22 +59,22 @@ def apply_transform_weight(
     location: TransformLocation,
     module_type: type[torch.nn.Module],
 ) -> torch.Tensor:
-    if module_type == torch.nn.Linear:
-        fn, axis = get_linear_transform_fn(module_type, location)
+    fn, axis = get_linear_transform_fn(module_type, location)
 
-    else:
-        raise NotImplementedError(
-            f"Applying transforms to {module_type} is not supported"
-        )
-
     assert weight.shape[0] == weight.shape[1]
     head_dim = weight.shape[0]
     num_heads = value.shape[axis] // head_dim
 
+    value_dtype = value.dtype
+    value = value.to(torch.float64)
+    weight = weight.to(torch.float64)
+
     value = value.unflatten(axis, (num_heads, head_dim))
     value = fn(weight, value)
     value = value.flatten(axis - 1, axis)
 
+    value = value.to(value_dtype)
+
     return value
 
 
@@ -133,10 +136,10 @@ def get_linear_transform_fn(
     elif location == TransformLocation.OUTPUT:
         fn = lambda weight, value: value @ weight
         axis = -1
-
+
     if fn is None:
         raise NotImplementedError(
             f"Applying transforms to {module_type} {location} is not supported"
        )
 
-    return fn, axis
+    return fn, axis
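
For reference, a minimal usage sketch of the renamed helper. This is not part of the commit; it only assumes the imports and the signature shown in the diff above.

import torch
from compressed_tensors.transform import TransformLocation
from compressed_tensors.transform.utils.matrix import get_transform_size

linear = torch.nn.Linear(in_features=64, out_features=128, bias=False)

# Input-side locations are sized by in_features, output-side locations by out_features
assert get_transform_size(linear, TransformLocation.INPUT) == 64
assert get_transform_size(linear, TransformLocation.OUTPUT) == 128

# With head_dim set, the transform is sized per head; head_dim must divide the full size
assert get_transform_size(linear, TransformLocation.INPUT, head_dim=16) == 16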

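The float64 lines added to apply_transform_weight wrap the existing per-head pattern: the value is upcast, unflattened into (num_heads, head_dim) along the transform axis, multiplied by the head-sized transform weight, flattened back, and returned in its original dtype. A standalone sketch of that pattern follows; it is illustrative only, not the library function, and uses the OUTPUT case where the transform right-multiplies along the last axis.

import torch

head_dim, num_heads, batch = 4, 8, 2
value = torch.randn(batch, num_heads * head_dim, dtype=torch.bfloat16)
weight = torch.eye(head_dim, dtype=torch.bfloat16)  # square head_dim x head_dim transform

value_dtype = value.dtype
v = value.to(torch.float64)   # upcast so the per-head matmul happens in high precision
w = weight.to(torch.float64)

v = v.unflatten(-1, (num_heads, head_dim))  # (batch, num_heads, head_dim)
v = v @ w                                   # apply the transform to each head
v = v.flatten(-2, -1)                       # back to (batch, num_heads * head_dim)

out = v.to(value_dtype)                     # restore the original dtype
assert torch.equal(out, value)              # identity transform leaves the value unchanged
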
tests/test_transform/conftest.py

Lines changed: 7 additions & 2 deletions
@@ -44,15 +44,20 @@ def __init__(
         self.num_key_value_groups = num_attention_heads // num_key_value_heads
         self.head_dim = hidden_size // num_attention_heads
         self.scaling = self.head_dim**-0.5
+        assert hidden_size >= num_attention_heads * self.head_dim
 
-        self.q_proj = torch.nn.Linear(hidden_size, hidden_size, bias=False)
+        self.q_proj = torch.nn.Linear(
+            hidden_size, num_attention_heads * self.head_dim, bias=False
+        )
         self.k_proj = torch.nn.Linear(
             hidden_size, num_key_value_heads * self.head_dim, bias=False
         )
         self.v_proj = torch.nn.Linear(
             hidden_size, num_key_value_heads * self.head_dim, bias=False
         )
-        self.o_proj = torch.nn.Linear(hidden_size, hidden_size, bias=False)
+        self.o_proj = torch.nn.Linear(
+            num_attention_heads * self.head_dim, hidden_size, bias=False
+        )
 
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         batch_size, seq_len, hidden_size = hidden_states.shape

tests/test_transform/factory/test_correctness.py

Lines changed: 5 additions & 5 deletions
@@ -93,7 +93,7 @@ def test_correctness_model_offload(type, randomized, model_apply):
 @pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
 @pytest.mark.parametrize("randomized", (True, False))
 @pytest.mark.parametrize("head_dim", (16, 32))
-def test_correctness_heads(type, randomized, head_dim, offload=False):
+def test_correctness_heads(type, randomized, head_dim):
     hidden_size = 64
 
     model = torch.nn.ModuleDict(
@@ -129,10 +129,10 @@ def test_correctness_heads(type, randomized, head_dim, offload=False):
 
 @pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
 @pytest.mark.parametrize("randomized", (True, False))
-@pytest.mark.parametrize("head_dim", (8, 16))
-def test_correctness_attention_heads(type, randomized, head_dim, offload=False):
-    hidden_size = 4096
-    num_attention_heads = 32
+@pytest.mark.parametrize("head_dim", (4, 8))
+def test_correctness_attention_heads(type, randomized, head_dim):
+    hidden_size = 64
+    num_attention_heads = 8
 
     attention = MockAttention(
         hidden_size=hidden_size,
