Commit 684db8b

Merge branch 'kylesayrs/transform-precision' into kylesayrs/transform-merge

2 parents: dcefc0b + 5db0e13

5 files changed: +116 −28 lines

src/compressed_tensors/transform/factory/hadamard.py
Lines changed: 7 additions & 7 deletions

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import math
 from typing import Optional
 
 import torch
@@ -54,15 +53,14 @@ def create_transform(self, module: Module, args: TransformArgs):
         """
         assert hasattr(module, "weight")
         size = get_transform_size(module, args.location, self.scheme.head_dim)
-        dtype = module.weight.dtype
+        dtype = self.scheme.precision
         device = get_offloaded_device(module)
         exec_device = get_execution_device(module)
-        precision = self.scheme.precision
 
         factory_kwargs = {"construct_device": exec_device}
         weight = self.weights.get(size, dtype, device, factory_kwargs=factory_kwargs)
         perm = self.perms[weight] if self.scheme.randomize else None
-        return HadamardTransform(weight, perm, args, precision, type(module))
+        return HadamardTransform(weight, perm, self.scheme, args, type(module))
 
     def _create_weight(
         self,
@@ -86,17 +84,19 @@ def __init__(
         self,
         weight: Parameter,
         perm: Optional[Parameter],
+        scheme: TransformScheme,
         args: TransformArgs,
         precision: torch.dtype,
         module_type: type[torch.nn.Module],
     ):
         super().__init__()
         self.weight = weight
         self.perm = perm
+        self.scheme = scheme
         self.args = args
         self.precision = precision
         self.module_type = module_type
-        self._scale = torch.tensor(weight.size(0), dtype=self.precision).sqrt()
+        self._scale = torch.tensor(weight.size(0), dtype=self.scheme.precision).sqrt()
 
     def forward(self, value: Tensor) -> Tensor:
         weight = self.weight
@@ -109,8 +109,8 @@ def forward(self, value: Tensor) -> Tensor:
 
         return (
             apply_transform_weight(
-                weight.to(self.precision),
-                value.to(self.precision),
+                weight.to(self.scheme.precision),
+                value.to(self.scheme.precision),
                 self.args.location,
                 self.module_type,
             )
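The change above routes the application dtype through `scheme.precision`: both the Hadamard weight and the incoming value are cast to that precision before multiplying, and the result is cast back at the call site. A minimal standalone sketch of the pattern, independent of the library's APIs (the helper name `apply_in_precision` and the shapes are illustrative, not part of this codebase):

```python
import torch

def apply_in_precision(
    weight: torch.Tensor, value: torch.Tensor, precision: torch.dtype = torch.float32
) -> torch.Tensor:
    """Apply an orthonormal transform in `precision`, then restore the input dtype."""
    out = value.to(precision) @ weight.to(precision)
    return out.to(value.dtype)

# 2x2 Hadamard matrix, scaled by 1/sqrt(size) so the transform is orthonormal
# (the diff's `_scale` term plays the same normalization role)
H = torch.tensor([[1.0, 1.0], [1.0, -1.0]]) / torch.tensor(2.0).sqrt()

x = torch.randn(4, 2, dtype=torch.bfloat16)
y = apply_in_precision(H, x)         # rotate in float32
x_back = apply_in_precision(H.T, y)  # orthonormal, so the transpose inverts it
assert torch.allclose(x.float(), x_back.float(), atol=1e-2)
```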

src/compressed_tensors/transform/factory/matrix_multiply.py
Lines changed: 8 additions & 6 deletions

@@ -24,7 +24,7 @@
 from compressed_tensors.utils import get_offloaded_device
 from compressed_tensors.utils.helpers import ParameterizedDefaultDict
 from torch import Tensor, device, dtype
-from torch.nn import Linear, Module, Parameter
+from torch.nn import Module, Parameter
 
 
 @TransformFactory.register("random-matrix")
@@ -52,7 +52,7 @@ def create_transform(self, module: Module, args: TransformArgs):
         """
         assert hasattr(module, "weight")
         size = get_transform_size(module, args.location, self.scheme.head_dim)
-        dtype = module.weight.dtype
+        dtype = self.scheme.precision
         device = get_offloaded_device(module)
         precision = self.scheme.precision
 
@@ -79,29 +79,31 @@ class RandomMatrixTransform(TransformBase):
     def __init__(
         self,
         weight: Tensor,
+        scheme: TransformScheme,
         args: TransformArgs,
         precision: torch.dtype,
         module_type: type[torch.nn.Module],
     ):
         super().__init__()
         self.weight = weight  # is an inverse if args.inverse
+        self.scheme = scheme
         self.args = args
         self.precision = precision
         self.module_type = module_type
 
     def forward(self, value: Tensor) -> Parameter:
         return apply_transform_weight(
-            self.weight.to(self.precision),
-            value.to(self.precision),
+            self.weight.to(self.scheme.precision),
+            value.to(self.scheme.precision),
             self.args.location,
             self.module_type,
         ).to(value.dtype)
 
     def right_inverse(self, value: Tensor) -> Tensor:
         inverse = high_precision_invert(self.weight)
         return apply_transform_weight(
-            inverse.to(self.precision),
-            value.to(self.precision),
+            inverse.to(self.scheme.precision),
+            value.to(self.scheme.precision),
             self.args.location,
             self.module_type,
        ).to(value.dtype)
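As in the Hadamard factory, the random-matrix transform now multiplies in `scheme.precision` and casts the result back to the caller's dtype; `right_inverse` additionally inverts the weight via `high_precision_invert`, whose body is not part of this diff. A hedged sketch of the forward/inverse round trip, under the assumption that the inversion happens in float64 (all names below are stand-ins for illustration, not the library's API):

```python
import torch

def high_precision_invert(weight: torch.Tensor) -> torch.Tensor:
    # assumption: invert in float64 for stability, then restore the original dtype
    return torch.linalg.inv(weight.to(torch.float64)).to(weight.dtype)

precision = torch.float32
W = torch.randn(8, 8)  # a random square matrix is invertible with high probability
x = torch.randn(3, 8, dtype=torch.bfloat16)

y = (x.to(precision) @ W.to(precision)).to(x.dtype)  # forward
x_back = (y.to(precision) @ high_precision_invert(W).to(precision)).to(y.dtype)

# the round trip is only approximate: bfloat16 storage limits the precision
print((x.float() - x_back.float()).abs().max())
```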

src/compressed_tensors/transform/transform_scheme.py
Lines changed: 3 additions & 1 deletion

@@ -36,11 +36,13 @@ class TransformScheme(BaseModel):
     :param randomize: True if uniquely randomized transform weights should be used,
         otherwise use identical transform weights where applicable
     :param requires_grad: True if weights include gradients for training
+    :param precision: Precision at which this transform should be applied. This applies
+        to both weight fusing and online rotations
     """
 
     type: str
     apply: List[TransformArgs] = Field(default_factory=list)
     randomize: bool = Field(default=False)
     requires_grad: bool = Field(default=False)
     head_dim: Optional[int] = Field(default=None)
-    precision: TorchDtype = Field(default=torch.bfloat16)
+    precision: TorchDtype = Field(default=torch.float32)
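The default application precision therefore moves from `torch.bfloat16` to `torch.float32`. A brief usage sketch, assuming `TransformScheme` is exported from `compressed_tensors.transform` as it is elsewhere in this repo:

```python
import torch
from compressed_tensors.transform import TransformScheme

scheme = TransformScheme(type="hadamard")
assert scheme.precision == torch.float32  # new default after this change

# callers that want the old behavior can opt back in explicitly
fast_scheme = TransformScheme(type="hadamard", precision=torch.bfloat16)
```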

src/compressed_tensors/utils/match.py
Lines changed: 67 additions & 13 deletions

@@ -15,7 +15,7 @@
 import logging
 import re
 from collections.abc import Generator
-from typing import Iterable, Tuple
+from typing import Iterable, Mapping, Optional, Tuple
 
 import torch
 from compressed_tensors.utils.internal import InternalModule
@@ -32,10 +32,14 @@
 ]
 
 
+FusedMappping = Mapping[str, Iterable[str]]
+
+
 def match_named_modules(
     model: torch.nn.Module,
     targets: Iterable[str],
     ignore: Iterable[str] = tuple(),
+    fused: Optional[FusedMappping] = None,
     warn_on_fail: bool = False,
 ) -> Generator[Tuple[str, torch.nn.Module]]:
     """
@@ -45,16 +49,18 @@ def match_named_modules(
     :param model: model containing submodules to match against
     :param targets: target strings, potentially containing "re:" prefixes
     :param ignore: targets to ignore, potentially containing "re:" prefixes
+    :param fused: optional mapping from suffixes of fused modules to the suffixes
+        of their corresponding shards. See `compressed_tensors.utils.match.is_match`
     :param warn_on_fail: if True, warns if any targets do not match any modules in model
     :return: generator of module names and modules
     """
     unmatched_targets = set(targets)
     for name, module in model.named_modules():
         for target in targets:
-            if is_match(name, module, target):
+            if is_match(name, module, target, fused):
                 unmatched_targets -= {target}
 
-                if not any(is_match(name, module, ign) for ign in ignore):
+                if not any(is_match(name, module, ign, fused) for ign in ignore):
                     yield name, module
 
     if warn_on_fail:
@@ -68,6 +74,7 @@ def match_named_parameters(
     model: torch.nn.Module,
     targets: Iterable[str],
     ignore: Iterable[str] = tuple(),
+    fused: Optional[FusedMappping] = None,
     warn_on_fail: bool = False,
 ) -> Generator[Tuple[str, torch.nn.Module, torch.nn.Parameter]]:
     """
@@ -77,6 +84,8 @@ def match_named_parameters(
     :param model: model containing params to match against
     :param targets: target strings, potentially containing "re:" prefixes
     :param ignore: targets to ignore, potentially containing "re:" prefixes
+    :param fused: optional mapping from suffixes of fused modules to the suffixes
+        of their corresponding shards. See `compressed_tensors.utils.match.is_match`
     :param warn_on_fail: if True, warns if any targets do not match any params in model
     :return: generator of fully-qualified param names, parent modules, and params
     """
@@ -88,10 +97,10 @@ def match_named_parameters(
         for param_name, param in module.named_parameters(recurse=False):
             param_fqn = f"{module_name}.{param_name}"
             for target in targets:
-                if _match_name(param_fqn, target):
+                if _match_name(param_fqn, target, fused):
                     unmatched_targets -= {target}
 
-                    if not any(_match_name(param_fqn, ign) for ign in ignore):
+                    if not any(_match_name(param_fqn, ign, fused) for ign in ignore):
                         yield param_fqn, module, param
 
     if warn_on_fail:
@@ -164,21 +173,56 @@ def match_modules_set(
         raise ValueError(f"Unable to match targets into set: {unmatched_keys}")
 
 
-def is_match(name: str, module: torch.nn.Module, target: str) -> bool:
+def is_match(
+    name: str,
+    module: torch.nn.Module,
+    target: str,
+    fused: Optional[FusedMappping] = None,
+) -> bool:
     """
     Returns true if either module name or module parent classes match against target
-    and the module is not an internal module
+    and the module is not an internal module. The name and module may refer to a fused
+    module defined by vLLM. In these cases, a `fused` mapping must be provided.
+
+    For example, in `vllm/model_executor/models/llama.py`:
+    ```python
+    packed_modules_mapping = {
+        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
+        "gate_up_proj": ["gate_proj", "up_proj"]
+    }
+    ```
+
+    :param name: name of module
+    :param module: module to match
+    :param target: target which matches name or module, potentially contains regex
+    :param fused: optional mapping from suffixes of fused modules to the suffixes
+        of their corresponding shards
     """
     return not isinstance(module, InternalModule) and (
-        _match_name(name, target) or _match_class(module, target)
+        _match_name(name, target, fused) or _match_class(module, target)
     )
 
 
-def _match_name(name: str, target: str) -> bool:
+def _match_name(name: str, target: str, fused: Optional[FusedMappping] = None) -> bool:
     """
-    Returns true if target string begins with "re:" and
-    regex matches or if target string exactly matches name
+    Returns true if target string begins with "re:" and regex matches or if target
+    string exactly matches name. If the name refers to a fused module defined by vLLM,
+    a `fused` mapping must be provided.
+
+    :param name: name of module
+    :param target: target name, potentially contains regex
+    :param fused: optional mapping from suffixes of fused modules to the suffixes
+        of their corresponding shards
     """
+    if fused is not None:
+        for fused_suffix in fused:
+            if name.endswith(fused_suffix):
+                name_stripped = name.removesuffix(fused_suffix)
+                return any(
+                    _match_name(name_stripped + shard_suffix, target)
+                    for shard_suffix in fused[fused_suffix]
+                )
+
     if target.startswith("re:"):
         return re.match(target.removeprefix("re:"), name) is not None
     else:
@@ -187,10 +231,20 @@ def _match_name(name: str, target: str) -> bool:
 
 def _match_class(module: torch.nn.Module, target: str) -> bool:
     """
-    Returns true if any torch parent class names match the target string exactly
+    Returns true if any torch parent class names match the target string exactly.
+    A special exception is made for vllm's `LinearBase` class which matches `Linear`
+
+    :param module: module to match
+    :param target: target which matches name or module
     """
     # will never match against a regex pattern since `:` is not allowed in class names
     return any(
-        issubclass(cls, torch.nn.Module) and cls.__name__ == target
+        (
+            issubclass(cls, torch.nn.Module)
+            and (
+                cls.__name__ == target
+                or (cls.__name__ == "LinearBase" and target == "Linear")
+            )
+        )
         for cls in module.__class__.__mro__
     )
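The fused-name resolution added to `_match_name` rewrites a fused module name (e.g. `...qkv_proj`) into each of its shard names and matches the target against those instead. A self-contained sketch of just that rewrite, mirroring the logic above outside the library:

```python
import re
from typing import Iterable, Mapping, Optional

def match_name(
    name: str, target: str, fused: Optional[Mapping[str, Iterable[str]]] = None
) -> bool:
    # resolve a fused suffix (e.g. "qkv_proj") into its shard names first
    if fused is not None:
        for fused_suffix in fused:
            if name.endswith(fused_suffix):
                stem = name.removesuffix(fused_suffix)
                return any(
                    match_name(stem + shard_suffix, target)
                    for shard_suffix in fused[fused_suffix]
                )
    if target.startswith("re:"):
        return re.match(target.removeprefix("re:"), name) is not None
    return target == name

mapping = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}
print(match_name("model.layers.0.self_attn.qkv_proj", "re:.*k_proj", mapping))  # True
print(match_name("model.layers.0.self_attn.qkv_proj", "re:.*o_proj", mapping))  # False
```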

tests/test_utils/test_match.py
Lines changed: 31 additions & 1 deletion

@@ -16,7 +16,6 @@
 
 import pytest
 import torch.nn as nn
-from accelerate import init_empty_weights
 
 # Assuming the module is named "module_matching" - adjust import as needed
 from compressed_tensors.utils import (
@@ -33,6 +32,11 @@ class DummyModel(nn.Module):
     """Test model for unit tests. Weights are initialized on meta device"""
 
     def __init__(self):
+        try:
+            from accelerate import init_empty_weights
+        except ImportError:
+            pytest.skip("Skipping: weight init requires accelerate")
+
         super().__init__()
         with init_empty_weights():
             self.layer1 = nn.Linear(10, 20)
@@ -142,6 +146,15 @@ def test_custom_module(self):
         assert _match_class(model, "DummyModel") == True
         assert _match_class(model, "Module") == True
 
+    def test_linear_base(self):
+        """Test matching against vllm's LinearBase class"""
+
+        class LinearBase(nn.Module):
+            pass
+
+        linear = LinearBase()
+        assert _match_class(linear, "Linear") == True
+
 
 class TestIsMatch:
     """Test cases for is_match function"""
@@ -180,6 +193,23 @@ class InternalLinear(InternalModule, nn.Linear):
         linear = InternalLinear(10, 20)
         assert is_match("layer1", linear, "re:layer.*") == False
 
+    def test_fused_mapping(self):
+        """Test matching through a fused (packed) module mapping"""
+        linear = nn.Linear(10, 20)
+        mapping = {
+            "qkv_proj": ["q_proj", "k_proj", "v_proj"],
+            "gate_up_proj": ["gate_proj", "up_proj"],
+        }
+
+        assert is_match("dummy.qkv_proj", linear, "re:.*q_proj", mapping) == True
+        assert is_match("dummy.qkv_proj", linear, "re:.*k_proj", mapping) == True
+        assert is_match("dummy.qkv_proj", linear, "re:.*v_proj", mapping) == True
+        assert is_match("dummy.qkv_proj", linear, "Linear", mapping) == True
+
+        assert is_match("dummy.gate_up_proj", linear, "re:.*gate_proj", mapping) == True
+        assert is_match("dummy.gate_up_proj", linear, "re:.*up_proj", mapping) == True
+        assert is_match("dummy.gate_up_proj", linear, "Linear", mapping) == True
+
 
 class TestMatchNamedModules:
     """Test cases for match_named_modules function"""
