Skip to content

Commit 58257eb

Browse files
merge main
2 parents 9e2b530 + 0731aa5 commit 58257eb

File tree

6 files changed

+20
-12
lines changed

6 files changed

+20
-12
lines changed

src/compressed_tensors/compressors/model_compressors/model_compressor.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ def from_compression_config(
147147

148148
sparsity_config = cls.parse_sparsity_config(compression_config)
149149
quantization_config = cls.parse_quantization_config(compression_config)
150-
# NOTE: transfrom config is not support by ctconfig yet
150+
# TODO: transform config is not supported by CompressedTensorsConfig yet
151151

152152
if sparsity_config is None and quantization_config is None:
153153
return None
@@ -207,7 +207,7 @@ def from_pretrained_model(
207207

208208
@staticmethod
209209
def parse_sparsity_config(
210-
compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"]
210+
compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"],
211211
) -> Union[Dict[str, Any], None]:
212212
"""
213213
Parse sparsity config from quantization/compression config. Sparsity
@@ -227,7 +227,7 @@ def parse_sparsity_config(
227227

228228
@staticmethod
229229
def parse_quantization_config(
230-
compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"]
230+
compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"],
231231
) -> Union[Dict[str, Any], None]:
232232
"""
233233
Parse quantization config from quantization/compression config. The

src/compressed_tensors/transform/factory/hadamard.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ def __init__(
9595
self.args = args
9696
self.module_type = module_type
9797
self._scale = torch.tensor(weight.size(0), dtype=self.scheme.precision).sqrt()
98+
self._precision = scheme.precision if args.is_online() else torch.float64
9899

99100
def forward(self, value: Tensor) -> Tensor:
100101
weight = self.weight
@@ -107,8 +108,8 @@ def forward(self, value: Tensor) -> Tensor:
107108

108109
return (
109110
apply_transform_weight(
110-
weight.to(self.scheme.precision),
111-
value.to(self.scheme.precision),
111+
weight.to(self._precision),
112+
value.to(self._precision),
112113
self.args.location,
113114
self.module_type,
114115
)

src/compressed_tensors/transform/factory/matrix_multiply.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,20 +87,21 @@ def __init__(
8787
self.scheme = scheme
8888
self.args = args
8989
self.module_type = module_type
90+
self._precision = scheme.precision if args.is_online() else torch.float64
9091

9192
def forward(self, value: Tensor) -> Parameter:
9293
return apply_transform_weight(
93-
self.weight.to(self.scheme.precision),
94-
value.to(self.scheme.precision),
94+
self.weight.to(self._precision),
95+
value.to(self._precision),
9596
self.args.location,
9697
self.module_type,
9798
).to(value.dtype)
9899

99100
def right_inverse(self, value: Tensor) -> Tensor:
100101
inverse = high_precision_invert(self.weight)
101102
return apply_transform_weight(
102-
inverse.to(self.scheme.precision),
103-
value.to(self.scheme.precision),
103+
inverse.to(self._precision),
104+
value.to(self._precision),
104105
self.args.location,
105106
self.module_type,
106107
).to(value.dtype)

src/compressed_tensors/transform/transform_args.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,9 @@ def wrap_singleton(cls, value):
6868
if isinstance(value, str):
6969
return [value]
7070
return value
71+
72+
def is_online(self) -> bool:
73+
return self.location not in (
74+
TransformLocation.WEIGHT_INPUT,
75+
TransformLocation.WEIGHT_OUTPUT,
76+
)

src/compressed_tensors/transform/transform_scheme.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ class TransformScheme(BaseModel):
3636
:param randomize: True if uniquely randomized transform weights should be used,
3737
otherwise use identical transform weights where applicable
3838
:param requires_grad: True if weights include gradients for training
39-
:param precision: Precision at which this transform should be applied. This applies
40-
to both weight fusing and online rotations
39+
:param precision: Precision at which this transform should be applied during online
40+
rotations. Fused (offline) rotations are always performed in float64
4141
"""
4242

4343
type: str

src/compressed_tensors/utils/type.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def validate_from_str(name: str) -> torch.dtype:
3636
try:
3737
value = getattr(torch, name)
3838
assert isinstance(value, torch.dtype)
39-
except AttributeError:
39+
except Exception:
4040
raise ValueError(f"No such torch dtype `torch.{name}`")
4141

4242
return value

0 commit comments

Comments
 (0)