Commit 4788675

wrap fp4 rounding in torch.compile (#331)
1 parent 859fd90 commit 4788675

File tree

2 files changed: +2 −2 lines changed

src/compressed_tensors/quantization/quant_args.py

Lines changed: 1 addition & 0 deletions
@@ -53,6 +53,7 @@ class FP4_E2M1_DATA(FloatArgs):
     min = -6.0
 
     @staticmethod
+    @torch.compile
     def cast_to_fp4(x):
         sign = torch.sign(x)
         x = torch.abs(x)
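
For reference, here is a minimal runnable sketch of the compiled rounding path. The full body of cast_to_fp4 is elided from this diff, so the rounding logic below is an assumption (round-to-nearest over the representable E2M1 magnitudes {0, 0.5, 1, 1.5, 2, 3, 4, 6}), not the library's exact implementation:

    import torch

    @torch.compile
    def cast_to_fp4_sketch(x: torch.Tensor) -> torch.Tensor:
        # Representable FP4 E2M1 magnitudes; the sign is handled separately.
        levels = torch.tensor([0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0],
                              device=x.device)
        sign = torch.sign(x)
        x = torch.abs(x)
        # Snap each magnitude to the nearest representable level.
        idx = torch.argmin(torch.abs(x.unsqueeze(-1) - levels), dim=-1)
        return sign * levels[idx]

    # The first call triggers compilation; later calls reuse the compiled graph.
    print(cast_to_fp4_sketch(torch.tensor([0.7, -2.4, 5.1])))
    # tensor([ 0.5000, -2.0000,  6.0000])

Decorating the staticmethod with @torch.compile lets TorchInductor fuse the chain of elementwise rounding ops rather than dispatching each one eagerly.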

src/compressed_tensors/quantization/utils/helpers.py

Lines changed: 1 addition & 2 deletions
@@ -81,7 +81,7 @@ def calculate_qparams(
         currently only applied/supported for Fp4
 
     :return: tuple of the calculated scale(s) and zero point(s). For FP4, the calculated
-        scale if of dtype FP8
+        scale is of dtype FP8
     """
     # based on the implementations for consuming quantized values,
     # 0.0 must always be representable within the quantized range
@@ -490,7 +490,6 @@ def generate_global_scale(
     attempts to use the entire FP8 dtype range while mapping a per-group max
     to the FP4 max.
     """
-    scale_dtype = scale_data.dtype
     tensor_amax = torch.abs(input_tensor.data).max().to(dtype)
     global_scale = scale_data.max * quant_data.max / tensor_amax
     return global_scale.to(dtype)
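
The removed scale_dtype assignment was unused, so this change is behavior-preserving. For the formula itself, a hedged standalone sketch, assuming scale_data.max and quant_data.max are the FP8 E4M3 and FP4 E2M1 dtype maxima (448.0 and 6.0; the function name and defaults here are illustrative, not the library's API):

    import torch

    def generate_global_scale_sketch(
        input_tensor: torch.Tensor,
        fp8_max: float = 448.0,   # assumed FP8 E4M3 max (scale_data.max)
        fp4_max: float = 6.0,     # assumed FP4 E2M1 max (quant_data.max)
        dtype: torch.dtype = torch.float32,
    ) -> torch.Tensor:
        tensor_amax = torch.abs(input_tensor).max().to(dtype)
        # Scale so per-group values can use the full FP8 range while the
        # per-group max still maps onto the FP4 max.
        return (fp8_max * fp4_max / tensor_amax).to(dtype)

    weight = torch.randn(256, 256)
    print(generate_global_scale_sketch(weight))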
