Commit 7dc3c9f

Add scale decompression support
Signed-off-by: shanjiaz <[email protected]>
1 parent 0dc048d commit 7dc3c9f

File tree

8 files changed: +138 -45 lines changed

src/compressed_tensors/compressors/quantized_compressors/base.py

Lines changed: 0 additions & 8 deletions
@@ -140,18 +140,10 @@ def compress(
             if value is None:
                 continue

-            if name.endswith("weight_scale") and self._skip_scale():
-                continue
-
             compressed_dict[name] = value.to(compression_device)

         return compressed_dict

-    def _skip_scale(self):
-        from compressed_tensors.compressors import NVFP4PackedCompressor
-
-        return isinstance(self, NVFP4PackedCompressor)
-
     def decompress(
         self,
         path_to_model_or_tensors: Union[str, Path, Dict[str, Any]],
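
Note: with the `_skip_scale` shortcut gone, anything `compress_weight` returns (including `weight_scale` for NVFP4) passes through the loop above unchanged. A standalone sketch of that behavior, using placeholder tensors and a hypothetical module name:

import torch

# Hypothetical per-module output of compress_weight(); after this change the
# scale entry is kept instead of being skipped for NVFP4 compressors.
module_outputs = {
    "linear.weight_packed": torch.zeros(128, 64, dtype=torch.uint8),
    "linear.weight_scale": torch.ones(128, 16),  # real NVFP4 scales would be FP8
}

compressed_dict = {}
for name, value in module_outputs.items():
    if value is None:
        continue
    # no weight_scale special case anymore
    compressed_dict[name] = value.to("cpu")

assert "linear.weight_scale" in compressed_dict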

src/compressed_tensors/compressors/quantized_compressors/fp4_quantized.py

Lines changed: 25 additions & 2 deletions
@@ -21,8 +21,9 @@
     BaseQuantizationCompressor,
 )
 from compressed_tensors.config import CompressionFormat
-from compressed_tensors.quantization import QuantizationArgs
+from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
 from compressed_tensors.quantization.lifecycle.forward import dequantize, quantize
+from compressed_tensors.quantization.utils import calculate_qparam_shape
 from torch import Tensor

@@ -56,7 +57,6 @@ def compression_param_names(self) -> Tuple[str]:
         return (
             "weight_packed",
             "weight_scale",
-            "weight_zero_point",
             "weight_global_scale",
         )

@@ -79,6 +79,24 @@ def compression_param_info(
                 torch.uint8,
             ),
         }
+
+        # Add weight_scale and weight_global_scale for NVFP4/MXFP4
+        if quantization_args is not None and quantization_args.strategy in [
+            QuantizationStrategy.GROUP.value,
+            QuantizationStrategy.TENSOR_GROUP.value,
+        ]:
+            # Use centralized calculation for consistency and correctness
+            num_groups, scale_shape = calculate_qparam_shape(
+                weight_shape, quantization_args
+            )
+            output["weight_scale"] = (scale_shape, quantization_args.scale_dtype)
+
+            if quantization_args.strategy == QuantizationStrategy.TENSOR_GROUP.value:
+                output["weight_global_scale"] = (
+                    torch.Size((1,)),
+                    torch.float32,
+                )
+
         return output

     def compress_weight(
@@ -104,6 +122,11 @@ def compress_weight(
             weight_packed = weight_packed.to(device)
         compressed_dict["weight_packed"] = weight_packed
         compressed_dict["weight_scale"] = scale.to(quantization_args.scale_dtype)
+
+        # Include global_scale if provided (for TENSOR_GROUP strategy)
+        if global_scale is not None:
+            compressed_dict["weight_global_scale"] = global_scale
+
         return compressed_dict

     def decompress_weight(
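
For a sense of what the new `compression_param_info` branch reports, here is a standalone sketch of the scale shape for a group-quantized NVFP4-style weight; the 4096x4096 layer, group size of 16, and the printed shapes are illustrative assumptions, not values from the commit:

import math
import torch

weight_shape = torch.Size((4096, 4096))  # assumed example layer
group_size = 16                          # assumed NVFP4-style group size

# ceiling division over the last dim, mirroring calculate_qparam_shape for
# GROUP / TENSOR_GROUP strategies
num_groups = math.ceil(weight_shape[-1] / group_size)
scale_shape = torch.Size((weight_shape[0], num_groups))

print(scale_shape)       # torch.Size([4096, 256])
print(torch.Size((1,)))  # weight_global_scale shape added for TENSOR_GROUP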

src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py

Lines changed: 11 additions & 1 deletion
@@ -111,7 +111,17 @@ def compress_weight(
         if device is not None:
             quantized_weight = quantized_weight.to(device)

-        return {"weight": quantized_weight}
+        compressed_dict = {"weight": quantized_weight}
+
+        # Include scale, zero_point, and g_idx if they exist
+        if scale is not None:
+            compressed_dict["weight_scale"] = scale
+        if zero_point is not None:
+            compressed_dict["weight_zero_point"] = zero_point
+        if g_idx is not None:
+            compressed_dict["weight_g_idx"] = g_idx
+
+        return compressed_dict

     def decompress_weight(
         self,
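
A minimal standalone sketch of the dict `compress_weight` now returns when quantization parameters are present (key names mirror the diff; tensors and shapes are placeholders):

import torch

def build_compressed_dict(quantized_weight, scale=None, zero_point=None, g_idx=None):
    # mirrors the new behavior: keep scale / zero_point / g_idx when they exist
    compressed_dict = {"weight": quantized_weight}
    if scale is not None:
        compressed_dict["weight_scale"] = scale
    if zero_point is not None:
        compressed_dict["weight_zero_point"] = zero_point
    if g_idx is not None:
        compressed_dict["weight_g_idx"] = g_idx
    return compressed_dict

out = build_compressed_dict(
    torch.zeros(8, 8, dtype=torch.int8),
    scale=torch.ones(8, 1),
    zero_point=torch.zeros(8, 1, dtype=torch.int8),
)
print(sorted(out.keys()))  # ['weight', 'weight_scale', 'weight_zero_point']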

src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py

Lines changed: 46 additions & 24 deletions
@@ -22,7 +22,7 @@
 from compressed_tensors.config import CompressionFormat
 from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
 from compressed_tensors.quantization.lifecycle.forward import dequantize, quantize
-from compressed_tensors.quantization.utils import can_quantize
+from compressed_tensors.quantization.utils import calculate_qparam_shape, can_quantize
 from torch import Tensor

@@ -69,20 +69,26 @@ def compression_param_info(
             "weight_packed": (torch.Size((weight_shape[0], packed_size)), torch.int32),
             "weight_shape": (torch.Size((2,)), torch.int32),
         }
-        if not quantization_args.symmetric and quantization_args.strategy in [
+
+        # Add weight_scale - always needed for quantization
+        if quantization_args.strategy in [
             QuantizationStrategy.GROUP.value,
             QuantizationStrategy.CHANNEL.value,
         ]:
-            zp_factor = (
-                quantization_args.group_size
-                if quantization_args.strategy == QuantizationStrategy.GROUP.value
-                else weight_shape[-1]
+            # Use centralized calculation for consistency and correctness
+            num_groups, scale_shape = calculate_qparam_shape(
+                weight_shape, quantization_args
             )
+            output["weight_scale"] = (scale_shape, quantization_args.scale_dtype)
+
+            # Add weight_zero_point for asymmetric quantization
+            # Zero point has same num_groups as scale, but with packed rows
+            if not quantization_args.symmetric:
+                output["weight_zero_point"] = (
+                    torch.Size((packed_size_zp, num_groups)),
+                    torch.int32,
+                )

-            output["weight_zero_point"] = (
-                torch.Size((packed_size_zp, weight_shape[-1] // zp_factor)),
-                torch.int32,
-            )
         return output

     def compress_weight(
@@ -126,22 +132,36 @@ def compress_weight(

         packed_weight = pack_to_int32(quantized_weight, quantization_args.num_bits)

-        weight_shape = torch.tensor(weight.shape)
+        weight_shape = torch.tensor(weight.shape, dtype=torch.int32)
         if device is not None:
             packed_weight = packed_weight.to(device)
             weight_shape = weight_shape.to(device)

         compressed_dict["weight_shape"] = weight_shape
         compressed_dict["weight_packed"] = packed_weight

-        if not quantization_args.symmetric and quantization_args.strategy in [
-            QuantizationStrategy.GROUP.value,
-            QuantizationStrategy.CHANNEL.value,
-        ]:
-            packed_zp = pack_to_int32(
-                zero_point, quantization_args.num_bits, packed_dim=0
-            )
-            compressed_dict["weight_zero_point"] = packed_zp.contiguous()
+        # Include scale if provided
+        if scale is not None:
+            compressed_dict["weight_scale"] = scale
+
+        # Include zero_point if provided
+        if zero_point is not None:
+            if not quantization_args.symmetric and quantization_args.strategy in [
+                QuantizationStrategy.GROUP.value,
+                QuantizationStrategy.CHANNEL.value,
+            ]:
+                packed_zp = pack_to_int32(
+                    zero_point, quantization_args.num_bits, packed_dim=0
+                )
+                compressed_dict["weight_zero_point"] = packed_zp.contiguous()
+            else:
+                # For symmetric or other strategies, include unpacked zero_point
+                compressed_dict["weight_zero_point"] = zero_point
+
+        # Include g_idx if provided
+        if g_idx is not None:
+            compressed_dict["weight_g_idx"] = g_idx
+
         return compressed_dict

     def decompress_weight(
@@ -172,11 +192,13 @@ def decompress_weight(
                 zero_point is not None
             ), "Asymmetric quantization requires zero-point values"
             original_zp_shape = (original_shape[0], scale.shape[-1])
-            zero_point = unpack_from_int32(
-                zero_point, num_bits, original_zp_shape, packed_dim=0
-            )
-            # Update the compressed_data dict with the unpacked zero_point
-            compressed_data["weight_zero_point"] = zero_point
+            # Only unpack if it's still packed (int32)
+            if zero_point.dtype == torch.int32:
+                zero_point = unpack_from_int32(
+                    zero_point, num_bits, original_zp_shape, packed_dim=0
+                )
+                # Update the compressed_data dict with the unpacked zero_point
+                compressed_data["weight_zero_point"] = zero_point

         decompressed_weight = dequantize(
             x_q=unpacked, scale=scale, zero_point=zero_point, g_idx=g_idx
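
A standalone sketch of the new decompression guard: the zero point is only unpacked when it still carries the packed int32 dtype, so a tensor that was already unpacked passes through untouched. `fake_unpack` below is a stand-in for the library's `unpack_from_int32`, not its real implementation:

import torch

def fake_unpack(packed: torch.Tensor) -> torch.Tensor:
    # placeholder: the real helper expands num_bits values out of each int32 word
    return packed.to(torch.int8)

def maybe_unpack_zero_point(zero_point: torch.Tensor) -> torch.Tensor:
    # only unpack if it's still packed (int32)
    if zero_point.dtype == torch.int32:
        zero_point = fake_unpack(zero_point)
    return zero_point

packed = torch.zeros(2, 4, dtype=torch.int32)
already_unpacked = torch.zeros(2, 8, dtype=torch.int8)

assert maybe_unpack_zero_point(packed).dtype == torch.int8
assert maybe_unpack_zero_point(already_unpacked) is already_unpacked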

src/compressed_tensors/quantization/utils/helpers.py

Lines changed: 45 additions & 0 deletions
@@ -48,6 +48,7 @@
     "calculate_qparams",
     "generate_gparam",
     "strategy_cdiv",
+    "calculate_qparam_shape",
 ]

 # target the self_attn layer
@@ -448,6 +449,50 @@ def strategy_cdiv(
     return dividend


+def calculate_qparam_shape(
+    weight_shape: torch.Size,
+    quantization_args: QuantizationArgs,
+) -> Tuple[int, torch.Size]:
+    """
+    Calculate the number of groups and scale/zero_point shape for quantization.
+
+    This centralizes the logic for determining quantization parameter shapes,
+    ensuring consistency with initialize_qparams and avoiding floor division bugs.
+
+    :param weight_shape: shape of the weight tensor to be quantized
+    :param quantization_args: quantization configuration
+    :return: tuple of (num_groups, expected_shape) where:
+        - num_groups: number of quantization groups
+        - expected_shape: shape for scale/zero_point tensors (weight_shape[0], num_groups)
+    """
+    strategy = quantization_args.strategy
+
+    if strategy == QuantizationStrategy.TENSOR:
+        num_groups = 1
+        expected_shape = torch.Size((1,))
+
+    elif strategy == QuantizationStrategy.CHANNEL:
+        num_groups = 1
+        expected_shape = torch.Size((weight_shape[0], 1))
+
+    elif strategy in (QuantizationStrategy.GROUP, QuantizationStrategy.TENSOR_GROUP):
+        group_size = quantization_args.group_size
+        if group_size is None:
+            raise ValueError(f"{strategy} quantization requires group_size to be set")
+
+        # Use strategy_cdiv for proper ceiling division and validation
+        num_groups = strategy_cdiv(weight_shape[-1], group_size, strategy)
+        expected_shape = torch.Size((weight_shape[0], num_groups))
+
+    else:
+        raise ValueError(
+            f"Unsupported quantization strategy: {strategy}. "
+            f"Supported strategies: TENSOR, CHANNEL, GROUP, TENSOR_GROUP"
+        )
+
+    return num_groups, expected_shape
+
+
 def _get_dtype_eps(dtype: torch.dtype) -> float:
     if dtype == FP8_E4M3_DATA.dtype:
         return 0.125
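
A usage example of the new helper (assuming a build that contains this commit, and that `QuantizationArgs` accepts the fields shown); per the docstring, the shapes are meant to match what initialize_qparams allocates:

import torch
from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
from compressed_tensors.quantization.utils import calculate_qparam_shape

# group-wise: 1024 columns / group_size 128 -> 8 groups, one scale per (row, group)
args = QuantizationArgs(
    num_bits=4, strategy=QuantizationStrategy.GROUP, group_size=128, symmetric=True
)
num_groups, shape = calculate_qparam_shape(torch.Size((512, 1024)), args)
print(num_groups, shape)  # 8 torch.Size([512, 8])

# channel-wise: a single scale per output row
args_ch = QuantizationArgs(num_bits=8, strategy=QuantizationStrategy.CHANNEL)
print(calculate_qparam_shape(torch.Size((512, 1024)), args_ch))  # (1, torch.Size([512, 1]))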

tests/test_compressors/model_compressors/test_model_compressor.py

Lines changed: 7 additions & 3 deletions
@@ -214,6 +214,7 @@ def test_composability(tmp_path, sparsity_config, quantization_config):
                 "linear.row_offsets",
                 "linear.shape",
                 "linear.weight_scale",
+                "linear.weight_zero_point",
             },
         )
     ],
@@ -572,9 +573,12 @@ def test_decompress_model(model_stub, comp_stub):
     # equivalent to decompressing from disk
     assert decompressed.keys() == true_decompressed.keys()
     for key in decompressed.keys():
-        assert (
-            decompressed[key].dtype == true_decompressed[key].dtype
-        ), f"{key} dtypes not equal"
+        # Skip dtype check for weight_shape - int32/int64 are functionally equivalent
+        # torch.Size() works identically with both, old checkpoints use int64, new use int32
+        if not key.endswith("weight_shape"):
+            assert (
+                decompressed[key].dtype == true_decompressed[key].dtype
+            ), f"{key} dtypes not equal"
         assert torch.all(
            decompressed[key] == true_decompressed[key]
        ), f"{key} values not equal"

tests/test_compressors/quantized_compressors/test_fp8_quant.py

Lines changed: 2 additions & 2 deletions
@@ -89,8 +89,8 @@ def test_quant_format(strategy, group_size, sc, zp):
         dense_state_dict, names_to_scheme=module_name_to_scheme
     )

-    # state_dict params should be the same, minus the zero_point if symmetric
-    assert len(dense_state_dict) == len(compressed_state_dict) + 1
+    # state_dict params should be the same (zero_point included even for symmetric)
+    assert len(dense_state_dict) == len(compressed_state_dict)

     # check compressed to int8
     assert compressed_state_dict["dummy.weight_scale"].dtype == torch.float32

tests/test_compressors/quantized_compressors/test_int_quant.py

Lines changed: 2 additions & 5 deletions
@@ -81,11 +81,8 @@ def test_quant_format(strategy, symmetric, group_size, sc, zp):
         dense_state_dict, names_to_scheme=quantized_modules_to_scheme
     )

-    # state_dict params should be the same, minus the zero_point if symmetric
-    if symmetric:
-        assert len(dense_state_dict) == len(compressed_state_dict) + 1
-    else:
-        assert len(dense_state_dict) == len(compressed_state_dict)
+    # state_dict params should be the same (zero_point included even for symmetric)
+    assert len(dense_state_dict) == len(compressed_state_dict)

     # check compressed to int8
     assert compressed_state_dict["dummy.weight"].dtype == torch.int8
