Skip to content

Commit 35e5442

Browse files
committed
cleanup
Signed-off-by: shanjiaz <[email protected]>
1 parent 30f40b0 commit 35e5442

File tree

6 files changed: +25 additions, -48 deletions

src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -111,17 +111,7 @@ def compress_weight(
111111
if device is not None:
112112
quantized_weight = quantized_weight.to(device)
113113

114-
compressed_dict = {"weight": quantized_weight}
115-
116-
# Include scale, zero_point, and g_idx if they exist
117-
if scale is not None:
118-
compressed_dict["weight_scale"] = scale
119-
if zero_point is not None:
120-
compressed_dict["weight_zero_point"] = zero_point
121-
if g_idx is not None:
122-
compressed_dict["weight_g_idx"] = g_idx
123-
124-
return compressed_dict
114+
return {"weight": quantized_weight}
125115

126116
def decompress_weight(
127117
self,

src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py

Lines changed: 14 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -132,36 +132,22 @@ def compress_weight(
132132

133133
packed_weight = pack_to_int32(quantized_weight, quantization_args.num_bits)
134134

135-
weight_shape = torch.tensor(weight.shape, dtype=torch.int32)
135+
weight_shape = torch.tensor(weight.shape)
136136
if device is not None:
137137
packed_weight = packed_weight.to(device)
138138
weight_shape = weight_shape.to(device)
139139

140140
compressed_dict["weight_shape"] = weight_shape
141141
compressed_dict["weight_packed"] = packed_weight
142142

143-
# Include scale if provided
144-
if scale is not None:
145-
compressed_dict["weight_scale"] = scale
146-
147-
# Include zero_point if provided
148-
if zero_point is not None:
149-
if not quantization_args.symmetric and quantization_args.strategy in [
150-
QuantizationStrategy.GROUP.value,
151-
QuantizationStrategy.CHANNEL.value,
152-
]:
153-
packed_zp = pack_to_int32(
154-
zero_point, quantization_args.num_bits, packed_dim=0
155-
)
156-
compressed_dict["weight_zero_point"] = packed_zp.contiguous()
157-
else:
158-
# For symmetric or other strategies, include unpacked zero_point
159-
compressed_dict["weight_zero_point"] = zero_point
160-
161-
# Include g_idx if provided
162-
if g_idx is not None:
163-
compressed_dict["weight_g_idx"] = g_idx
164-
143+
if not quantization_args.symmetric and quantization_args.strategy in [
144+
QuantizationStrategy.GROUP.value,
145+
QuantizationStrategy.CHANNEL.value,
146+
]:
147+
packed_zp = pack_to_int32(
148+
zero_point, quantization_args.num_bits, packed_dim=0
149+
)
150+
compressed_dict["weight_zero_point"] = packed_zp.contiguous()
165151
return compressed_dict
166152

167153
def decompress_weight(
@@ -192,13 +178,11 @@ def decompress_weight(
192178
zero_point is not None
193179
), "Asymmetric quantization requires zero-point values"
194180
original_zp_shape = (original_shape[0], scale.shape[-1])
195-
# Only unpack if it's still packed (int32)
196-
if zero_point.dtype == torch.int32:
197-
zero_point = unpack_from_int32(
198-
zero_point, num_bits, original_zp_shape, packed_dim=0
199-
)
200-
# Update the compressed_data dict with the unpacked zero_point
201-
compressed_data["weight_zero_point"] = zero_point
181+
zero_point = unpack_from_int32(
182+
zero_point, num_bits, original_zp_shape, packed_dim=0
183+
)
184+
# Update the compressed_data dict with the unpacked zero_point
185+
compressed_data["weight_zero_point"] = zero_point
202186

203187
decompressed_weight = dequantize(
204188
x_q=unpacked, scale=scale, zero_point=zero_point, g_idx=g_idx

tests/test_compressors/model_compressors/test_model_compressor.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,6 @@ def test_composability(tmp_path, sparsity_config, quantization_config):
214214
"linear.row_offsets",
215215
"linear.shape",
216216
"linear.weight_scale",
217-
"linear.weight_zero_point",
218217
},
219218
)
220219
],

tests/test_compressors/quantized_compressors/test_fp8_quant.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,8 @@ def test_quant_format(strategy, group_size, sc, zp):
8989
dense_state_dict, names_to_scheme=module_name_to_scheme
9090
)
9191

92-
# state_dict params should be the same (zero_point included even for symmetric)
93-
assert len(dense_state_dict) == len(compressed_state_dict)
92+
# state_dict params should be the same, minus the zero_point if symmetric
93+
assert len(dense_state_dict) == len(compressed_state_dict) + 1
9494

9595
# check compressed to int8
9696
assert compressed_state_dict["dummy.weight_scale"].dtype == torch.float32

tests/test_compressors/quantized_compressors/test_int_quant.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,11 @@ def test_quant_format(strategy, symmetric, group_size, sc, zp):
8181
dense_state_dict, names_to_scheme=quantized_modules_to_scheme
8282
)
8383

84-
# state_dict params should be the same (zero_point included even for symmetric)
85-
assert len(dense_state_dict) == len(compressed_state_dict)
84+
# state_dict params should be the same, minus the zero_point if symmetric
85+
if symmetric:
86+
assert len(dense_state_dict) == len(compressed_state_dict) + 1
87+
else:
88+
assert len(dense_state_dict) == len(compressed_state_dict)
8689

8790
# check compressed to int8
8891
assert compressed_state_dict["dummy.weight"].dtype == torch.int8

tests/test_compressors/quantized_compressors/test_pack_quant.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,9 @@ def test_quant_format(shape):
8888
dense_state_dict, names_to_scheme=quantized_modules_to_scheme
8989
)
9090

91-
# compressed state_dict adds one entry for shape and keeps zero_point
92-
assert len(dense_state_dict) + 1 == len(compressed_state_dict)
91+
# compressed state_dict adds one entry for shape
92+
# but removes the zero points since we are symmetric
93+
assert len(dense_state_dict) == len(compressed_state_dict)
9394

9495
# check compressed and packed
9596
assert compressed_state_dict["dummy.weight_packed"].dtype == torch.int32

0 commit comments

Comments (0)