Skip to content

Commit ca3ca7d

Browse files
committed
fix tests
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent 4b74e4e commit ca3ca7d

File tree

2 files changed

+18
-17
lines changed

2 files changed

+18
-17
lines changed

tests/llmcompressor/transformers/compression/test_compress_tensor_utils.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ def test_quant_model_reload(format, dtype, tmp_path):
8787
quantization_config=CompressedTensorsConfig(run_compressed=False),
8888
)
8989

90+    _remove_zp(og_state_dict)  # HACK: remove extra zero points added during quant init
9091
reconstructed_state_dict = decompressed_model.state_dict()
9192
assert len(og_state_dict) == len(reconstructed_state_dict)
9293
for key in og_state_dict.keys():
@@ -275,3 +276,11 @@ def test_correct_compressor_inferred(
275276
model.linear.quantization_status = QuantizationStatus.FROZEN
276277

277278
assert infer_model_format(model) == expected_format
279+
280+
281+ def _remove_zp(state_dict: dict) -> dict:
282+     return {
283+         key: value
284+         for key, value in state_dict.items()
285+         if not key.endswith("zero_point")
286+     }

tests/llmcompressor/transformers/compression/test_quantization.py

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -39,20 +39,16 @@ def _get_quant_info(model):
3939
for name, module in model.named_modules():
4040
with align_module_device(module):
4141
if is_module_quantized(module):
42+
# skip zero points, as these are removed between
43+
# compression/decompression for symmetric models
44+
4245
if module.quantization_scheme.weights is not None:
43-
quant_info_weights[name] = (
44-
module.weight_scale,
45-
module.weight_zero_point,
46-
module.weight,
47-
)
46+
quant_info_weights[name] = (module.weight_scale, module.weight)
4847

4948
if module.quantization_scheme.input_activations is not None:
5049
is_dynamic = module.quantization_scheme.input_activations.dynamic
5150
if not is_dynamic:
52-
quant_info_inputs[name] = (
53-
module.input_scale,
54-
module.input_zero_point,
55-
)
51+
quant_info_inputs[name] = (module.input_scale,)
5652

5753
return quant_info_weights, quant_info_inputs
5854

@@ -110,23 +106,19 @@ def test_quantization_reload(setup_model_and_config):
110106
# TODO: can remove `to` calls after
111107
# https://github.com/neuralmagic/compressed-tensors/pull/427
112108

113-
for name, (o_scale, o_zp, o_weight) in og_weights.items():
114-
n_scale, n_zp, n_weight = reloaded_weights[name]
109+
for name, (o_scale, o_weight) in og_weights.items():
110+
n_scale, n_weight = reloaded_weights[name]
115111
assert o_scale.dtype == n_scale.dtype == config["weight_dtype"]
116112
assert torch.equal(o_scale, n_scale.to(o_scale.device))
117-
assert o_zp.dtype == n_zp.dtype
118-
assert torch.equal(o_zp, n_zp.to(o_zp.device))
119113

120114
# we don't expect an exact match here because o_weight still has the
121115
# original weight and n_weight has been fake_quantized
122116
assert n_weight.dtype == o_weight.dtype == config["weight_dtype"]
123117

124-
for name, (o_scale, o_zp) in og_inputs.items():
125-
n_scale, n_zp = reloaded_inputs[name]
118+
for name, (o_scale,) in og_inputs.items():
119+
(n_scale,) = reloaded_inputs[name]
126120
assert o_scale.dtype == n_scale.dtype == config["weight_dtype"]
127121
assert torch.equal(o_scale, n_scale.to(o_scale.device))
128-
assert o_zp.dtype == n_zp.dtype
129-
assert torch.equal(o_zp, n_zp.to(o_zp.device))
130122

131123

132124
@requires_gpu

0 commit comments

Comments
 (0)