Commit 0bb5076

[Tests] Increase maximum quantization error (#1245)
## Purpose ##

* Reduce false-positive test failures as a result of noisy quantization
* https://github.com/vllm-project/llm-compressor/actions/runs/13775944340/job/38525122617#step:12:535

## Changes ##

* Increase the maximum allowed error due to quantization from 0.022 to 0.025

---------

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent d43ea79 commit 0bb5076

File tree: 1 file changed (+2, -2 lines)

tests/llmcompressor/transformers/sparsification/test_compress_tensor_utils.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -430,8 +430,8 @@ def test_compressor_stacking(model_stub, recipe, sparse_format, quant_format, tm
         if key.endswith("weight") and quant_format != "dense":
             # we don't expect an exact match for compressed
             diff = torch.abs(dense_tensor - reconstructed_tensor)
-            # max diff value found empirically
-            assert not torch.any(diff > 0.022), f"Max diff: {torch.max(diff)}"
+            # maximum quantization error as a result of compression is ~0.025
+            assert not torch.any(diff > 0.025), f"Max diff: {torch.max(diff)}"
         else:
             assert torch.equal(dense_tensor, reconstructed_tensor)
     shutil.rmtree(tmp_path)
```
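For context, the threshold being loosened bounds the round-trip error between the original dense weights and the weights reconstructed after quantized compression. The following is a minimal, self-contained sketch of where such an error bound comes from, using plain symmetric round-to-nearest quantization with a hypothetical scale value; it is not the project's actual `compressed-tensors` implementation, where the scales are derived per-tensor or per-channel from the weights themselves.

```python
def quantize(values, scale):
    # Symmetric round-to-nearest quantization, clamped to the int8 range.
    # `scale` is a hypothetical, hand-picked step size for illustration.
    return [max(-128, min(127, round(v / scale))) for v in values]

def dequantize(qvalues, scale):
    # Reconstruct approximate float values from the quantized integers.
    return [q * scale for q in qvalues]

scale = 0.05  # assumed step size; real scales come from the weight distribution
values = [0.3, -0.7, 1.234, -2.5, 0.011]
reconstructed = dequantize(quantize(values, scale), scale)

# Round-to-nearest error is bounded by half a quantization step (scale / 2),
# so with scale = 0.05 every element lands within 0.025 of the original.
max_diff = max(abs(a - b) for a, b in zip(values, reconstructed))
print(max_diff)
```

Under this model, a test threshold like 0.025 corresponds to tolerating roughly half a quantization step of reconstruction error, which is why a slightly noisy quantizer can occasionally exceed a tighter empirical bound such as 0.022.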
