vllm-project · brian-dellabetta · Aug 12, 2025 · Aug 11, 2025 · Aug 11, 2025 · Aug 11, 2025
diff --git a/tests/llmcompressor/modifiers/calibration/test_cache.py b/tests/llmcompressor/modifiers/calibration/test_cache.py
@@ -47,8 +47,8 @@ def test_is_quantized_cache_singleton():
 
 
 def test_update():
-    nbits = 8
-    args = QuantizationArgs(nbits=nbits, symmetric=True)
+    num_bits = 8
+    args = QuantizationArgs(num_bits=num_bits, symmetric=True)
     cache = QuantizedKVParameterCache(args)
 
     max_key_states_val = 1.0
@@ -62,7 +62,7 @@ def test_update():
     layer_idx = 0
 
     cache.update(key_states, value_states, layer_idx)
-    denom = (2 ** (nbits) - 1) / 2
+    denom = (2 ** (num_bits) - 1) / 2
     expected_k_scale = torch.tensor([max_key_states_val / denom])
     expected_v_scale = torch.tensor([max_value_states_val / denom])
 
@@ -83,8 +83,8 @@ def test_update():
 
 
 def test_cache_reset():
-    nbits = 8
-    args = QuantizationArgs(nbits=nbits, symmetric=True)
+    num_bits = 8
+    args = QuantizationArgs(num_bits=num_bits, symmetric=True)
     cache = QuantizedKVParameterCache(args)
 
     max_key_states_val = 1.0

diff --git a/tests/llmcompressor/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py b/tests/llmcompressor/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py
@@ -96,13 +96,11 @@ def setUp(self):
                         "symmetric": False,
                         "strategy": "token",
                         "dynamic": True,
-                        "kwargs": {},
                     },
                     "weights": {
                         "num_bits": 4,
                         "symmetric": True,
                         "strategy": "channel",
-                        "kwargs": {},
                     },
                 }
             }

diff --git a/tests/llmcompressor/pytorch/utils/test_sparse.py b/tests/llmcompressor/pytorch/utils/test_sparse.py
@@ -1,6 +1,11 @@
 import pytest
 import torch
-from compressed_tensors.quantization import QuantizationArgs, QuantizationScheme
+from compressed_tensors.quantization import (
+    QuantizationArgs,
+    QuantizationScheme,
+    QuantizationStrategy,
+    QuantizationType,
+)
 from torch.nn import Linear, Module, ReLU
 
 from llmcompressor.pytorch.utils import ModuleSparsificationInfo
@@ -16,10 +21,12 @@ def __init__(self):
         self.fc1.quantization_scheme = QuantizationScheme(
             targets=["model.fc1"],
             weights=QuantizationArgs(
-                precision=8,
-                granularity="per_tensor",
-                algorithm="gptq",
-                blocksize=128,
+                num_bits=8,
+                type=QuantizationType.INT,
+                group_size=128,
+                strategy=QuantizationStrategy.GROUP,
+                symmetric=True,
+                dynamic=False,
             ),
         )