diff --git a/tests/llmcompressor/modifiers/calibration/test_cache.py b/tests/llmcompressor/modifiers/calibration/test_cache.py
index 898c342f5..9b03234cf 100644
--- a/tests/llmcompressor/modifiers/calibration/test_cache.py
+++ b/tests/llmcompressor/modifiers/calibration/test_cache.py
@@ -47,8 +47,8 @@ def test_is_quantized_cache_singleton():
 
 
 def test_update():
-    nbits = 8
-    args = QuantizationArgs(nbits=nbits, symmetric=True)
+    num_bits = 8
+    args = QuantizationArgs(num_bits=num_bits, symmetric=True)
     cache = QuantizedKVParameterCache(args)
 
     max_key_states_val = 1.0
@@ -62,7 +62,7 @@ def test_update():
     layer_idx = 0
     cache.update(key_states, value_states, layer_idx)
 
-    denom = (2 ** (nbits) - 1) / 2
+    denom = (2 ** (num_bits) - 1) / 2
     expected_k_scale = torch.tensor([max_key_states_val / denom])
     expected_v_scale = torch.tensor([max_value_states_val / denom])
 
@@ -83,8 +83,8 @@
 
 
 def test_cache_reset():
-    nbits = 8
-    args = QuantizationArgs(nbits=nbits, symmetric=True)
+    num_bits = 8
+    args = QuantizationArgs(num_bits=num_bits, symmetric=True)
     cache = QuantizedKVParameterCache(args)
 
     max_key_states_val = 1.0
diff --git a/tests/llmcompressor/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py b/tests/llmcompressor/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py
index 65d835ab1..44a582788 100644
--- a/tests/llmcompressor/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py
+++ b/tests/llmcompressor/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py
@@ -96,13 +96,11 @@ def setUp(self):
                     "symmetric": False,
                     "strategy": "token",
                     "dynamic": True,
-                    "kwargs": {},
                 },
                 "weights": {
                     "num_bits": 4,
                     "symmetric": True,
                     "strategy": "channel",
-                    "kwargs": {},
                 },
             }
         }
diff --git a/tests/llmcompressor/pytorch/utils/test_sparse.py b/tests/llmcompressor/pytorch/utils/test_sparse.py
index b0d6e4321..aa4035a12 100644
--- a/tests/llmcompressor/pytorch/utils/test_sparse.py
+++ b/tests/llmcompressor/pytorch/utils/test_sparse.py
@@ -1,6 +1,11 @@
 import pytest
 import torch
-from compressed_tensors.quantization import QuantizationArgs, QuantizationScheme
+from compressed_tensors.quantization import (
+    QuantizationArgs,
+    QuantizationScheme,
+    QuantizationStrategy,
+    QuantizationType,
+)
 from torch.nn import Linear, Module, ReLU
 
 from llmcompressor.pytorch.utils import ModuleSparsificationInfo
@@ -16,10 +21,12 @@ def __init__(self):
         self.fc1.quantization_scheme = QuantizationScheme(
             targets=["model.fc1"],
             weights=QuantizationArgs(
-                precision=8,
-                granularity="per_tensor",
-                algorithm="gptq",
-                blocksize=128,
+                num_bits=8,
+                type=QuantizationType.INT,
+                group_size=128,
+                strategy=QuantizationStrategy.GROUP,
+                symmetric=True,
+                dynamic=False,
            ),
         )
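
For reference, a minimal sketch of the updated compressed-tensors API that these tests migrate to. The keyword names and values mirror the diff above (`num_bits` replaces the old `nbits`, and the `precision`/`granularity`/`algorithm`/`blocksize` keywords are replaced by `num_bits`/`type`/`group_size`/`strategy`); the standalone snippet itself is illustrative, not part of the change:

# Sketch of the new QuantizationArgs / QuantizationScheme construction,
# using only fields exercised by the tests in this diff.
from compressed_tensors.quantization import (
    QuantizationArgs,
    QuantizationScheme,
    QuantizationStrategy,
    QuantizationType,
)

# Symmetric 8-bit KV-cache args, as in test_update / test_cache_reset;
# the keyword is now `num_bits`, not `nbits`.
kv_args = QuantizationArgs(num_bits=8, symmetric=True)

# Scale denominator checked by test_update: for symmetric 8-bit,
# (2**8 - 1) / 2 == 127.5, so expected scale == max_val / 127.5.
denom = (2**kv_args.num_bits - 1) / 2

# Grouped int8 weight scheme, as in the updated test_sparse.py.
scheme = QuantizationScheme(
    targets=["model.fc1"],
    weights=QuantizationArgs(
        num_bits=8,
        type=QuantizationType.INT,
        group_size=128,
        strategy=QuantizationStrategy.GROUP,
        symmetric=True,
        dynamic=False,
    ),
)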