From cc3e7aa69dd3a1dbf5d2f37ca0f9e508b2477fd5 Mon Sep 17 00:00:00 2001 From: Brian Dellabetta Date: Mon, 11 Aug 2025 15:31:55 -0500 Subject: [PATCH 1/6] fix quant scheme errors Signed-off-by: Brian Dellabetta --- .../pytorch/utils/test_sparse.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/tests/llmcompressor/pytorch/utils/test_sparse.py b/tests/llmcompressor/pytorch/utils/test_sparse.py index b0d6e4321..1b7d9f4a4 100644 --- a/tests/llmcompressor/pytorch/utils/test_sparse.py +++ b/tests/llmcompressor/pytorch/utils/test_sparse.py @@ -1,6 +1,6 @@ import pytest import torch -from compressed_tensors.quantization import QuantizationArgs, QuantizationScheme +from compressed_tensors.quantization import QuantizationArgs, QuantizationScheme, QuantizationStrategy, QuantizationType from torch.nn import Linear, Module, ReLU from llmcompressor.pytorch.utils import ModuleSparsificationInfo @@ -14,14 +14,16 @@ def __init__(self): self.relu = ReLU() self.fc1.quantization_scheme = QuantizationScheme( - targets=["model.fc1"], - weights=QuantizationArgs( - precision=8, - granularity="per_tensor", - algorithm="gptq", - blocksize=128, - ), - ) + targets=["Linear"], + weights=QuantizationArgs( + num_bits=4, + type=QuantizationType.INT, + group_size=128, + strategy=QuantizationStrategy.GROUP, + symmetric=True, + dynamic=False, + ) + ) def test_module_quantization_info(): From 73b9775501c49516102bf113515632abdec91761 Mon Sep 17 00:00:00 2001 From: Brian Dellabetta Date: Mon, 11 Aug 2025 20:32:29 +0000 Subject: [PATCH 2/6] formatting Signed-off-by: Brian Dellabetta --- .../pytorch/utils/test_sparse.py | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/tests/llmcompressor/pytorch/utils/test_sparse.py b/tests/llmcompressor/pytorch/utils/test_sparse.py index 1b7d9f4a4..222074816 100644 --- a/tests/llmcompressor/pytorch/utils/test_sparse.py +++ b/tests/llmcompressor/pytorch/utils/test_sparse.py @@ -1,6 +1,11 @@ import pytest import torch -from compressed_tensors.quantization import QuantizationArgs, QuantizationScheme, QuantizationStrategy, QuantizationType +from compressed_tensors.quantization import ( + QuantizationArgs, + QuantizationScheme, + QuantizationStrategy, + QuantizationType, +) from torch.nn import Linear, Module, ReLU from llmcompressor.pytorch.utils import ModuleSparsificationInfo @@ -14,16 +19,16 @@ def __init__(self): self.relu = ReLU() self.fc1.quantization_scheme = QuantizationScheme( - targets=["Linear"], - weights=QuantizationArgs( - num_bits=4, - type=QuantizationType.INT, - group_size=128, - strategy=QuantizationStrategy.GROUP, - symmetric=True, - dynamic=False, - ) - ) + targets=["Linear"], + weights=QuantizationArgs( + num_bits=4, + type=QuantizationType.INT, + group_size=128, + strategy=QuantizationStrategy.GROUP, + symmetric=True, + dynamic=False, + ), + ) def test_module_quantization_info(): From eabf1f224a84f3fc4459c905556bf62f9909d8ed Mon Sep 17 00:00:00 2001 From: Brian Dellabetta Date: Mon, 11 Aug 2025 20:35:32 +0000 Subject: [PATCH 3/6] another test fix Signed-off-by: Brian Dellabetta --- .../pytorch/modifiers/pruning/sparsegpt/test_pytorch.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/llmcompressor/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py b/tests/llmcompressor/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py index 65d835ab1..44a582788 100644 --- a/tests/llmcompressor/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py +++ b/tests/llmcompressor/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py @@ -96,13 +96,11 @@ def setUp(self): "symmetric": False, "strategy": "token", "dynamic": True, - "kwargs": {}, }, "weights": { "num_bits": 4, "symmetric": True, "strategy": "channel", - "kwargs": {}, }, } } From 5e6cdc6e6ead7de3f18618bfb826b6d8cb484f19 Mon Sep 17 00:00:00 2001 From: Brian Dellabetta Date: Mon, 11 Aug 2025 20:37:01 +0000 Subject: [PATCH 4/6] gemini fixes Signed-off-by: Brian Dellabetta --- tests/llmcompressor/pytorch/utils/test_sparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/llmcompressor/pytorch/utils/test_sparse.py b/tests/llmcompressor/pytorch/utils/test_sparse.py index 222074816..15f87474b 100644 --- a/tests/llmcompressor/pytorch/utils/test_sparse.py +++ b/tests/llmcompressor/pytorch/utils/test_sparse.py @@ -21,7 +21,7 @@ def __init__(self): self.fc1.quantization_scheme = QuantizationScheme( targets=["Linear"], weights=QuantizationArgs( - num_bits=4, + num_bits=8, type=QuantizationType.INT, group_size=128, strategy=QuantizationStrategy.GROUP, From a719ec174ff0a64644f336f93560e8eadf6eeaec Mon Sep 17 00:00:00 2001 From: Brian Dellabetta Date: Mon, 11 Aug 2025 20:45:34 +0000 Subject: [PATCH 5/6] unit test fixes Signed-off-by: Brian Dellabetta --- .../llmcompressor/modifiers/calibration/test_cache.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/llmcompressor/modifiers/calibration/test_cache.py b/tests/llmcompressor/modifiers/calibration/test_cache.py index 898c342f5..9b03234cf 100644 --- a/tests/llmcompressor/modifiers/calibration/test_cache.py +++ b/tests/llmcompressor/modifiers/calibration/test_cache.py @@ -47,8 +47,8 @@ def test_is_quantized_cache_singleton(): def test_update(): - nbits = 8 - args = QuantizationArgs(nbits=nbits, symmetric=True) + num_bits = 8 + args = QuantizationArgs(num_bits=num_bits, symmetric=True) cache = QuantizedKVParameterCache(args) max_key_states_val = 1.0 @@ -62,7 +62,7 @@ def test_update(): layer_idx = 0 cache.update(key_states, value_states, layer_idx) - denom = (2 ** (nbits) - 1) / 2 + denom = (2 ** (num_bits) - 1) / 2 expected_k_scale = torch.tensor([max_key_states_val / denom]) expected_v_scale = torch.tensor([max_value_states_val / denom]) @@ -83,8 +83,8 @@ def test_update(): def test_cache_reset(): - nbits = 8 - args = QuantizationArgs(nbits=nbits, symmetric=True) + num_bits = 8 + args = QuantizationArgs(num_bits=num_bits, symmetric=True) cache = QuantizedKVParameterCache(args) max_key_states_val = 1.0 From c842655c394b384cafd42dfdd4d1f207cee61e60 Mon Sep 17 00:00:00 2001 From: Brian Dellabetta Date: Mon, 11 Aug 2025 20:52:25 +0000 Subject: [PATCH 6/6] codereview update Signed-off-by: Brian Dellabetta --- tests/llmcompressor/pytorch/utils/test_sparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/llmcompressor/pytorch/utils/test_sparse.py b/tests/llmcompressor/pytorch/utils/test_sparse.py index 15f87474b..aa4035a12 100644 --- a/tests/llmcompressor/pytorch/utils/test_sparse.py +++ b/tests/llmcompressor/pytorch/utils/test_sparse.py @@ -19,7 +19,7 @@ def __init__(self): self.relu = ReLU() self.fc1.quantization_scheme = QuantizationScheme( - targets=["Linear"], + targets=["model.fc1"], weights=QuantizationArgs( num_bits=8, type=QuantizationType.INT,