Commit 62958bb

Merge branch 'main' into kylesayrs/transform-quip-modifier

2 parents 972f59f + d5a6a4b

4 files changed (+18 -13 lines)

tests/llmcompressor/modifiers/calibration/test_cache.py

Lines changed: 5 additions & 5 deletions
@@ -47,8 +47,8 @@ def test_is_quantized_cache_singleton():


 def test_update():
-    nbits = 8
-    args = QuantizationArgs(nbits=nbits, symmetric=True)
+    num_bits = 8
+    args = QuantizationArgs(num_bits=num_bits, symmetric=True)
     cache = QuantizedKVParameterCache(args)

     max_key_states_val = 1.0
@@ -62,7 +62,7 @@ def test_update():
     layer_idx = 0

     cache.update(key_states, value_states, layer_idx)
-    denom = (2 ** (nbits) - 1) / 2
+    denom = (2 ** (num_bits) - 1) / 2
     expected_k_scale = torch.tensor([max_key_states_val / denom])
     expected_v_scale = torch.tensor([max_value_states_val / denom])

@@ -83,8 +83,8 @@ def test_update():


 def test_cache_reset():
-    nbits = 8
-    args = QuantizationArgs(nbits=nbits, symmetric=True)
+    num_bits = 8
+    args = QuantizationArgs(num_bits=num_bits, symmetric=True)
     cache = QuantizedKVParameterCache(args)

     max_key_states_val = 1.0
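The rename tracks the compressed-tensors API, where the field is num_bits. As a minimal standalone sketch of the arithmetic this test asserts (not the QuantizedKVParameterCache implementation itself): for symmetric quantization the expected scale is the observed max divided by (2**num_bits - 1) / 2.

import torch

def expected_symmetric_scale(max_abs_val: float, num_bits: int = 8) -> torch.Tensor:
    # Symmetric quantization maps [-max, max] onto 2**num_bits - 1 integer
    # levels, so the per-tensor scale is max / ((2**num_bits - 1) / 2).
    denom = (2 ** num_bits - 1) / 2  # 127.5 when num_bits == 8
    return torch.tensor([max_abs_val / denom])

expected_symmetric_scale(1.0)  # tensor([0.0078]), matching expected_k_scale above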

tests/llmcompressor/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py

Lines changed: 0 additions & 2 deletions
@@ -96,13 +96,11 @@ def setUp(self):
                 "symmetric": False,
                 "strategy": "token",
                 "dynamic": True,
-                "kwargs": {},
             },
             "weights": {
                 "num_bits": 4,
                 "symmetric": True,
                 "strategy": "channel",
-                "kwargs": {},
             },
         }
     }
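The removed "kwargs": {} entries appear to be empty placeholders that the current quantization config schema no longer uses. Reassembled from the hunk context, the config in setUp now reads roughly as below; the outer key above the hunk is not visible in the diff, so "input_activations" here is an assumption:

quantization_config = {
    "input_activations": {  # assumed key name; the hunk begins mid-dict
        "symmetric": False,
        "strategy": "token",
        "dynamic": True,
    },
    "weights": {
        "num_bits": 4,
        "symmetric": True,
        "strategy": "channel",
    },
}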

tests/llmcompressor/pytorch/utils/test_sparse.py

Lines changed: 12 additions & 5 deletions
@@ -1,6 +1,11 @@
 import pytest
 import torch
-from compressed_tensors.quantization import QuantizationArgs, QuantizationScheme
+from compressed_tensors.quantization import (
+    QuantizationArgs,
+    QuantizationScheme,
+    QuantizationStrategy,
+    QuantizationType,
+)
 from torch.nn import Linear, Module, ReLU

 from llmcompressor.pytorch.utils import ModuleSparsificationInfo
@@ -16,10 +21,12 @@ def __init__(self):
         self.fc1.quantization_scheme = QuantizationScheme(
             targets=["model.fc1"],
             weights=QuantizationArgs(
-                precision=8,
-                granularity="per_tensor",
-                algorithm="gptq",
-                blocksize=128,
+                num_bits=8,
+                type=QuantizationType.INT,
+                group_size=128,
+                strategy=QuantizationStrategy.GROUP,
+                symmetric=True,
+                dynamic=False,
             ),
         )
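Standalone, the updated construction looks like this; it is lifted directly from the diff and assumes only that compressed-tensors is installed. The old keyword names (precision, granularity, algorithm, blocksize) are replaced by the current QuantizationArgs fields:

from compressed_tensors.quantization import (
    QuantizationArgs,
    QuantizationScheme,
    QuantizationStrategy,
    QuantizationType,
)

# 8-bit symmetric integer weight quantization, grouped in blocks of 128.
scheme = QuantizationScheme(
    targets=["model.fc1"],
    weights=QuantizationArgs(
        num_bits=8,
        type=QuantizationType.INT,
        group_size=128,
        strategy=QuantizationStrategy.GROUP,
        symmetric=True,
        dynamic=False,
    ),
)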

tests/lmeval/configs/w4a16_awq_sym.yaml

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 cadence: "weekly"
 model: meta-llama/Meta-Llama-3-8B-Instruct
 scheme: W4A16
-recipe: tests/e2e/vLLM/recipes/AWQ/recipe_w4a16_awq_sym.yaml
+recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_sym.yaml
 dataset_id: HuggingFaceH4/ultrachat_200k
 dataset_split: train_sft
 lmeval:
