Commit 62958bb

Merge branch 'main' into kylesayrs/transform-quip-modifier

2 parents 972f59f + d5a6a4b

4 files changed (+18 -13 lines)

tests/llmcompressor/modifiers/calibration/test_cache.py

Lines changed: 5 additions & 5 deletions
@@ -47,8 +47,8 @@ def test_is_quantized_cache_singleton():


 def test_update():
-    nbits = 8
-    args = QuantizationArgs(nbits=nbits, symmetric=True)
+    num_bits = 8
+    args = QuantizationArgs(num_bits=num_bits, symmetric=True)
     cache = QuantizedKVParameterCache(args)

     max_key_states_val = 1.0
@@ -62,7 +62,7 @@ def test_update():
     layer_idx = 0

     cache.update(key_states, value_states, layer_idx)
-    denom = (2 ** (nbits) - 1) / 2
+    denom = (2 ** (num_bits) - 1) / 2
     expected_k_scale = torch.tensor([max_key_states_val / denom])
     expected_v_scale = torch.tensor([max_value_states_val / denom])

@@ -83,8 +83,8 @@ def test_update():


 def test_cache_reset():
-    nbits = 8
-    args = QuantizationArgs(nbits=nbits, symmetric=True)
+    num_bits = 8
+    args = QuantizationArgs(num_bits=num_bits, symmetric=True)
     cache = QuantizedKVParameterCache(args)

     max_key_states_val = 1.0
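The rename tracks the compressed-tensors API, where the field is num_bits. As a minimal standalone sketch of the arithmetic this test asserts (not the QuantizedKVParameterCache implementation itself): for symmetric quantization the expected scale is the observed max divided by (2**num_bits - 1) / 2.

import torch

def expected_symmetric_scale(max_abs_val: float, num_bits: int = 8) -> torch.Tensor:
    # Symmetric quantization maps [-max, max] onto 2**num_bits - 1 integer
    # levels, so the per-tensor scale is max / ((2**num_bits - 1) / 2).
    denom = (2 ** num_bits - 1) / 2  # 127.5 when num_bits == 8
    return torch.tensor([max_abs_val / denom])

expected_symmetric_scale(1.0)  # tensor([0.0078]), matching expected_k_scale above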

tests/llmcompressor/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py

Lines changed: 0 additions & 2 deletions
@@ -96,13 +96,11 @@ def setUp(self):
                 "symmetric": False,
                 "strategy": "token",
                 "dynamic": True,
-                "kwargs": {},
             },
             "weights": {
                 "num_bits": 4,
                 "symmetric": True,
                 "strategy": "channel",
-                "kwargs": {},
             },
         }
     }
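The removed "kwargs": {} entries appear to be empty placeholders that the current quantization config schema no longer uses. Reassembled from the hunk context, the config in setUp now reads roughly as below; the outer key above the hunk is not visible in the diff, so "input_activations" here is an assumption:

quantization_config = {
    "input_activations": {  # assumed key name; the hunk begins mid-dict
        "symmetric": False,
        "strategy": "token",
        "dynamic": True,
    },
    "weights": {
        "num_bits": 4,
        "symmetric": True,
        "strategy": "channel",
    },
}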

tests/llmcompressor/pytorch/utils/test_sparse.py

Lines changed: 12 additions & 5 deletions
@@ -1,6 +1,11 @@
 import pytest
 import torch
-from compressed_tensors.quantization import QuantizationArgs, QuantizationScheme
+from compressed_tensors.quantization import (
+    QuantizationArgs,
+    QuantizationScheme,
+    QuantizationStrategy,
+    QuantizationType,
+)
 from torch.nn import Linear, Module, ReLU

 from llmcompressor.pytorch.utils import ModuleSparsificationInfo
@@ -16,10 +21,12 @@ def __init__(self):
         self.fc1.quantization_scheme = QuantizationScheme(
             targets=["model.fc1"],
             weights=QuantizationArgs(
-                precision=8,
-                granularity="per_tensor",
-                algorithm="gptq",
-                blocksize=128,
+                num_bits=8,
+                type=QuantizationType.INT,
+                group_size=128,
+                strategy=QuantizationStrategy.GROUP,
+                symmetric=True,
+                dynamic=False,
             ),
         )
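Standalone, the updated construction looks like this; it is lifted directly from the diff and assumes only that compressed-tensors is installed. The old keyword names (precision, granularity, algorithm, blocksize) are replaced by the current QuantizationArgs fields:

from compressed_tensors.quantization import (
    QuantizationArgs,
    QuantizationScheme,
    QuantizationStrategy,
    QuantizationType,
)

# 8-bit symmetric integer weight quantization, grouped in blocks of 128.
scheme = QuantizationScheme(
    targets=["model.fc1"],
    weights=QuantizationArgs(
        num_bits=8,
        type=QuantizationType.INT,
        group_size=128,
        strategy=QuantizationStrategy.GROUP,
        symmetric=True,
        dynamic=False,
    ),
)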

tests/lmeval/configs/w4a16_awq_sym.yaml

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 cadence: "weekly"
 model: meta-llama/Meta-Llama-3-8B-Instruct
 scheme: W4A16
-recipe: tests/e2e/vLLM/recipes/AWQ/recipe_w4a16_awq_sym.yaml
+recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_sym.yaml
 dataset_id: HuggingFaceH4/ultrachat_200k
 dataset_split: train_sft
 lmeval:
