
Commit f0f6392

fix kv cache test
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent: 80a2639

File tree: 2 files changed (+8 −3 lines)

src/llmcompressor/modifiers/quantization/gptq/base.py
Lines changed: 2 additions & 2 deletions

@@ -158,8 +158,8 @@ def on_start(self, state: State, event: Event, **kwargs):
 
         if not added_hook:
             raise ValueError(
-                "GPTQModifier requires a quantization config be specified by this "
-                "modifier or a modifier preceding it"
+                "GPTQModifier requires a weight quantization config be specified by "
+                "this modifier or a modifier preceding it"
            )
 
     def on_event(self, state: State, event: Event, **kwargs):
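
For reference, a minimal sketch of a recipe that satisfies the requirement named in the new error message. It assumes the public llmcompressor oneshot API with an illustrative model id and calibration dataset; it is not part of this commit.

# Sketch only: GPTQModifier carries its own weight quantization scheme (W4A16),
# so on_start finds a weight config and the ValueError above is never raised.
from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import GPTQModifier

recipe = GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"])

oneshot(
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # illustrative model id
    dataset="open_platypus",                     # illustrative calibration dataset
    recipe=recipe,
    max_seq_length=512,
    num_calibration_samples=64,
)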

tests/llmcompressor/transformers/kv_cache/test_kv_cache.py
Lines changed: 6 additions & 1 deletion

@@ -7,6 +7,7 @@
 from compressed_tensors.quantization.utils.helpers import iter_named_quantizable_modules
 from datasets import load_dataset
 from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
+from transformers.utils.quantization_config import CompressedTensorsConfig
 
 from llmcompressor import oneshot
 from llmcompressor.core import reset_session
@@ -236,7 +237,11 @@ def test_kv_cache_gptq_model_state_dict_attr(kv_cache_fixture, tmp_path):
     output_dir, _ = next(kv_cache_fixture(recipe, tmp_path))
 
     with init_empty_weights():
-        model = AutoModelForCausalLM.from_pretrained(output_dir)
+        # There is a bug in `apply_quantization_config`
+        model = AutoModelForCausalLM.from_pretrained(
+            output_dir,
+            quantization_config=CompressedTensorsConfig(run_compressed=False),
+        )
 
     counts = 0
     for name, submodule in iter_named_quantizable_modules(
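
For context, a minimal sketch of the load pattern the test now uses: run_compressed=False asks transformers to decompress the compressed-tensors weights at load time, working around the apply_quantization_config bug noted in the diff. The checkpoint path below is hypothetical.

# Sketch only: load a compressed-tensors checkpoint with weights decompressed.
from accelerate import init_empty_weights
from transformers import AutoModelForCausalLM
from transformers.utils.quantization_config import CompressedTensorsConfig

with init_empty_weights():  # build the model without allocating real weight memory
    model = AutoModelForCausalLM.from_pretrained(
        "path/to/kv_cache_checkpoint",  # hypothetical output_dir from the fixture
        quantization_config=CompressedTensorsConfig(run_compressed=False),
    )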
