From 2dffddd72065ef837ce94563e385a37a1ecafb0c Mon Sep 17 00:00:00 2001 From: Brian Dellabetta Date: Mon, 29 Sep 2025 22:04:10 +0000 Subject: [PATCH 1/3] squashed Signed-off-by: Brian Dellabetta --- src/llmcompressor/modifiers/awq/base.py | 4 +++- .../vLLM/configs/w4a16_grouped_quant_asym_awq.yaml | 4 ++-- .../vLLM/configs/w4a16_grouped_quant_sym_awq.yaml | 4 ++-- .../e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_sym.yaml | 1 + tests/testing_utils.py | 12 ------------ 5 files changed, 8 insertions(+), 17 deletions(-) diff --git a/src/llmcompressor/modifiers/awq/base.py b/src/llmcompressor/modifiers/awq/base.py index 49698fe09..72a7240e3 100644 --- a/src/llmcompressor/modifiers/awq/base.py +++ b/src/llmcompressor/modifiers/awq/base.py @@ -139,8 +139,10 @@ class AWQModifier(Modifier, QuantizationMixin): default_factory=dict ) + # NOTE: different name chosen to avoid collision with + # QuantizationMixin.validate_model_after, which must be called first @model_validator(mode="after") - def validate_model_after(model: "AWQModifier") -> "AWQModifier": + def validate_awq_after(model: "AWQModifier") -> "AWQModifier": """ Confirm only one configuration for group_size, symmetric, and num_bits, as AWQ algorithm depends on it diff --git a/tests/e2e/vLLM/configs/w4a16_grouped_quant_asym_awq.yaml b/tests/e2e/vLLM/configs/w4a16_grouped_quant_asym_awq.yaml index d833c5aa2..83ac5a645 100644 --- a/tests/e2e/vLLM/configs/w4a16_grouped_quant_asym_awq.yaml +++ b/tests/e2e/vLLM/configs/w4a16_grouped_quant_asym_awq.yaml @@ -2,7 +2,7 @@ cadence: "nightly" test_type: "regression" model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_asym_awq.yaml -dataset_id: "mit-han-lab/pile-val-backup" -dataset_split: validation +dataset_id: HuggingFaceH4/ultrachat_200k +dataset_split: train_sft scheme: W4A16_weight_asym_awq save_dir: TinyLlama-1.1B-Chat-v1.0-w4a16-asym-awq \ No newline at end of file diff --git a/tests/e2e/vLLM/configs/w4a16_grouped_quant_sym_awq.yaml b/tests/e2e/vLLM/configs/w4a16_grouped_quant_sym_awq.yaml index d0bb47b6e..220649a18 100644 --- a/tests/e2e/vLLM/configs/w4a16_grouped_quant_sym_awq.yaml +++ b/tests/e2e/vLLM/configs/w4a16_grouped_quant_sym_awq.yaml @@ -2,7 +2,7 @@ cadence: "nightly" test_type: "regression" model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_sym.yaml -dataset_id: "mit-han-lab/pile-val-backup" -dataset_split: validation +dataset_id: HuggingFaceH4/ultrachat_200k +dataset_split: train_sft scheme: W4A16_weight_sym_awq save_dir: TinyLlama-1.1B-Chat-v1.0-w4a16-sym-awq diff --git a/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_sym.yaml b/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_sym.yaml index cee6a217e..66976136f 100644 --- a/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_sym.yaml +++ b/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_sym.yaml @@ -11,3 +11,4 @@ quant_stage: strategy: "group" group_size: 128 targets: ["Linear"] + \ No newline at end of file diff --git a/tests/testing_utils.py b/tests/testing_utils.py index b42470893..4ce6a5de6 100644 --- a/tests/testing_utils.py +++ b/tests/testing_utils.py @@ -285,18 +285,6 @@ def process(sample): "images": sample["image"], } - elif ds_name == "pile-val-backup": - - def preprocess(example): - return { - "input_ids": processor.encode(example["text"].strip())[:max_seq_length] - } - - ds = ds.map(preprocess, remove_columns=ds.column_names) - # Note: potentially swap filtering to pad for AWQ - ds = ds.filter(lambda example: len(example["input_ids"]) >= max_seq_length) - return ds - else: raise NotImplementedError(f"Cannot preprocess dataset {ds.info.dataset_name}") From 13b9add7d4af991c35e987e67279973ac4bf0312 Mon Sep 17 00:00:00 2001 From: Brian Dellabetta Date: Mon, 29 Sep 2025 22:06:00 +0000 Subject: [PATCH 2/3] standardize names Signed-off-by: Brian Dellabetta --- tests/e2e/vLLM/configs/w4a16_grouped_quant_asym_awq.yaml | 2 +- ...a16_group_quant_asym_awq.yaml => recipe_w4a16_awq_asym.yaml} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/e2e/vLLM/recipes/WNA16/{recipe_w4a16_group_quant_asym_awq.yaml => recipe_w4a16_awq_asym.yaml} (100%) diff --git a/tests/e2e/vLLM/configs/w4a16_grouped_quant_asym_awq.yaml b/tests/e2e/vLLM/configs/w4a16_grouped_quant_asym_awq.yaml index 83ac5a645..6ae656b1d 100644 --- a/tests/e2e/vLLM/configs/w4a16_grouped_quant_asym_awq.yaml +++ b/tests/e2e/vLLM/configs/w4a16_grouped_quant_asym_awq.yaml @@ -1,7 +1,7 @@ cadence: "nightly" test_type: "regression" model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 -recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_asym_awq.yaml +recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_asym.yaml dataset_id: HuggingFaceH4/ultrachat_200k dataset_split: train_sft scheme: W4A16_weight_asym_awq diff --git a/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_asym_awq.yaml b/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_asym.yaml similarity index 100% rename from tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_asym_awq.yaml rename to tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_asym.yaml From 74b90c50537f4a5f2d845d913e1d7732e9818841 Mon Sep 17 00:00:00 2001 From: Brian Dellabetta Date: Mon, 29 Sep 2025 22:06:42 +0000 Subject: [PATCH 3/3] newline Signed-off-by: Brian Dellabetta --- tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_sym.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_sym.yaml b/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_sym.yaml index 66976136f..cee6a217e 100644 --- a/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_sym.yaml +++ b/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_sym.yaml @@ -11,4 +11,3 @@ quant_stage: strategy: "group" group_size: 128 targets: ["Linear"] - \ No newline at end of file