[tests] remove pile-val-backup dataset from tests (#1879)

brian-dellabetta · web-flow · commit 4c8c0a7addc6 · 2025-09-30T09:38:25.000-04:00
SUMMARY: This PR: - [x] Removes the `pile-val-backup` dataset from e2e tests, as it is no longer used in examples and the processing logic was flawed - [x] Fixes a model validation error introduced in #1772 that was preventing AWQModifier from running one of the validations, causing it to be in an invalid state (`AWQModifier.validate_model_after` was preventing `QuantizationMixin.validate_model_after` from running). With these changes, tests pass and the compressed model generates meaningful responses. It was previously generating all 0s. TEST PLAN: `CADENCE=nightly TEST_DATA_FILE=tests/e2e/vLLM/configs/w4a16_grouped_quant_sym_awq.yaml pytest -s tests/e2e/vLLM/test_vllm.py` and `CADENCE=nightly TEST_DATA_FILE=tests/e2e/vLLM/configs/w4a16_grouped_quant_asym_awq.yaml pytest -s tests/e2e/vLLM/test_vllm.py` both pass with output like ``` PROMPT: The capital of France is GENERATED TEXT: Paris, which is also the country's largest city. PROMPT: The president of the US is GENERATED TEXT: named, but the name of the Vice President is not given. In the case PROMPT: My name is GENERATED TEXT: Emily and I am from Canada. I have always been fascinated with ``` --------- Signed-off-by: Brian Dellabetta <bdellabe@redhat.com>
diff --git a/src/llmcompressor/modifiers/awq/base.py b/src/llmcompressor/modifiers/awq/base.py
@@ -139,8 +139,10 @@ class AWQModifier(Modifier, QuantizationMixin):
         default_factory=dict
     )
 
+    # NOTE: different name chosen to avoid collision with
+    # QuantizationMixin.validate_model_after, which must be called first
     @model_validator(mode="after")
-    def validate_model_after(model: "AWQModifier") -> "AWQModifier":
+    def validate_awq_after(model: "AWQModifier") -> "AWQModifier":
         """
         Confirm only one configuration for group_size, symmetric, and num_bits,
         as AWQ algorithm depends on it
diff --git a/tests/e2e/vLLM/configs/w4a16_grouped_quant_asym_awq.yaml b/tests/e2e/vLLM/configs/w4a16_grouped_quant_asym_awq.yaml
@@ -1,8 +1,8 @@
 cadence: "nightly"
 test_type: "regression"
 model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
-recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_asym_awq.yaml
-dataset_id: "mit-han-lab/pile-val-backup"
-dataset_split: validation
+recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_asym.yaml
+dataset_id: HuggingFaceH4/ultrachat_200k
+dataset_split: train_sft
 scheme: W4A16_weight_asym_awq
 save_dir: TinyLlama-1.1B-Chat-v1.0-w4a16-asym-awq
diff --git a/tests/e2e/vLLM/configs/w4a16_grouped_quant_sym_awq.yaml b/tests/e2e/vLLM/configs/w4a16_grouped_quant_sym_awq.yaml
@@ -2,7 +2,7 @@ cadence: "nightly"
 test_type: "regression"
 model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
 recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_sym.yaml
-dataset_id: "mit-han-lab/pile-val-backup"
-dataset_split: validation
+dataset_id: HuggingFaceH4/ultrachat_200k
+dataset_split: train_sft
 scheme: W4A16_weight_sym_awq
 save_dir: TinyLlama-1.1B-Chat-v1.0-w4a16-sym-awq
diff --git a/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_asym.yaml b/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_asym.yaml
diff --git a/tests/testing_utils.py b/tests/testing_utils.py
@@ -285,18 +285,6 @@ def process(sample):
                 "images": sample["image"],
             }
 
-    elif ds_name == "pile-val-backup":
-
-        def preprocess(example):
-            return {
-                "input_ids": processor.encode(example["text"].strip())[:max_seq_length]
-            }
-
-        ds = ds.map(preprocess, remove_columns=ds.column_names)
-        # Note: potentially swap filtering to pad for AWQ
-        ds = ds.filter(lambda example: len(example["input_ids"]) >= max_seq_length)
-        return ds
-
     else:
         raise NotImplementedError(f"Cannot preprocess dataset {ds.info.dataset_name}")