Commit 027caa4

[BugFix] Directly Convert Modifiers to Recipe Instance (#1271)
Currently, recipe creation follows this sequence: **Modifiers → String (serialization) → Recipe instance (deserialization)**. The intermediate serialization and deserialization step introduces issues with more complex objects, such as **SmoothQuant mappings**, and can lead to parsing errors.

### Solution

This PR refactors the flow to construct the **Recipe instance** directly from **Modifiers**, thereby **removing an unnecessary conversion step** and eliminating a potential source of error.

### Issue Tracking

This issue was originally surfaced in [#37](https://github.com/vllm-project/llm-compressor/issues/37) and is formally tracked under [INFERENG-358](https://issues.redhat.com/browse/INFERENG-358).

### Testing

The issue was reproduced using the following script, which previously errored out but now runs **successfully** with this fix:

```python
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.modifiers.smoothquant import SmoothQuantModifier

DATASET_ID = "HuggingFaceH4/ultrachat_200k"
MODEL_ID = "bigscience/bloom-3b"
DATASET_SPLIT = "train_sft"
NUM_CALIBRATION_SAMPLES = 512
MAX_SEQUENCE_LENGTH = 2048

# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, device_map="auto", torch_dtype="auto"
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Define quantization recipe
recipe = [
    SmoothQuantModifier(
        smoothing_strength=0.8,
        mappings=[
            (["re:.*query_key_value"], "re:.*input_layernorm"),
            (["re:.*dense_h_to_4h"], "re:.*post_attention_layernorm"),
        ],
    ),
    GPTQModifier(
        scheme="W8A8",
        targets="Linear",
        ignore=["lm_head"],
        dampening_frac=0.003,
    ),
]

# Load and preprocess dataset
dataset = load_dataset(DATASET_ID, split=DATASET_SPLIT)
dataset = dataset.shuffle(seed=42).select(range(NUM_CALIBRATION_SAMPLES))


def preprocess(example):
    """Formats the messages into a simple dialogue format."""
    text = "\n".join([msg["content"] for msg in example["messages"]])
    return {"text": text}


dataset = dataset.map(preprocess)

# Apply quantization
oneshot(
    model=model,
    dataset=dataset,
    recipe=recipe,
    output_dir="bloom-3b-gptq-w8a8",
    max_seq_length=MAX_SEQUENCE_LENGTH,
    num_calibration_samples=NUM_CALIBRATION_SAMPLES,
)
```

With this fix, the script now runs **to completion** without errors. Automated tests have also been added to cover the new behavior.

---------

Signed-off-by: Rahul Tuli <[email protected]>
1 parent 4f35d48 commit 027caa4
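
For context, a minimal sketch of the new direct path (assuming the `Recipe.from_modifiers` classmethod shown in `src/llmcompressor/recipe/recipe.py` below; the modifier arguments are illustrative):

```python
from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
from llmcompressor.recipe import Recipe

# Modifiers are now converted straight into a Recipe instance, with no
# intermediate YAML-string serialization/deserialization step in between.
modifiers = [
    SmoothQuantModifier(
        smoothing_strength=0.8,
        mappings=[(["re:.*query_key_value"], "re:.*input_layernorm")],
    )
]
recipe = Recipe.from_modifiers(modifiers)  # single stage, "default" group
```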

File tree

7 files changed: +178 -141 lines changed


src/llmcompressor/recipe/modifier.py

Lines changed: 14 additions & 7 deletions
```diff
@@ -83,13 +83,20 @@ def create_modifier(self) -> "Modifier":
     @model_validator(mode="before")
     @classmethod
     def extract_modifier_type(cls, values: Dict[str, Any]) -> Dict[str, Any]:
-        modifier = {"group": values.pop("group")}
-        assert len(values) == 1, "multiple key pairs found for modifier"
-        modifier_type, args = list(values.items())[0]
-
-        modifier["type"] = modifier_type
-        modifier["args"] = args
-        return modifier
+        if len(values) == 2:
+            if "group" not in values:
+                raise ValueError(
+                    "Invalid format: expected keys 'group' and one modifier "
+                    f"type, but got keys: {list(values.keys())}"
+                )
+
+            # values contains only group and the Modifier type as keys
+            group = values.pop("group")
+            modifier_type, args = values.popitem()
+            return {"group": group, "type": modifier_type, "args": args}
+
+        # values already in the correct format
+        return values
 
     def dict(self, *args, **kwargs) -> Dict[str, Any]:
         """
```

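As a rough sketch of the two input shapes the validator now accepts (assuming `RecipeModifier` is the Pydantic model shown above and validation is invoked via `model_validate`; names and values here are illustrative):

```python
from llmcompressor.recipe.modifier import RecipeModifier

# Raw YAML-style shape: "group" plus a single "<ModifierType>: {args}" pair.
raw = {"group": "quant", "SmoothQuantModifier": {"smoothing_strength": 0.8}}
parsed = RecipeModifier.model_validate(raw)
assert parsed.type == "SmoothQuantModifier"
assert parsed.args == {"smoothing_strength": 0.8}

# Already-normalized shape (e.g. what Recipe.from_modifiers now builds)
# passes through the validator unchanged.
normalized = {
    "group": "quant",
    "type": "SmoothQuantModifier",
    "args": {"smoothing_strength": 0.8},
}
assert RecipeModifier.model_validate(normalized).type == parsed.type
```
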
src/llmcompressor/recipe/recipe.py

Lines changed: 18 additions & 69 deletions
```diff
@@ -12,6 +12,7 @@
 from llmcompressor.recipe.args import RecipeArgs
 from llmcompressor.recipe.base import RecipeBase
 from llmcompressor.recipe.metadata import RecipeMetaData
+from llmcompressor.recipe.modifier import RecipeModifier
 from llmcompressor.recipe.stage import RecipeStage
 
 __all__ = [
@@ -61,20 +62,29 @@ def from_modifiers(
         """
         logger.info("Creating recipe from modifiers")
 
-        # validate Modifiers
         if isinstance(modifiers, Modifier):
-            modifiers: List[Modifier] = [modifiers]
+            modifiers = [modifiers]
 
         if any(not isinstance(modifier, Modifier) for modifier in modifiers):
             raise ValueError("modifiers must be a list of Modifier instances")
 
-        recipe_string: str = create_recipe_string_from_modifiers(
-            modifiers=modifiers,
-            modifier_group_name=modifier_group_name,
-        )
+        group_name = modifier_group_name or "default"
 
-        # modifier group name already included in the recipe string
-        return cls.create_instance(path_or_modifiers=recipe_string)
+        recipe_modifiers: List[RecipeModifier] = [
+            RecipeModifier(
+                type=modifier.__class__.__name__,
+                group=group_name,
+                args=modifier.model_dump(exclude_unset=True),
+            )
+            for modifier in modifiers
+        ]
+        # assume one stage for modifier instances
+        stages: List[RecipeStage] = [
+            RecipeStage(group=group_name, modifiers=recipe_modifiers)
+        ]
+        recipe = cls()
+        recipe.stages = stages
+        return recipe
 
     @classmethod
     def create_instance(
@@ -652,67 +662,6 @@ def _parse_recipe_from_md(file_path, yaml_str):
     return yaml_str
 
 
-def create_recipe_string_from_modifiers(
-    modifiers: List[Modifier],
-    modifier_group_name: Optional[str] = None,
-) -> str:
-    """
-    Create a recipe string from a list of Modifier instances
-
-    (Note: this pathway assumes there's only one stage in the recipe
-    associated by the modifier_group_name, if None, a dummy default
-    group_name will be assigned.)
-
-    :param modifiers: The list of Modifier instances
-    :param modifier_group_name: The stage_name of the recipe,
-        if `oneshot` or `train` the run_type of the recipe will be
-        inferred from the modifier_group_name, if None, a dummy default
-        group_name will be assigned.
-    :return: A string in yaml format from which the recipe can be created
-    """
-
-    # Recipe(s) are yaml/json strings of the following format:
-    # run_type_stage: # should contain oneshot/train
-    #     modifiers:
-    #         ModifierTypeOne:
-    #             start: 0.0
-    #             end: 2.0
-    #             ...
-    #         ModifierTypeTwo:
-    #             ...
-
-    # Create a recipe string from the modifiers
-    default_group_name: str = "DEFAULT"
-    modifier_group_name: str = modifier_group_name or default_group_name
-
-    recipe_dict = {
-        f"{modifier_group_name}_stage": {
-            f"{default_group_name}_modifiers": {
-                modifier.__class__.__name__: modifier.model_dump(exclude_unset=True)
-                for modifier in modifiers
-            }
-        }
-    }
-    recipe_str: str = yaml.dump(recipe_dict, sort_keys=False)
-    return recipe_str
-
-
-def get_modifiers_dict(modifiers: List[Dict[str, Any]]) -> Dict[str, Any]:
-    group_dict = {}
-
-    for modifier in modifiers:
-        modifier_type = modifier["type"]
-        modifier_group = modifier["group"]
-
-        if modifier_group not in group_dict:
-            group_dict[modifier_group] = []
-
-        modifier_dict = {modifier_type: modifier["args"]}
-        group_dict[modifier_group].append(modifier_dict)
-
-    return group_dict
-
-
 def get_yaml_serializable_stage_dict(modifiers: List[Dict[str, Any]]) -> Dict[str, Any]:
     """
     This function is used to convert a list of modifiers into a dictionary
```

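For a rough picture of what `from_modifiers` now constructs (attribute names taken from the diff above; treat this as a sketch rather than a guaranteed public API):

```python
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.recipe import Recipe

recipe = Recipe.from_modifiers([GPTQModifier(scheme="W8A8", targets="Linear")])

# A single RecipeStage in the "default" group wrapping one RecipeModifier,
# whose args mirror the modifier's explicitly-set fields.
stage = recipe.stages[0]
assert stage.group == "default"
assert stage.modifiers[0].type == "GPTQModifier"
assert stage.modifiers[0].args == {"scheme": "W8A8", "targets": "Linear"}
```
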
src/llmcompressor/recipe/stage.py

Lines changed: 20 additions & 20 deletions
```diff
@@ -139,26 +139,26 @@ def extract_dict_modifiers(values: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
 
         modifiers = []
-        remove_keys = []
-
-        if "modifiers" in values and values["modifiers"]:
-            remove_keys.append("modifiers")
-            for mod_key, mod_value in values["stages"].items():
-                modifier = {mod_key: mod_value}
-                modifier["group"] = "default"
-                modifiers.append(modifier)
-
-        for key, value in list(values.items()):
-            if key.endswith("_modifiers"):
-                remove_keys.append(key)
-                group = key.rsplit("_modifiers", 1)[0]
-                for mod_key, mod_value in value.items():
-                    modifier = {mod_key: mod_value}
-                    modifier["group"] = group
-                    modifiers.append(modifier)
-
-        for key in remove_keys:
-            del values[key]
+
+        if "modifiers" in values:
+            modifier_values = values.pop("modifiers")
+            if "stages" in values:
+                for mod_key, mod_value in values.pop("stages").items():
+                    modifiers.append({mod_key: mod_value, "group": "default"})
+            else:
+                values["default_stage"] = {
+                    "default_modifiers": {mod.type: mod.args for mod in modifier_values}
+                }
+                modifiers.extend(
+                    {mod.type: mod.args, "group": "default"} for mod in modifier_values
+                )
+
+        for key in [k for k in values if k.endswith("_modifiers")]:
+            group = key.rsplit("_modifiers", 1)[0]
+            modifiers.extend(
+                {mod_key: mod_value, "group": group}
+                for mod_key, mod_value in values.pop(key).items()
+            )
 
         return modifiers
```

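As a rough illustration of the data shape the refactored `extract_dict_modifiers` handles for the common `<group>_modifiers` layout (the values below are hypothetical; the function itself is internal to recipe parsing):

```python
# Input: a stage dict as parsed from recipe YAML (hypothetical example values).
values = {
    "quant_modifiers": {
        "SmoothQuantModifier": {"smoothing_strength": 0.8},
        "GPTQModifier": {"targets": "Linear", "scheme": "W8A8"},
    }
}

# Expected output: one flat entry per modifier tagged with its group name,
# which RecipeModifier.extract_modifier_type later normalizes into
# {"group": ..., "type": ..., "args": ...}.
expected = [
    {"SmoothQuantModifier": {"smoothing_strength": 0.8}, "group": "quant"},
    {"GPTQModifier": {"targets": "Linear", "scheme": "W8A8"}, "group": "quant"},
]
```
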
tests/e2e/recipe.yaml

Lines changed: 17 additions & 0 deletions
```yaml
quant_stage:
  quant_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.8
      mappings:
        - - ['re:.*q_proj', 're:.*k_proj', 're:.*v_proj']
          - re:.*input_layernorm
        - - ['re:.*gate_proj', 're:.*up_proj']
          - re:.*post_attention_layernorm
    GPTQModifier:
      sequential_update: false
      ignore: [lm_head]
      config_groups:
        group_0:
          weights: {num_bits: 8, type: int, symmetric: true, strategy: channel}
          input_activations: {num_bits: 8, symmetric: false}
          targets: [Linear]
```

tests/e2e/test_recipe_parsing.py

Lines changed: 95 additions & 0 deletions
```python
from pathlib import Path

import pytest
from transformers import AutoModelForCausalLM

from llmcompressor.core.session_functions import reset_session
from llmcompressor.modifiers.quantization.gptq import GPTQModifier
from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
from llmcompressor.modifiers.smoothquant.utils import DEFAULT_SMOOTHQUANT_MAPPINGS
from llmcompressor.transformers import oneshot
from tests.testing_utils import requires_gpu


@pytest.fixture
def common_setup():
    model_stub = "Xenova/llama2.c-stories110M"
    model = AutoModelForCausalLM.from_pretrained(
        model_stub, device_map="auto", torch_dtype="auto"
    )

    dataset = "ultrachat-200k"
    output_dir = "./test_output"
    splits = {"calibration": "train_gen[:5%]"}
    max_seq_length = 2048
    pad_to_max_length = False
    num_calibration_samples = 8

    return (
        model,
        dataset,
        output_dir,
        splits,
        max_seq_length,
        pad_to_max_length,
        num_calibration_samples,
    )


def recipes():
    modifier_objects = [
        SmoothQuantModifier(
            smoothing_strength=0.8, mappings=DEFAULT_SMOOTHQUANT_MAPPINGS
        ),
        GPTQModifier(
            targets="Linear", scheme="W8A8", ignore=["lm_head"], sequential_update=False
        ),
    ]

    recipe_str = """
    DEFAULT_stage:
        DEFAULT_modifiers:
            SmoothQuantModifier:
                smoothing_strength: 0.8
                mappings:
                    - - ['re:.*q_proj', 're:.*k_proj', 're:.*v_proj']
                      - re:.*input_layernorm
                    - - ['re:.*gate_proj', 're:.*up_proj']
                      - re:.*post_attention_layernorm
            GPTQModifier:
                sequential_update: false
                targets: Linear
                scheme: W8A8
    """

    recipe_file = str(Path(__file__).parent / "recipe.yaml")

    return [modifier_objects, recipe_str, recipe_file]


@requires_gpu
@pytest.mark.parametrize("recipe", recipes())
def test_oneshot(common_setup, recipe):
    (
        model,
        dataset,
        output_dir,
        splits,
        max_seq_length,
        pad_to_max_length,
        num_calibration_samples,
    ) = common_setup

    oneshot(
        model=model,
        dataset=dataset,
        recipe=recipe,
        output_dir=output_dir,
        splits=splits,
        max_seq_length=max_seq_length,
        pad_to_max_length=pad_to_max_length,
        num_calibration_samples=num_calibration_samples,
        save_compressed=True,
    )

    reset_session()
```

tests/llmcompressor/helpers.py

Lines changed: 13 additions & 0 deletions
```diff
@@ -1,3 +1,6 @@
+# flake8: noqa
+
+
 def valid_recipe_strings():
     return [
         """
@@ -52,4 +55,14 @@ def valid_recipe_strings():
            final_sparsity: 0.5
            targets: __ALL_PRUNABLE__
        """,
+        """
+        test1_stage:
+            smoothquant_modifiers:
+                SmoothQuantModifier:
+                    smoothing_strength: 0.5
+                    mappings: [
+                        [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"],
+                        [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"]
+                    ]
+        """,
     ]
```

tests/llmcompressor/recipe/test_recipe.py

Lines changed: 1 addition & 45 deletions
```diff
@@ -3,10 +3,8 @@
 import pytest
 import yaml
 
-from llmcompressor.modifiers import Modifier
 from llmcompressor.modifiers.obcq.base import SparseGPTModifier
 from llmcompressor.recipe import Recipe
-from llmcompressor.recipe.recipe import create_recipe_string_from_modifiers
 from tests.llmcompressor.helpers import valid_recipe_strings
 
 
@@ -97,46 +95,4 @@ def test_recipe_can_be_created_from_modifier_instances():
         actual_modifiers[0].modifiers, expected_modifiers[0].modifiers
     ):
         assert isinstance(actual_modifier, type(expected_modifier))
-        assert actual_modifier.dict() == expected_modifier.dict()
-
-
-class A_FirstDummyModifier(Modifier):
-    def on_initialize(self, *args, **kwargs) -> bool:
-        return True
-
-
-class B_SecondDummyModifier(Modifier):
-    def on_initialize(self, *args, **kwargs) -> bool:
-        return True
-
-
-def test_create_recipe_string_from_modifiers_with_default_group_name():
-    modifiers = [B_SecondDummyModifier(), A_FirstDummyModifier()]
-    expected_recipe_str = (
-        "DEFAULT_stage:\n"
-        "  DEFAULT_modifiers:\n"
-        "    B_SecondDummyModifier: {}\n"
-        "    A_FirstDummyModifier: {}\n"
-    )
-    actual_recipe_str = create_recipe_string_from_modifiers(modifiers)
-    assert actual_recipe_str == expected_recipe_str
-
-
-def test_create_recipe_string_from_modifiers_with_custom_group_name():
-    modifiers = [B_SecondDummyModifier(), A_FirstDummyModifier()]
-    group_name = "custom"
-    expected_recipe_str = (
-        "custom_stage:\n"
-        "  DEFAULT_modifiers:\n"
-        "    B_SecondDummyModifier: {}\n"
-        "    A_FirstDummyModifier: {}\n"
-    )
-    actual_recipe_str = create_recipe_string_from_modifiers(modifiers, group_name)
-    assert actual_recipe_str == expected_recipe_str
-
-
-def test_create_recipe_string_from_modifiers_with_empty_modifiers():
-    modifiers = []
-    expected_recipe_str = "DEFAULT_stage:\n  DEFAULT_modifiers: {}\n"
-    actual_recipe_str = create_recipe_string_from_modifiers(modifiers)
-    assert actual_recipe_str == expected_recipe_str
+        assert actual_modifier.model_dump() == expected_modifier.model_dump()
```
