Skip to content

Commit 3207124

Browse files
TransformModifier with SpinQuant R1&R2
Signed-off-by: Brian Dellabetta <[email protected]>
1 parent d1eb2a1 commit 3207124

File tree

6 files changed

+84
-25
lines changed

6 files changed

+84
-25
lines changed

examples/transform/llama3_example.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
from datasets import load_dataset
22
from transformers import AutoModelForCausalLM, AutoTokenizer
33

4-
from llmcompressor.modifiers.quantization import GPTQModifier
5-
from llmcompressor.modifiers.transform import TransformModifier
64
from llmcompressor import oneshot
5+
from llmcompressor.modifiers.quantization import GPTQModifier, QuantizationModifier
6+
from llmcompressor.modifiers.transform import TransformModifier
7+
from llmcompressor.utils import dispatch_for_generation
78

89
# Select model and load it.
910
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
@@ -56,8 +57,8 @@ def tokenize(sample):
5657
# Configure the quantization algorithm to run.
5758
# * quantize the weights to 4 bit with GPTQ with a group size 128
5859
recipe = [
59-
TransformModifier(),
60-
GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
60+
TransformModifier(preset_config="LLAMA_SPINQUANT_R1R2"),
61+
QuantizationModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
6162
]
6263

6364
# Apply algorithms.
@@ -70,15 +71,16 @@ def tokenize(sample):
7071
num_calibration_samples=NUM_CALIBRATION_SAMPLES,
7172
)
7273

73-
# Confirm generations of the quantized model look sane.
74-
print("\n\n")
75-
print("========== SAMPLE GENERATION ==============")
76-
input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to("cuda")
77-
output = model.generate(input_ids, max_new_tokens=100)
78-
print(tokenizer.decode(output[0]))
79-
print("==========================================\n\n")
74+
# # Confirm generations of the quantized model look sane.
75+
# print("\n\n")
76+
# print("========== SAMPLE GENERATION ==============")
77+
# dispatch_for_generation(model)
78+
# input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to("cuda")
79+
# output = model.generate(input_ids, max_new_tokens=100)
80+
# print(tokenizer.decode(output[0]))
81+
# print("==========================================\n\n")
8082

8183
# Save to disk compressed.
82-
SAVE_DIR = MODEL_ID.split("/")[1] + "-W4A16-G128"
84+
SAVE_DIR = MODEL_ID.split("/")[1] + "-transform-quant-w4a16"
8385
model.save_pretrained(SAVE_DIR, save_compressed=True)
8486
tokenizer.save_pretrained(SAVE_DIR)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
# flake8: noqa
22

33
from .transform import TransformModifier
4+
from .transform.presets import TRANSFORM_PRESETS
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from .quip import QUIP
2+
from .spinquant import LLAMA_SPINQUANT, LLAMA_SPINQUANT_R1R2
3+
4+
TRANSFORM_PRESETS = {
5+
"QUIP": QUIP,
6+
"LLAMA_SPINQUANT": LLAMA_SPINQUANT,
7+
"LLAMA_SPINQUANT_R1R2": LLAMA_SPINQUANT_R1R2,
8+
}

src/llmcompressor/modifiers/transform/template/spinquant.py renamed to src/llmcompressor/modifiers/transform/presets/spinquant.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
from compressed_tensors.transform import TransformArgs, TransformConfig, TransformScheme
22

3+
# Ref: https://arxiv.org/pdf/2405.16406 Fig 1
4+
5+
# All rotations
36
LLAMA_SPINQUANT = TransformConfig(
47
transform_groups={
58
"R1": TransformScheme(
@@ -62,3 +65,43 @@
6265
),
6366
}
6467
)
68+
69+
70+
# Mergeable rotations R1 and R2 only
71+
LLAMA_SPINQUANT_R1R2 = TransformConfig(
72+
config_groups={
73+
"R1": TransformScheme(
74+
type="hadamard",
75+
apply=[
76+
TransformArgs(
77+
targets=["embed_tokens", "o_proj", "down_proj"],
78+
location="weight_output",
79+
),
80+
TransformArgs(
81+
targets=[
82+
"q_proj",
83+
"k_proj",
84+
"v_proj",
85+
"up_proj",
86+
"gate_proj",
87+
"lm_head",
88+
],
89+
location="weight_input",
90+
inverse=True,
91+
),
92+
],
93+
),
94+
"R2": TransformScheme(
95+
type="hadamard",
96+
apply=[
97+
TransformArgs(
98+
targets=["v_proj"],
99+
location="weight_output",
100+
),
101+
TransformArgs(
102+
targets=["o_proj"], location="weight_input", inverse=True
103+
),
104+
],
105+
),
106+
}
107+
)

src/llmcompressor/modifiers/transform/transform.py

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,33 @@
1-
from typing import Dict, Optional
1+
from typing import Optional
22

3-
from compressed_tensors.transform import TransformScheme, apply_transform_config
3+
from compressed_tensors.transform import TransformConfig, apply_transform_config
4+
from pydantic import ValidationError, model_validator
45

56
from llmcompressor.core import Event, EventType, State
67
from llmcompressor.modifiers import Modifier
7-
8-
from .template.quip import QUIP
8+
from llmcompressor.modifiers.transform.presets import TRANSFORM_PRESETS
99

1010

1111
class TransformModifier(Modifier):
1212
preset_config: Optional[str] = None
13-
config_groups: Optional[Dict[str, TransformScheme]] = None
13+
config: Optional[TransformConfig] = None
1414

1515
# validate that at least one of preset_config or config is provided,
16+
@model_validator(mode="after")
17+
def validate_model_after(model: "TransformModifier") -> "TransformModifier":
18+
if model.preset_config is None and model.config is None:
19+
raise ValidationError("Either a config or a preset_config must be provided")
20+
21+
if model.preset_config is not None:
22+
if model.preset_config not in TRANSFORM_PRESETS:
23+
raise ValidationError(
24+
f"Invalid preset_config '{model.preset_config}' "
25+
f"must be in {TRANSFORM_PRESETS.keys()}"
26+
)
27+
model.config = TRANSFORM_PRESETS[model.preset_config]
1628

1729
def on_initialize(self, state: State, **kwargs) -> bool:
18-
if self.preset_config is not None:
19-
# import config template and customize to model
20-
pass
21-
22-
# config = TransformConfig(config_groups=self.config_groups)
23-
config = QUIP
24-
25-
apply_transform_config(state.model, config)
30+
apply_transform_config(state.model, self.config)
2631

2732
return True
2833

0 commit comments

Comments
 (0)