Skip to content

Commit a88ca3c

Browse files
spinquant and quip_online, running but outputting gibberish
Signed-off-by: Brian Dellabetta <[email protected]>
1 parent 3207124 commit a88ca3c

File tree

6 files changed

+95
-61
lines changed

6 files changed

+95
-61
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# flake8: noqa
22

3+
from .presets import TRANSFORM_PRESETS
34
from .transform import TransformModifier
4-
from .transform.presets import TRANSFORM_PRESETS
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
from .quip import QUIP
1+
from .quip import QUIP, QUIP_ONLINE
22
from .spinquant import LLAMA_SPINQUANT, LLAMA_SPINQUANT_R1R2
33

44
TRANSFORM_PRESETS = {
55
"QUIP": QUIP,
6+
"QUIP_ONLINE": QUIP_ONLINE,
67
"LLAMA_SPINQUANT": LLAMA_SPINQUANT,
78
"LLAMA_SPINQUANT_R1R2": LLAMA_SPINQUANT_R1R2,
89
}

src/llmcompressor/modifiers/transform/presets/quip.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,61 @@
3838
),
3939
}
4040
)
41+
42+
# https://github.com/vllm-project/llm-compressor/blob/b43b27a2f277a5e62be4f8c713b84fd1c7aa116b/weight_transform.py#L24-L105
43+
QUIP_ONLINE = TransformConfig(
44+
config_groups={
45+
"u_transform_q_o_down_proj": TransformScheme(
46+
type="hadamard",
47+
apply=[
48+
TransformArgs(
49+
targets=[
50+
"re:.*.attn.q_proj$",
51+
"re:.*.attn.o_proj$",
52+
"re:.*.mlp.down_proj$",
53+
],
54+
location="weight_input",
55+
)
56+
],
57+
),
58+
"u_transform_k_v_proj": TransformScheme(
59+
type="hadamard",
60+
apply=[
61+
TransformArgs(
62+
targets=["re:.*.attn.k_proj$", "re:.*.attn.v_proj$"],
63+
location="weight_input",
64+
)
65+
],
66+
),
67+
"u_transform_gate_up_proj": TransformScheme(
68+
type="hadamard",
69+
apply=[
70+
TransformArgs(
71+
targets=["re:.*.mlp.gate_proj$", "re:.*.mlp.up_proj$"],
72+
location="weight_input",
73+
)
74+
],
75+
),
76+
"v_transform_linear": TransformScheme(
77+
type="hadamard",
78+
apply=[
79+
TransformArgs(
80+
targets=["Linear"],
81+
location="weight_output",
82+
ignore=["re:.*.mlp.down_proj$", "lm_head"],
83+
inverse=True,
84+
)
85+
],
86+
),
87+
"v_transform_down_proj": TransformScheme(
88+
type="hadamard",
89+
apply=[
90+
TransformArgs(
91+
targets=["re:.*.mlp.down_proj$"],
92+
location="weight_output",
93+
inverse=True,
94+
)
95+
],
96+
),
97+
}
98+
)

src/llmcompressor/modifiers/transform/presets/spinquant.py

Lines changed: 23 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,23 @@
22

33
# Ref: https://arxiv.org/pdf/2405.16406 Fig 1
44

5-
# All rotations
6-
LLAMA_SPINQUANT = TransformConfig(
7-
transform_groups={
5+
# Mergeable rotations R1 and R2 only
6+
LLAMA_SPINQUANT_R1R2 = TransformConfig(
7+
config_groups={
88
"R1": TransformScheme(
99
type="hadamard",
1010
apply=[
1111
TransformArgs(
12-
targets=["embed_tokens", "o_proj", "down_proj"],
12+
targets=["re:.*embed_tokens$", "re:.*o_proj$", "re:.*down_proj$"],
1313
location="weight_output",
1414
),
1515
TransformArgs(
1616
targets=[
17-
"q_proj",
18-
"k_proj",
19-
"v_proj",
20-
"up_proj",
21-
"gate_proj",
17+
"re:.*q_proj$",
18+
"re:.*k_proj$",
19+
"re:.*v_proj$",
20+
"re:.*up_proj$",
21+
"re:.*gate_proj$",
2222
"lm_head",
2323
],
2424
location="weight_input",
@@ -30,23 +30,31 @@
3030
type="hadamard",
3131
apply=[
3232
TransformArgs(
33-
targets=["v_proj"],
33+
targets=["re:.*v_proj$"],
3434
location="weight_output",
3535
),
3636
TransformArgs(
37-
targets=["o_proj"], location="weight_input", inverse=True
37+
targets=["re:.*o_proj$"], location="weight_input", inverse=True
3838
),
3939
],
4040
),
41+
}
42+
)
43+
44+
# All rotations
45+
LLAMA_SPINQUANT = TransformConfig(
46+
config_groups={
47+
"R1": LLAMA_SPINQUANT_R1R2.config_groups["R1"],
48+
"R2": LLAMA_SPINQUANT_R1R2.config_groups["R2"],
4149
"R3": TransformScheme(
4250
type="hadamard",
4351
apply=[
4452
TransformArgs(
45-
targets=["self_attn"],
53+
targets=["re:.*self_attn$"],
4654
location="k_cache",
4755
),
4856
TransformArgs(
49-
targets=["self_attn"],
57+
targets=["re:.*self_attn$"],
5058
location="q_attn",
5159
),
5260
],
@@ -55,51 +63,11 @@
5563
type="hadamard",
5664
apply=[
5765
TransformArgs(
58-
targets=["down_proj"],
66+
targets=["re:.*down_proj$"],
5967
location="input",
6068
),
6169
TransformArgs(
62-
targets=["down_proj"], location="weight_input", inverse=True
63-
),
64-
],
65-
),
66-
}
67-
)
68-
69-
70-
# Mergeable rotations R1 and R2 only
71-
LLAMA_SPINQUANT_R1R2 = TransformConfig(
72-
config_groups={
73-
"R1": TransformScheme(
74-
type="hadamard",
75-
apply=[
76-
TransformArgs(
77-
targets=["embed_tokens", "o_proj", "down_proj"],
78-
location="weight_output",
79-
),
80-
TransformArgs(
81-
targets=[
82-
"q_proj",
83-
"k_proj",
84-
"v_proj",
85-
"up_proj",
86-
"gate_proj",
87-
"lm_head",
88-
],
89-
location="weight_input",
90-
inverse=True,
91-
),
92-
],
93-
),
94-
"R2": TransformScheme(
95-
type="hadamard",
96-
apply=[
97-
TransformArgs(
98-
targets=["v_proj"],
99-
location="weight_output",
100-
),
101-
TransformArgs(
102-
targets=["o_proj"], location="weight_input", inverse=True
70+
targets=["re:.*down_proj$"], location="weight_input", inverse=True
10371
),
10472
],
10573
),

src/llmcompressor/modifiers/transform/transform.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ def validate_model_after(model: "TransformModifier") -> "TransformModifier":
2626
)
2727
model.config = TRANSFORM_PRESETS[model.preset_config]
2828

29+
return model
30+
2931
def on_initialize(self, state: State, **kwargs) -> bool:
3032
apply_transform_config(state.model, self.config)
3133

tests/llmcompressor/modifiers/transform/test_correctness.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,15 @@
77

88

99
@pytest.mark.parametrize(
10-
"dtype,exp_max,exp_mse", [
11-
(torch.bfloat16, 1.1, 0.012), # constructing and running transforms in float32 can improve to (~0.6562, ~0.0055) # noqa: E501
12-
(torch.float32, 4e-4, 2e-9)
13-
]
10+
"dtype,exp_max,exp_mse",
11+
[
12+
(
13+
torch.bfloat16,
14+
1.1,
15+
0.012,
16+
), # constructing and running transforms in float32 can improve to (~0.6562, ~0.0055) # noqa: E501
17+
(torch.float32, 4e-4, 2e-9),
18+
],
1419
)
1520
def test_apply_correctness(dtype, exp_max, exp_mse):
1621
model = AutoModelForCausalLM.from_pretrained(

0 commit comments

Comments (0)