add r2, increase precision

kylesayrs · kylesayrs · commit dc5c30c54df8 · 2025-07-11T11:38:49.000-04:00
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
diff --git a/examples/transform/llama3_example.py b/examples/transform/llama3_example.py
@@ -59,7 +59,6 @@ def tokenize(sample):
 # Configure the quantization algorithm to run.
 #   * quantize the weights to 4 bit with GPTQ with a group size 128
 recipe = [
-    # TODO preset_config="LLAMA_SPINQUANT_R1R2" outputs gibberish
     # TODO preset_config="QUIP_ONLINE" outputs gibberish
     # preset_config="QUIP" output sensible, but cannot load saved
     #  checkpoint or run evals (~4hrs to run)
diff --git a/src/llmcompressor/modeling/fuse.py b/src/llmcompressor/modeling/fuse.py
@@ -23,7 +23,12 @@ def fuse_norm_linears(norm: torch.nn.Module, linears: Iterable[torch.nn.Linear])
             # NOTE: spinquant does this op in float64
             exec_device = get_execution_device(norm)
             with align_module_device(norm, exec_device), align_module_device(linear, exec_device):
-                new_weight = linear.weight * norm.weight
+                
+                weight_dtype = linear.weight.dtype
+
+                new_weight = linear.weight.to(torch.float64) * norm.weight.to(torch.float64)
+
+                new_weight = new_weight.to(weight_dtype)
             
             update_offload_parameter(linear, "weight", new_weight)
 
diff --git a/src/llmcompressor/modifiers/transform/presets/spinquant.py b/src/llmcompressor/modifiers/transform/presets/spinquant.py
@@ -36,25 +36,69 @@
                 ),
             ],
         ),
-        # "R2": TransformScheme(
-        #     type="hadamard",
-        #     # TODO infer head_dim from config.json in SpinQuantModifier
-        #     head_dim=128,
-        #     apply=[
-        #         TransformArgs(targets=["re:.*v_proj$"], location="weight_output"),
-        #         TransformArgs(
-        #             targets=["re:.*o_proj$"],
-        #             location="weight_input",
-        #             inverse=True,
-        #         ),
-        #     ],
-        # ),
+        "R2": TransformScheme(
+            type="hadamard",
+            # TODO infer head_dim from config.json in SpinQuantModifier
+            head_dim=128,
+            apply=[
+                TransformArgs(targets=["re:.*v_proj$"], location="weight_output"),
+                TransformArgs(
+                    targets=["re:.*o_proj$"],
+                    location="weight_input",
+                    inverse=True,
+                ),
+            ],
+        ),
     }
 )
 
 # All rotations
 LLAMA_SPINQUANT = TransformConfig(
     config_groups={
+        "R1": TransformScheme(
+            type="hadamard",
+            apply=[
+                TransformArgs(
+                    targets=[
+                        # outermost rotation
+                        "re:.*embed_tokens$",
+                        # attention rotations
+                        "re:.*o_proj$",
+                        # mlp rotations
+                        "re:.*down_proj$",
+                    ],
+                    location="weight_output",
+                ),
+                TransformArgs(
+                    targets=[
+                        # outermost rotation
+                        "lm_head",
+                        # attention rotations
+                        "re:.*q_proj$",
+                        "re:.*k_proj$",
+                        "re:.*v_proj$",
+                        # mlp rotations
+                        "re:.*up_proj$",
+                        "re:.*gate_proj$",
+                    ],
+                    location="weight_input",
+                    inverse=True,
+                ),
+            ],
+        ),
+        "R2": TransformScheme(
+            type="hadamard",
+            # TODO infer head_dim from config.json in SpinQuantModifier
+            head_dim=128,
+            apply=[
+                TransformArgs(targets=["re:.*v_proj$"], location="weight_output"),
+                TransformArgs(
+                    targets=["re:.*o_proj$"],
+                    location="weight_input",
+                    inverse=True,
+                ),
+            ],
+        ),
         # "R1": LLAMA_SPINQUANT_R1R2.config_groups["R1"],
         # "R2": LLAMA_SPINQUANT_R1R2.config_groups["R2"],
         "R3": TransformScheme(