Commit 4c95fd2

[Transforms] Update examples for R4 and transform_block_size option (#1870)
SUMMARY:

Prerequisites:
- [x] neuralmagic/compressed-tensors#472

This PR updates the SpinQuant and QuIP examples to include `transform_block_size` and the latest R4 feature in SpinQuant. It also reverts the `TransformScheme.block_size` changes previously introduced into compressed-tensors, as updated in the PR linked above. While `block_size` is the more appropriate name, `head_dim` has already landed in vLLM, and changing it there would be too disruptive. Users will rarely create their own `TransformScheme` anyway.

TEST PLAN:
- [x] Both examples run, and the saved models can be run in vLLM with meaningful output.
- [x] With prints, confirmed hadacore is used for `QuIPModifier(rotations=["v", "u"], transform_block_size=64, transform_type="hadamard")`
- [x] and dense GEMM is used for `QuIPModifier(rotations=["v", "u"], transform_block_size=64, transform_type="random-hadamard")` (both configurations are sketched below)

---------

Signed-off-by: Brian Dellabetta <[email protected]>
1 parent 4c8c0a7 commit 4c95fd2
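For reference, a minimal sketch of the two `QuIPModifier` configurations exercised in the test plan above. The modifier arguments come from this commit; the import path is assumed to match the examples, and the kernel comments reflect the behavior reported in the test plan rather than anything added here.

```python
from llmcompressor.modifiers.transform import QuIPModifier

# Fused Hadamard transform with a 64-wide block: per the test plan, this
# configuration was observed to hit the hadacore kernel when served in vLLM.
hadacore_config = QuIPModifier(
    rotations=["v", "u"], transform_block_size=64, transform_type="hadamard"
)

# Random-Hadamard transform with the same block size: per the test plan,
# the online rotation falls back to a dense GEMM in vLLM.
dense_gemm_config = QuIPModifier(
    rotations=["v", "u"], transform_block_size=64, transform_type="random-hadamard"
)
```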

File tree: 4 files changed, +14 −11 lines


examples/transform/quip_example.py

Lines changed: 3 additions & 1 deletion
@@ -21,7 +21,9 @@
 # * apply quip transforms to model in order to make quantization easier
 # * quantize the weights to 4 bit with a group size 128
 recipe = [
-    QuIPModifier(rotations=["v", "u"], transform_type="random-hadamard"),
+    QuIPModifier(
+        rotations=["v", "u"], transform_block_size=128, transform_type="random-hadamard"
+    ),
     QuantizationModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
 ]
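A minimal sketch of how this updated recipe would be applied end to end, assuming the rest of quip_example.py follows the usual llmcompressor `oneshot` flow; the model ID and save directory here are illustrative, not part of this commit.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import QuantizationModifier
from llmcompressor.modifiers.transform import QuIPModifier

MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"  # illustrative model choice
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

recipe = [
    QuIPModifier(
        rotations=["v", "u"], transform_block_size=128, transform_type="random-hadamard"
    ),
    QuantizationModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
]

# Apply the QuIP transforms and weight-only W4A16 quantization; this sketch
# assumes no calibration dataset is needed for this recipe.
oneshot(model=model, recipe=recipe)

SAVE_DIR = MODEL_ID.split("/")[1] + "-quip-w4a16"  # illustrative name
model.save_pretrained(SAVE_DIR, save_compressed=True)
tokenizer.save_pretrained(SAVE_DIR)
```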

examples/transform/spinquant_example.py

Lines changed: 6 additions & 5 deletions
@@ -11,14 +11,15 @@
 model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
-# NOTE: currently only fused rotations (R1 & R2) are available
-# Learned rotations and online rotations (R3 & R4) will be added
-# in a future release.
+# NOTE: currently only rotations R1, R2, and R4 are available
+# R3 and learned R1/R2 rotations will be added in a future release.
 # Configure the quantization algorithm to run.
 # * apply spinquant transforms to model to reduce quantization loss
 # * quantize the weights to 4 bit with group size 128
 recipe = [
-    SpinQuantModifier(rotations=["R1", "R2"], transform_type="hadamard"),
+    SpinQuantModifier(
+        rotations=["R1", "R2", "R4"], transform_block_size=64, transform_type="hadamard"
+    ),
     QuantizationModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
 ]

@@ -37,6 +38,6 @@
 print("==========================================\n\n")
 
 # Save to disk compressed.
-SAVE_DIR = MODEL_ID.split("/")[1] + "-spinquantR1R2-w4a16"
+SAVE_DIR = MODEL_ID.split("/")[1] + "-spinquantR1R2R4-w4a16"
 model.save_pretrained(SAVE_DIR, save_compressed=True)
 tokenizer.save_pretrained(SAVE_DIR)
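Since the test plan checks that the saved checkpoint runs in vLLM with meaningful output, a quick sanity-check sketch follows; the model directory name is illustrative and depends on the `MODEL_ID` used in the example.

```python
from vllm import LLM, SamplingParams

# Load the compressed checkpoint saved by spinquant_example.py and confirm
# generations are still coherent after R1/R2/R4 transforms + W4A16.
llm = LLM(model="Llama-3.2-1B-Instruct-spinquantR1R2R4-w4a16")  # path is illustrative
outputs = llm.generate(
    ["The capital of France is"], SamplingParams(max_tokens=32, temperature=0.0)
)
print(outputs[0].outputs[0].text)
```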

src/llmcompressor/modifiers/transform/quip/base.py

Lines changed: 2 additions & 2 deletions
@@ -135,7 +135,7 @@ def _create_config(self) -> TransformConfig:
     def _create_v_scheme(self) -> TransformScheme:
         return TransformScheme(
             type=self.transform_type,
-            block_size=self.transform_block_size,
+            head_dim=self.transform_block_size,
             apply=[
                 TransformArgs(
                     targets=self.targets,

@@ -157,7 +157,7 @@ def _create_v_scheme(self) -> TransformScheme:
     def _create_u_scheme(self) -> TransformScheme:
         return TransformScheme(
             type=self.transform_type,
-            block_size=self.transform_block_size,
+            head_dim=self.transform_block_size,
             apply=[
                 TransformArgs(
                     targets=self.targets,
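For context, the `head_dim` field these methods now populate belongs to compressed-tensors' `TransformScheme`. A rough standalone sketch of an equivalent scheme is below; the import path, target pattern, and location value are assumptions for illustration, not verified against compressed-tensors.

```python
from compressed_tensors.transform import TransformArgs, TransformScheme

# Loosely mirrors _create_v_scheme for a single Linear target: a
# random-Hadamard transform on the layer input, with head_dim (the value
# passed from QuIPModifier.transform_block_size) setting the size of each
# Hadamard block.
v_scheme = TransformScheme(
    type="random-hadamard",
    head_dim=128,
    apply=[TransformArgs(targets=["re:.*down_proj$"], location="input")],  # placeholders
)
```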

src/llmcompressor/modifiers/transform/spinquant/base.py

Lines changed: 3 additions & 3 deletions
@@ -193,7 +193,7 @@ def _create_r1_scheme(self) -> TransformScheme:
             randomize=self.randomize,
             requires_grad=self.learnable,
             precision=self.precision,
-            block_size=self.transform_block_size,
+            head_dim=self.transform_block_size,
             apply=[
                 TransformArgs(
                     targets=[

@@ -240,7 +240,7 @@ def _create_r2_scheme(self, model: PreTrainedModel) -> TransformScheme:
             randomize=self.randomize,
             requires_grad=self.learnable,
             precision=self.precision,
-            block_size=head_dim,
+            head_dim=head_dim,
             apply=[
                 TransformArgs(targets=[self.mappings.attn_v], location="weight_output"),
                 TransformArgs(

@@ -262,7 +262,7 @@ def _create_r4_scheme(self) -> TransformScheme:
             randomize=self.randomize,
             requires_grad=self.learnable,
             precision=self.precision,
-            block_size=self.transform_block_size,
+            head_dim=self.transform_block_size,
             apply=[
                 TransformArgs(
                     targets=[*self.mappings.mlp_out],
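Conceptually, `head_dim` / `transform_block_size` controls the size of the Hadamard blocks: rather than one dense hidden_size x hidden_size rotation, the transform is block-diagonal with one Hadamard block of the given size repeated along the diagonal. A small, self-contained illustration (not llmcompressor code, sizes chosen for the example):

```python
import torch
from scipy.linalg import hadamard

hidden_size, block_size = 4096, 64

# One orthonormal Hadamard block of shape (block_size, block_size) ...
H = torch.tensor(hadamard(block_size), dtype=torch.float32) / block_size**0.5
# ... tiled along the diagonal to cover the full hidden dimension.
R = torch.block_diag(*([H] * (hidden_size // block_size)))

weight = torch.randn(hidden_size, hidden_size)
rotated = weight @ R  # rotate the weight in independent 64-wide chunks

# Sanity check: R is orthogonal, so R @ R.T == I up to float error.
assert torch.allclose(R @ R.T, torch.eye(hidden_size), atol=1e-5)
```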
