Merged
Changes from 3 commits
4 changes: 3 additions & 1 deletion examples/transform/quip_example.py
@@ -21,7 +21,9 @@
# * apply quip transforms to model in order to make quantization easier
# * quantize the weights to 4 bit with a group size 128
recipe = [
QuIPModifier(rotations=["v", "u"], transform_type="random-hadamard"),
QuIPModifier(
rotations=["v", "u"], transform_block_size=128, transform_type="random-hadamard"
),
QuantizationModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
]

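For context on the change above: a minimal end-to-end sketch of how the updated QuIP recipe is typically driven. The model ID and the `oneshot` call follow the usual llmcompressor example layout and are assumptions, not a verbatim excerpt of the rest of quip_example.py.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import QuantizationModifier
from llmcompressor.modifiers.transform import QuIPModifier

MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"  # assumed; the example may use a different model

model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Same shape as the diff above: fuse random-Hadamard rotations with 128-wide blocks
# into the weights, then quantize Linear layers to W4A16, skipping lm_head.
recipe = [
    QuIPModifier(
        rotations=["v", "u"], transform_block_size=128, transform_type="random-hadamard"
    ),
    QuantizationModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
]

# QuIP's rotations are data-free, so no calibration dataset is passed here.
oneshot(model=model, recipe=recipe)

SAVE_DIR = MODEL_ID.split("/")[1] + "-quip-w4a16"
model.save_pretrained(SAVE_DIR, save_compressed=True)
tokenizer.save_pretrained(SAVE_DIR)
```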
11 changes: 6 additions & 5 deletions examples/transform/spinquant_example.py
@@ -11,14 +11,15 @@
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# NOTE: currently only fused rotations (R1 & R2) are available
# Learned rotations and online rotations (R3 & R4) will be added
# in a future release.
# NOTE: currently only rotations R1, R2, and R4 are available
# R3 and learned R1/R2 rotations will be added in a future release.
# Configure the quantization algorithm to run.
# * apply spinquant transforms to model to reduce quantization loss
# * quantize the weights to 4 bit with group size 128
recipe = [
SpinQuantModifier(rotations=["R1", "R2"], transform_type="hadamard"),
SpinQuantModifier(
rotations=["R1", "R2", "R4"], transform_block_size=64, transform_type="hadamard"
),
QuantizationModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
]

@@ -37,6 +38,6 @@
print("==========================================\n\n")

# Save to disk compressed.
SAVE_DIR = MODEL_ID.split("/")[1] + "-spinquantR1R2-w4a16"
SAVE_DIR = MODEL_ID.split("/")[1] + "-spinquantR1R2R4-w4a16"
model.save_pretrained(SAVE_DIR, save_compressed=True)
tokenizer.save_pretrained(SAVE_DIR)
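What `transform_block_size=64` buys in the recipe above: instead of one hidden-size-wide rotation, the Hadamard is block-diagonal with 64×64 blocks, which keeps the fused and online matmuls cheap. The sketch below is a standalone PyTorch illustration of that idea, not llmcompressor or compressed-tensors internals: rotating a weight by an orthogonal block-diagonal Hadamard and counter-rotating the activations leaves the layer output unchanged.

```python
import torch

def hadamard(n: int) -> torch.Tensor:
    """Sylvester construction; n must be a power of two."""
    H = torch.ones(1, 1)
    while H.shape[0] < n:
        H = torch.cat([torch.cat([H, H], dim=1), torch.cat([H, -H], dim=1)], dim=0)
    return H / (H.shape[0] ** 0.5)  # orthonormal: H @ H.T == I

hidden, block = 512, 64
W = torch.randn(256, hidden)   # a Linear weight (out_features x in_features)
x = torch.randn(hidden)

# Block-diagonal rotation: one 64x64 Hadamard per 64-column slice of W.
R = torch.block_diag(*[hadamard(block) for _ in range(hidden // block)])

W_rot = W @ R      # fused into the weight offline
x_rot = R.T @ x    # applied to the activations (online, or fused upstream)

# The rotation is orthogonal, so the layer output is unchanged up to float error,
# while the rotated weight is typically easier to quantize.
assert torch.allclose(W @ x, W_rot @ x_rot, atol=1e-4)
```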
4 changes: 2 additions & 2 deletions src/llmcompressor/modifiers/transform/quip/base.py
@@ -135,7 +135,7 @@ def _create_config(self) -> TransformConfig:
def _create_v_scheme(self) -> TransformScheme:
return TransformScheme(
type=self.transform_type,
block_size=self.transform_block_size,
head_dim=self.transform_block_size,
apply=[
TransformArgs(
targets=self.targets,
@@ -157,7 +157,7 @@ def _create_v_scheme(self) -> TransformScheme:
def _create_u_scheme(self) -> TransformScheme:
return TransformScheme(
type=self.transform_type,
block_size=self.transform_block_size,
head_dim=self.transform_block_size,
apply=[
TransformArgs(
targets=self.targets,
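The quip/base.py change forwards the modifier's user-facing `transform_block_size` to compressed-tensors as `head_dim`. A rough paraphrase of that wiring is below; the `apply` locations and `inverse` flags are illustrative assumptions (the real values sit in the parts of `_create_v_scheme` not shown in this hunk), so treat it as a sketch of the pattern rather than an excerpt.

```python
from compressed_tensors.transform import TransformArgs, TransformScheme

def build_v_scheme(transform_type, transform_block_size, targets, ignore):
    # transform_block_size (e.g. 128) becomes TransformScheme.head_dim, i.e. the
    # side length of each Hadamard block used for this rotation.
    return TransformScheme(
        type=transform_type,          # e.g. "random-hadamard"
        head_dim=transform_block_size,
        apply=[
            # Illustrative placement: rotate activations entering the Linear and
            # fold the inverse rotation into the weight's input dimension.
            TransformArgs(targets=targets, location="input", ignore=ignore),
            TransformArgs(
                targets=targets, location="weight_input", inverse=True, ignore=ignore
            ),
        ],
    )
```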
6 changes: 3 additions & 3 deletions src/llmcompressor/modifiers/transform/spinquant/base.py
@@ -193,7 +193,7 @@ def _create_r1_scheme(self) -> TransformScheme:
randomize=self.randomize,
requires_grad=self.learnable,
precision=self.precision,
block_size=self.transform_block_size,
head_dim=self.transform_block_size,
apply=[
TransformArgs(
targets=[
@@ -240,7 +240,7 @@ def _create_r2_scheme(self, model: PreTrainedModel) -> TransformScheme:
randomize=self.randomize,
requires_grad=self.learnable,
precision=self.precision,
block_size=head_dim,
head_dim=head_dim,
apply=[
TransformArgs(targets=[self.mappings.attn_v], location="weight_output"),
TransformArgs(
@@ -262,7 +262,7 @@ def _create_r4_scheme(self) -> TransformScheme:
randomize=self.randomize,
requires_grad=self.learnable,
precision=self.precision,
block_size=self.transform_block_size,
head_dim=self.transform_block_size,
apply=[
TransformArgs(
targets=[*self.mappings.mlp_out],
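One detail worth calling out in the spinquant/base.py hunks: R1 and R4 take the user-facing `transform_block_size`, while R2 passes a locally computed `head_dim` because it rotates per attention head. The snippet below shows roughly how that head dimension is usually derived from a Hugging Face config; it is an assumption, since the actual derivation sits outside this hunk.

```python
from transformers import PretrainedConfig

def infer_head_dim(config: PretrainedConfig) -> int:
    # Newer configs expose head_dim directly; otherwise derive it from the
    # hidden size and the number of attention heads.
    head_dim = getattr(config, "head_dim", None)
    if head_dim is None:
        head_dim = config.hidden_size // config.num_attention_heads
    return head_dim

# e.g. Llama-3.1-8B: hidden_size=4096, num_attention_heads=32 -> head_dim=128
```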