Commit a21648d

prepare for merge without spinquant
Signed-off-by: Kyle Sayers <[email protected]>
1 parent: a5d3ddc

9 files changed (+21, -478 lines)


examples/transform/quip_example.py

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 from llmcompressor.utils import dispatch_for_generation

 # Select model and load it.
-MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
+MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"

 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
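
For orientation, a minimal sketch of how the example loads the new checkpoint; only MODEL_ID comes from this hunk, and the remaining from_pretrained arguments are assumptions:

    from transformers import AutoModelForCausalLM, AutoTokenizer

    MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"

    # Load the model and tokenizer named by the example (dtype handling here
    # is illustrative, not taken from the diff).
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)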

examples/transform/spinquant_example.py

Lines changed: 0 additions & 86 deletions
This file was deleted.

src/llmcompressor/modifiers/transform/__init__.py

Lines changed: 0 additions & 1 deletion

@@ -1,4 +1,3 @@
 # flake8: noqa

 from .quip import QuIPModifier
-from .spinquant import SpinQuantModifier
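
With this line removed, QuIPModifier is the only transform modifier the package exports. A quick sanity check of the post-commit import surface (a sketch, not part of the diff):

    from llmcompressor.modifiers.transform import QuIPModifier  # still exported

    # from llmcompressor.modifiers.transform import SpinQuantModifier
    # -> ImportError after this commit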

src/llmcompressor/modifiers/transform/quip/base.py

Lines changed: 7 additions & 3 deletions
@@ -21,7 +21,7 @@ class QuIPModifier(Modifier):
     [QuIP: 2-Bit Quantization of Large Language Models With Guarantees](https://arxiv.org/abs/2307.13304) # noqa: E501

     Transforms (rotations) are extra layers added to a model which reduce the accuracy
-    loss induced by quantization. This is achived through "rotating" weights and
+    loss induced by quantization. This is achieved through "rotating" weights and
     activations into a space with a smaller dynamic range of values, thus decreasing
     the range of scales required for quantization.
@@ -31,7 +31,7 @@ class QuIPModifier(Modifier):
     :param transform_type: The type of transform to apply to the model.
         `"hadamard"` has the least performance cost but only supports sizes which are
         powers of two.
-        `"random-matrix"` has more performance cost, but supports a much larger set of
+        `"random-hadamard"` has more performance cost, but supports a much larger set of
         sizes.
         `"random-matrix"` has the greatest performance cost, but supports any size
     :param randomize: If true, create distinct transforms for each application
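
The corrected docstring maps to three constructor choices. A sketch of the documented cost/coverage trade-off (constructor usage is illustrative, not from this commit):

    from llmcompressor.modifiers.transform import QuIPModifier

    QuIPModifier(transform_type="hadamard")         # least cost; power-of-two sizes only
    QuIPModifier(transform_type="random-hadamard")  # more cost; a much larger set of sizes
    QuIPModifier(transform_type="random-matrix")    # greatest cost; any size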
@@ -53,7 +53,9 @@ class QuIPModifier(Modifier):

     @field_validator("randomize", "learnable", mode="before")
     def validate_not_implemented(cls, value, info: ValidationInfo):
-        raise NotImplementedError(f"{info.field_name} is not supported right now")
+        if value:
+            raise NotImplementedError(f"{info.field_name} is not supported right now")
+        return value

     def on_initialize(self, state: State, **kwargs) -> bool:
         if self.transform_config is not None:
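
Before this fix the validator raised unconditionally, so even the defaults randomize=False and learnable=False could not pass validation; now only truthy values are rejected. A sketch of the fixed behavior (argument values are illustrative):

    from llmcompressor.modifiers.transform import QuIPModifier

    QuIPModifier(transform_type="hadamard")                  # defaults pass: falsy values are returned
    QuIPModifier(transform_type="hadamard", randomize=True)  # still raises NotImplementedError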
@@ -102,6 +104,7 @@ def _create_config(self) -> TransformConfig:
                 TransformArgs(
                     targets=["Linear"],
                     location="weight_input",
+                    # location="input",
                     inverse=True,
                     ignore=self.ignore,
                 ),
@@ -115,6 +118,7 @@
                 TransformArgs(
                     targets=["Linear"],
                     location="weight_output",
+                    # location="output",
                     ignore=self.ignore,
                 ),
                 TransformArgs(
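
The commented-out location="input" / location="output" lines appear to note alternative activation-side placements, while the merged config keeps rotations fused into weights. Either way, each transform is paired with its inverse so the layer computes the same function before quantization is applied. A minimal numerical sketch of that invariance (PyTorch; names and sizes are illustrative):

    import torch

    d = 8
    W = torch.randn(d, d)  # a Linear layer's weight
    x = torch.randn(d)     # an incoming activation

    # Stand-in orthogonal rotation (QR of a random matrix); the real modifier
    # builds Hadamard or random matrices according to transform_type.
    H, _ = torch.linalg.qr(torch.randn(d, d))

    y = W @ x
    y_rot = (W @ H) @ (H.T @ x)  # rotate the weight's input dim, invert on x
    assert torch.allclose(y, y_rot, atol=1e-5)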

src/llmcompressor/modifiers/transform/spinquant/__init__.py

Lines changed: 0 additions & 3 deletions
This file was deleted.

src/llmcompressor/modifiers/transform/spinquant/base.py

Lines changed: 0 additions & 235 deletions
This file was deleted.
