
Commit 3fb4212

dsikka and Kyle Sayers authored
Fix default for sequential updates (#186)
* fix default
* update example
* update docstring

--------

Co-authored-by: Kyle Sayers <[email protected]>
1 parent 77f377b commit 3fb4212

File tree

2 files changed, +4 −4 lines changed

examples/quantization_w4a16/llama3_example.py

Lines changed: 1 addition & 1 deletion
@@ -6,6 +6,7 @@
 
 # Select model and load it.
 MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
+
 model = SparseAutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     device_map="auto",
@@ -54,7 +55,6 @@ def tokenize(sample):
 
 # Configure the quantization algorithm to run.
 # * quantize the weights to 4 bit with GPTQ with a group size 128
-# Note: to reduce GPU memory use `sequential_update=False`
 recipe = GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"])
 
 # Apply algorithms.
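
With the new default, the example no longer needs a note about `sequential_update`; layer-by-layer weight updates now happen out of the box. A minimal sketch of how a user could still opt out explicitly, assuming the `GPTQModifier` import path used by this repo's examples (the surrounding model loading and `oneshot` call are omitted):

from llmcompressor.modifiers.quantization import GPTQModifier

# After this commit, sequential_update defaults to True, so this recipe
# updates weights sequentially, layer by layer (lower peak GPU memory).
recipe = GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"])

# To restore the previous non-sequential behavior, opt out explicitly:
recipe_non_sequential = GPTQModifier(
    targets="Linear",
    scheme="W4A16",
    ignore=["lm_head"],
    sequential_update=False,
)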

src/llmcompressor/modifiers/quantization/gptq/base.py

Lines changed: 3 additions & 3 deletions
@@ -44,7 +44,7 @@ class GPTQModifier(Modifier):
     |   test_stage:
     |      obcq_modifiers:
     |         GPTQModifier:
-    |            sequential_update: True
+    |            sequential_update: true
     |            dampening_frac: 0.001
     |            block_size: 128
     |            config_groups:
@@ -63,7 +63,7 @@ class GPTQModifier(Modifier):
 
 
     :param sequential_update: Whether or not to update weights sequentially by layer,
-        True saves on GPU memory
+        True saves on GPU memory, default is True
     :param targets: list of layer names to compress during GPTQ, or '__ALL__'
         to compress every layer in the model
     :param block_size: Used to determine number of columns to compress in one pass
@@ -93,7 +93,7 @@ class GPTQModifier(Modifier):
         and activation 8 bit quantization on the Linear layers.
     """
 
-    sequential_update: Optional[bool] = False
+    sequential_update: Optional[bool] = True
     targets: Union[str, List[str], None] = None
     sequential_targets: Union[str, List[str], None] = None
     block_size: int = 128

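Because `sequential_update` is declared as a field on the modifier class, the new default can be sanity-checked directly. A short sketch, assuming the modifier can be constructed standalone as in the example above (import path as in this repo's examples):

from llmcompressor.modifiers.quantization import GPTQModifier

# The field default changed from False to True in this commit, so an
# unconfigured GPTQModifier now performs sequential (layer-by-layer) updates.
modifier = GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"])
assert modifier.sequential_update is True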