
Commit 9e916b6

kylesayrs and dsikka authored
[GPTQ] Change actorder default to "static" (#1425)
## Purpose ##
* Use best defaults for GPTQ quantization

## Prerequisites ##
* #1453
* #1468

## Changes ##
* Set GPTQ actorder default to "static"

## Testing ##
* Ran llama w4a16 example to completion and validated the correct activation ordering

---------

Signed-off-by: Kyle Sayers <[email protected]>
Co-authored-by: Dipika Sikka <[email protected]>
1 parent 8f8405c · commit 9e916b6

19 files changed · +85 −26 lines changed

src/llmcompressor/modifiers/quantization/gptq/base.py

Lines changed: 14 additions & 14 deletions
```diff
@@ -72,8 +72,9 @@ class GPTQModifier(Modifier, QuantizationMixin):
     :param block_size: Used to determine number of columns to compress in one pass
     :param dampening_frac: Amount of dampening to apply to H, as a fraction of the
         diagonal norm
-    :param actorder: order in which weight columns are quantized. For more information,
-        on actorder options, see https://github.com/vllm-project/vllm/pull/8135
+    :param actorder: order in which weight columns are quantized. Defaults to "static"
+        activation ordering, which achieves best accuracy recovery with no runtime cost.
+        For more information, see https://github.com/vllm-project/vllm/pull/8135
     :param offload_hessians: Set to True for decreased memory usage but increased
         runtime.
```
```diff
@@ -106,7 +107,7 @@ class GPTQModifier(Modifier, QuantizationMixin):
     sequential_targets: Union[str, List[str], None] = None
     block_size: int = 128
     dampening_frac: Optional[float] = 0.01
-    actorder: Optional[Union[ActivationOrdering, Sentinel]] = None
+    actorder: Optional[Union[ActivationOrdering, Sentinel]] = Sentinel("static")
     offload_hessians: bool = False

     # private variables
```
```diff
@@ -134,18 +135,17 @@ def resolve_actorder(existing):
                 return ActivationOrdering.STATIC if existing is None else existing

             # user-provided value always attempts to override
-            if self.actorder is not None:
-                if existing is None or self.actorder == existing:
-                    return self.actorder
-                raise ValueError(
-                    "Cannot resolve activation ordering when both "
-                    "`GPTQModifier.actorder` and `QuantizationScheme.actorder` "
-                    "are provided and differ. Either set `GPTQModifier.actorder = "
-                    "None` or remove `actorder` from config groups."
-                )
+            if existing is None or self.actorder == existing:
+                return self.actorder

-            # setting `GPTQModifier.actorder = None` does nothing
-            return existing
+            # if existing provided and conflicts
+            raise ValueError(
+                "Cannot resolve activation ordering when both "
+                "`GPTQModifier.actorder` and `QuantizationScheme.actorder` "
+                f"are provided and differ ({self.actorder}, {existing}). "
+                "Either unset `GPTQModifier.actorder` or "
+                "remove `actorder` from config groups."
+            )

         for scheme in config.config_groups.values():
             assert isinstance(scheme, QuantizationScheme)
```
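The resolution rules in this hunk can be summarized with a small standalone sketch. This is a simplified illustration using plain strings; `SENTINEL_STATIC` stands in for the repo's `Sentinel("static")` field default rather than the real compressed-tensors types:

```python
from typing import Optional

SENTINEL_STATIC = object()  # stand-in for the new Sentinel("static") field default


def resolve_actorder(modifier_value, scheme_value: Optional[str]) -> Optional[str]:
    """Simplified mirror of the resolution logic in GPTQModifier."""
    # sentinel default: defer to the scheme if it specifies actorder, else "static"
    if modifier_value is SENTINEL_STATIC:
        return scheme_value if scheme_value is not None else "static"

    # a user-provided value (including an explicit None) wins when the scheme
    # is silent or agrees with it
    if scheme_value is None or modifier_value == scheme_value:
        return modifier_value

    # both were provided and disagree: refuse to guess
    raise ValueError(
        f"Cannot resolve activation ordering ({modifier_value}, {scheme_value})"
    )


assert resolve_actorder(SENTINEL_STATIC, None) == "static"  # new default
assert resolve_actorder(None, None) is None                 # explicit opt-out now sticks
assert resolve_actorder("group", None) == "group"           # explicit value propagates
```

The notable behavior change is the second assertion: an explicit `actorder=None` now propagates instead of being ignored, which is why the test recipes below pin `actorder: null`.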

tests/e2e/e2e_utils.py

Lines changed: 4 additions & 1 deletion
```diff
@@ -70,7 +70,10 @@ def data_collator(batch):
     # a compatible preset sceme
     if quant_type == "GPTQ":
         oneshot_kwargs["recipe"] = GPTQModifier(
-            targets="Linear", scheme=scheme, ignore=["lm_head"]
+            targets="Linear",
+            scheme=scheme,
+            actorder=None,  # added for consistency with past testing configs
+            ignore=["lm_head"],
         )
     else:
         oneshot_kwargs["recipe"] = QuantizationModifier(
```
Lines changed: 9 additions & 0 deletions
```diff
@@ -0,0 +1,9 @@
+cadence: "nightly"
+test_type: "regression"
+model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+recipe: tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_none.yaml
+dataset_id: openai/gsm8k
+dataset_config: main
+dataset_split: train
+scheme: W4A16_actorder_none
+save_dir: TinyLlama-1.1B-Chat-v1.0-actorder-group
```
Lines changed: 8 additions & 0 deletions
```diff
@@ -0,0 +1,8 @@
+cadence: "nightly"
+test_type: "regression"
+model: Qwen/Qwen2.5-0.5B
+recipe: tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_none.yaml
+dataset_id: neuralmagic/LLM_compression_calibration
+dataset_split: train
+scheme: W4A16_actorder_none
+save_dir: Qwen2.5-0.5B-actorder-none
```

tests/e2e/vLLM/recipes/INT8/recipe_int8_channel_weight_dynamic_per_token.yaml

Lines changed: 1 addition & 0 deletions
```diff
@@ -4,6 +4,7 @@ quant_stage:
       smoothing_strength: 0.8
     GPTQModifier:
       ignore: ["lm_head", "re:vision_tower.*", "re:multi_modal_projector.*", "re:visual.*", "re:vision_model.*", "re:model.visual.*"]
+      actorder: null
       config_groups:
         group_0:
           weights: {num_bits: 8, type: int, symmetric: true, strategy: channel}
```

tests/e2e/vLLM/recipes/INT8/recipe_int8_channel_weight_static_per_tensor_act.yaml

Lines changed: 1 addition & 0 deletions
```diff
@@ -4,6 +4,7 @@ quant_stage:
       smoothing_strength: 0.8
     GPTQModifier:
       ignore: [lm_head]
+      actorder: null
       config_groups:
         group_0:
           weights: {num_bits: 8, type: int, symmetric: true, strategy: channel}
```

tests/e2e/vLLM/recipes/INT8/recipe_w8a8_dynamic_asym.yaml

Lines changed: 1 addition & 0 deletions
```diff
@@ -9,6 +9,7 @@ quant_stage:
         - re:.*post_attention_layernorm
     GPTQModifier:
       ignore: [lm_head]
+      actorder: null
       config_groups:
         group_0:
           weights: {num_bits: 8, type: int, symmetric: true, strategy: channel}
```

tests/e2e/vLLM/recipes/INT8/recipe_w8a8_static_asym.yaml

Lines changed: 1 addition & 0 deletions
```diff
@@ -4,6 +4,7 @@ quant_stage:
       smoothing_strength: 0.8
     GPTQModifier:
       ignore: [lm_head]
+      actorder: null
       config_groups:
         group_0:
           weights: {num_bits: 8, type: int, symmetric: true, strategy: channel}
```

tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_channel_quant.yaml

Lines changed: 1 addition & 0 deletions
```diff
@@ -2,6 +2,7 @@ quant_stage:
   quant_modifiers:
     GPTQModifier:
       ignore: [lm_head]
+      actorder: null
       config_groups:
         group_0:
           weights: {num_bits: 4, type: int, symmetric: true, strategy: channel, dynamic: false}
```

tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_asym_awq.yaml

Lines changed: 1 addition & 0 deletions
```diff
@@ -2,6 +2,7 @@ quant_stage:
   quant_modifiers:
     AWQModifier:
      ignore: [lm_head]
+      actorder: null
       config_groups:
         group_0:
           weights: {num_bits: 4, type: int, symmetric: false, strategy: "group", group_size: 128}
```
