Skip to content

Commit a2675dc

Browse files
kylesayrs and dsikka authored
[GPTQ] Fix actorder resolution, add sentinel (#1453)
## Purpose ##
* Fix the false assumption that the `actorder` field is of enum type
  * Although `actorder` passes through a [field_validator](https://github.com/neuralmagic/compressed-tensors/blob/main/src/compressed_tensors/quantization/quant_args.py#L200), `QuantizationArgs` has the [use_enum_values](https://github.com/neuralmagic/compressed-tensors/blob/main/src/compressed_tensors/quantization/quant_args.py#L128) configuration set, meaning that enum values are converted to strings.
  * This was done in relation to [this fix](neuralmagic/sparseml#2327)
* Remove the conflict with recipes which manually specify activation ordering by using a sentinel value

## Follow ups ##
* #1425

## Testing ##
* Ran the llama3 example with manually specified `actorder=group`

---------

Signed-off-by: Kyle Sayers <[email protected]>
Co-authored-by: Dipika Sikka <[email protected]>
1 parent dc063de commit a2675dc

File tree

3 files changed

+81
-16
lines changed

3 files changed

+81
-16
lines changed

src/llmcompressor/modifiers/quantization/gptq/base.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
quantize_weight,
2727
)
2828
from llmcompressor.modifiers.quantization.quantization import QuantizationMixin
29+
from llmcompressor.sentinel import Sentinel
2930
from llmcompressor.utils.metric_logging import CompressionLogger
3031

3132
__all__ = ["GPTQModifier"]
@@ -109,7 +110,7 @@ class GPTQModifier(Modifier, QuantizationMixin):
109110
sequential_targets: Union[str, List[str], None] = None
110111
block_size: int = 128
111112
dampening_frac: Optional[float] = 0.01
112-
actorder: Optional[ActivationOrdering] = None
113+
actorder: Optional[Union[ActivationOrdering, Sentinel]] = None
113114
offload_hessians: bool = False
114115

115116
# private variables
@@ -131,23 +132,29 @@ def validate_sequential_update(cls, value: bool) -> bool:
131132
def resolve_quantization_config(self) -> QuantizationConfig:
132133
config = super().resolve_quantization_config()
133134

134-
# Resolve config with `self.actorder`
135+
def resolve_actorder(existing):
    """
    Pick the activation ordering to store on a scheme's weight args

    :param existing: the `actorder` currently set on the scheme's weights
    :return: the value to write back to the scheme's weights
    :raises ValueError: if the modifier and the scheme both specify an
        ordering and the two values are not equal
    """
    requested = self.actorder

    # `GPTQModifier.actorder = None` never touches the scheme
    if requested is None:
        return existing

    # the sentinel is only a default: it fills in a missing value and
    # otherwise defers to whatever the scheme already specifies
    if requested == Sentinel("static"):
        if existing is None:
            return ActivationOrdering.STATIC
        return existing

    # an explicit value is applied when the scheme has none, or agrees
    if existing is None or requested == existing:
        return requested

    raise ValueError(
        "Cannot resolve activation ordering when both "
        "`GPTQModifier.actorder` and `QuantizationScheme.actorder` "
        "are provided and differ. Either set `GPTQModifier.actorder = "
        "None` or remove `actorder` from config groups."
    )
153+
135154
for scheme in config.config_groups.values():
136-
assert isinstance(scheme, QuantizationScheme) # (1)
155+
assert isinstance(scheme, QuantizationScheme)
137156
if scheme.weights is not None:
138-
existing = scheme.weights.actorder
139-
assert isinstance(existing, (ActivationOrdering, type(None))) # (2)
140-
if existing is not None and existing != self.actorder:
141-
raise ValueError(
142-
"Cannot resolve activation ordering when both "
143-
"`GPTQModifier.actorder` and `QuantizationScheme.actorder` "
144-
"both are provided. Either set `GPTQModifier.actorder = None` "
145-
"or remove `actorder` from config groups"
146-
)
147-
scheme.weights.actorder = self.actorder
148-
149-
# (1) QuantizationConfig.model_post_init
150-
# (2) QuantizationScheme.validate_actorder
157+
scheme.weights.actorder = resolve_actorder(scheme.weights.actorder)
151158

152159
return config
153160

src/llmcompressor/sentinel.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import inspect
2+
3+
from pydantic_core import core_schema
4+
5+
# Cache of every sentinel created so far, keyed by "<module_name>-<name>".
# `Sentinel.__new__` consults it so that each key maps to a single instance.
_registry = {}


class Sentinel:
    """
    Unique sentinel values. Implements https://peps.python.org/pep-0661/
    with dummy pydantic validation

    Constructing a sentinel with a `(name, module_name)` pair that has been
    used before returns the instance created the first time, so sentinels can
    be compared with `==` or `is`.

    :param name: label of the sentinel, converted with `str()`; also its repr
    :param module_name: namespace used to tell apart sentinels sharing a name.
        When omitted, it is looked up from the `__file__` global of the frame
        executing `__new__`, falling back to this module's `__name__`
    """

    def __new__(cls, name, module_name=None):
        name = str(name)

        if module_name is None:
            # NOTE: `inspect.currentframe()` is the frame of this `__new__`
            # call, so the globals read here belong to the module defining
            # `Sentinel`, not to the module of the caller
            frame = inspect.currentframe()

            # `currentframe()` returns None on interpreters without stack
            # frame support; use the `__name__` fallback instead of failing
            # with AttributeError on `None.f_globals`
            if frame is not None:
                module_name = frame.f_globals.get("__file__")
            if module_name is None:
                module_name = __name__

        registry_key = f"{module_name}-{name}"

        sentinel = _registry.get(registry_key, None)
        if sentinel is not None:
            return sentinel

        sentinel = super().__new__(cls)
        sentinel._name = name
        sentinel._module_name = module_name

        # `setdefault` keeps the already-registered instance if another one
        # was stored under this key after the lookup above
        return _registry.setdefault(registry_key, sentinel)

    def __repr__(self):
        return self._name

    def __reduce__(self):
        # rebuild through the constructor with the stored name and module
        # name, so that copying/unpickling goes through the registry lookup
        return (
            self.__class__,
            (
                self._name,
                self._module_name,
            ),
        )

    @classmethod
    def __get_pydantic_core_schema__(cls, _source_type, _handler):
        # pydantic hook: delegate validation of this type to `cls.validate`
        return core_schema.no_info_plain_validator_function(cls.validate)

    @classmethod
    def validate(cls, value: "Sentinel") -> "Sentinel":
        """
        Dummy validator: returns `value` unchanged without checking its type

        :param value: value handed over for validation
        :return: the same object that was passed in
        """
        return value

tests/llmcompressor/test_sentinel.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from llmcompressor.sentinel import Sentinel
2+
3+
4+
def test_sentinel():
    """Sentinels match on the same name and differ across module names"""
    # same name, default module name on both sides
    first = Sentinel("MISSING")
    second = Sentinel("MISSING")
    assert first == second

    # same name, explicitly different module names
    in_module_one = Sentinel("MISSING", "module_one")
    in_module_two = Sentinel("MISSING", "module_two")
    assert in_module_one != in_module_two

0 commit comments

Comments
 (0)