
Commit 061e3c0

match_named_modules, add observer on_start instead of on_initialize
Signed-off-by: Brian Dellabetta <[email protected]>
1 parent bf91e1a

File tree: 3 files changed, +16 −11 lines

src/llmcompressor/modifiers/awq/mappings.py

Lines changed: 1 addition & 0 deletions
@@ -157,6 +157,7 @@ class AWQMapping:
     "Phi3ForCausalLM": _phi_mappings,
     "Phi3VForCausalLM": _phi_mappings,
     "Qwen2ForCausalLM": _default_mappings,
+    "Qwen2_5OmniThinkerForConditionalGeneration": _default_mappings,
     "Qwen2MoeForCausalLM": _moe_default_mappings,
     "Qwen3ForCausalLM": _default_mappings,
     "Qwen3MoeForCausalLM": _moe_default_mappings,

src/llmcompressor/modifiers/quantization/quantization/base.py

Lines changed: 8 additions & 4 deletions
@@ -1,5 +1,7 @@
 import tqdm
-
+from compressed_tensors.utils import (
+    match_named_modules,
+)
 from llmcompressor.core import Event, EventType, State
 from llmcompressor.modifiers import Modifier
 from llmcompressor.modifiers.quantization.calibration import (
@@ -69,14 +71,16 @@ def on_start(self, state: State, event: Event, **kwargs):
         self.started_ = True
         QuantizationMixin.start_calibration(self, state.model)
 
-        modules = list(state.model.modules())
+        named_modules = list(
+            match_named_modules(state.model, self.targets, self.ignore)
+        )
         # TODO: this step can be combined with update_weight_zp_scale
         # once update_fused_layer_weight_global_scales is removed
         # and not required by vLLM
-        for module in tqdm.tqdm(modules):
+        for name, module in tqdm.tqdm(named_modules):
             update_weight_global_scale(module)
 
-        for module in tqdm.tqdm(modules, desc="Calibrating weights"):
+        for name, module in tqdm.tqdm(named_modules, desc="Calibrating weights"):
             update_fused_layer_weight_global_scales(module)
             update_weight_zp_scale(module)
 
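
The substantive change here: weight calibration no longer walks every module via model.modules(), only those matched by the modifier's targets and ignore lists. A small usage sketch of compressed_tensors.utils.match_named_modules, assuming it yields (name, module) pairs filtered by target/ignore patterns (the keyword name below is an assumption):

import torch
from compressed_tensors.utils import match_named_modules

model = torch.nn.Sequential(
    torch.nn.Linear(16, 32),
    torch.nn.ReLU(),
    torch.nn.Linear(32, 16),
)

# Match all Linear submodules; targets/ignore accept class names or
# "re:..." module-name patterns in compressed-tensors conventions.
for name, module in match_named_modules(model, ["Linear"], ignore=[]):
    print(name, type(module).__name__)  # e.g. "0 Linear", "2 Linear"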

src/llmcompressor/modifiers/quantization/quantization/mixin.py

Lines changed: 7 additions & 7 deletions
@@ -116,7 +116,7 @@ def validate_scheme(
 
     def initialize_quantization(self, model: torch.nn.Module):
         """
-        Attach quantization schemes and observers to modules in the model according to
+        Attach quantization schemes to modules in the model according to
         the quantization config specified on this modifier
 
         :param model: model to attach schemes and observers to
@@ -127,25 +127,25 @@ def initialize_quantization(self, model: torch.nn.Module):
         config = self.resolve_quantization_config()
         apply_quantization_config(model, config)
 
-        # apply observers, disable quantization until calibration
-        model.apply(self._initialize_observers)
+        # disable quantization until calibration
         model.apply(disable_quantization)
 
     def start_calibration(self, model: torch.nn.Module):
         """
-        Register activation calibration hooks (including kv_cache quantization) and
-        enable quantization as we calibrate
+        Attach observers, register activation calibration hooks (including
+        kv_cache quantization) and enable quantization as we calibrate
 
         :param model: model to prepare for calibration
         """
         self._calibration_hooks = self._initialize_hooks(model)
+        model.apply(self._initialize_observers)
         model.apply(apply_calibration_status)
         model.apply(enable_quantization)  # quantize at the same time as calibrate
 
     def end_calibration(self, model: torch.nn.Module):
         """
-        Remove calibration hooks and set the model status to frozen. Keep quantization
-        enabled for future operations
+        Remove calibration hooks and observers, and set the model status to frozen.
+        Keep quantization enabled for future operations
 
         :param model: model to end calibration for
         """

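Net effect of the mixin changes: observers now come and go with the calibration window instead of living on the model from initialization onward. An illustrative driver of the resulting lifecycle (the method names are those in the diff; modifier, model, and calibration_batches are assumed to exist and are not defined by this commit):

# Sketch only: shows the ordering after this commit, not a runnable recipe.
modifier.initialize_quantization(model)  # attach schemes; quantization disabled
modifier.start_calibration(model)        # attach observers + hooks; enable quantization

for batch in calibration_batches:        # observers collect statistics during forward
    model(**batch)

modifier.end_calibration(model)          # remove hooks and observers; freeze status
# quantization stays enabled for subsequent forward passes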