from typing import List, Union

from compressed_tensors.quantization import disable_quantization
from compressed_tensors.utils import match_named_modules
from pydantic import Field

from llmcompressor.core import Event, State
from llmcompressor.modifiers import Modifier
from llmcompressor.modifiers.quantization.quantization.mixin import QuantizationMixin

__all__ = ["IMatrixGatherer"]

class IMatrixGatherer(Modifier, QuantizationMixin):
    """
    Lifecycle trigger for iMatrix importance collection.

    Triggers a calibration pass so that ``IMatrixMSEObserver`` can collect
    E[x²] via its ``init()`` hook. Does **not** quantize weights; the
    actual quantization is done by the subsequent
    ``QuantizationModifier`` / ``GPTQModifier``.

    The observer's ``detach()`` method computes ``_imatrix_importance``
    from the accumulated statistics and leaves it on the module for the
    next quantization pass to consume.

    Example recipe::

        recipe:
        - IMatrixGatherer:
            ignore: ["lm_head"]
        - QuantizationModifier:
            config_groups:
                group_0:
                    targets: ["Linear"]
                    weights:
                        observer: imatrix_mse

    Or composed with GPTQ::

        recipe:
        - IMatrixGatherer:
            ignore: ["lm_head"]
        - GPTQModifier:
            config_groups:
                group_0:
                    targets: ["Linear"]
                    weights:
                        observer: imatrix_mse

    .. note::
        Auto-prepend (inserting the gatherer automatically when
        ``imatrix_mse`` is detected in a recipe) is planned for a
        follow-up PR.

    :param scheme: quantization preset used to build the internal config.
        Defaults to ``"W4A16"``. The actual bit-width does not matter
        because weights are never quantized by this modifier.
    :param weight_observer: observer to attach during calibration.
        Must be ``"imatrix_mse"`` (default).
    :param ignore: layer name patterns to skip (default: ``["lm_head"]``)
    :param targets: module types to instrument (default: ``["Linear"]``)
    """
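
    # What the observer is expected to leave behind (a sketch inferred from
    # the docstring above, not from the observer's implementation): for
    # calibration activations ``x`` of shape ``(batch, in_features)``, the
    # accumulated statistic is the per-input-channel second moment,
    #
    #     _imatrix_importance ≈ E[x²] = mean(x ** 2, dim=0)
    #
    # i.e. one scalar weight per input channel, analogous to llama.cpp's
    # importance matrix. Treat the exact shape and normalization as an
    # assumption until verified against ``IMatrixMSEObserver``.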

    scheme: str = "W4A16"
    weight_observer: str = "imatrix_mse"
    ignore: List[str] = Field(default_factory=lambda: ["lm_head"])
    targets: Union[str, List[str]] = Field(default_factory=lambda: ["Linear"])

    # ------------------------------------------------------------------ #
    # Lifecycle
    # ------------------------------------------------------------------ #

    def on_initialize(self, state: State, **kwargs) -> bool:
        QuantizationMixin.initialize_quantization(self, state.model)
        return True

    def on_start(self, state: State, event: Event, **kwargs):
        self.started_ = True
        QuantizationMixin.start_calibration(self, state.model)
        # Disable quantized forward; we only need observer hooks for E[x²]
        state.model.apply(disable_quantization)

    def on_end(self, state: State, event: Event, **kwargs):
        self.ended_ = True
        QuantizationMixin.end_calibration(self, state.model)
        # Disable quantized forward so the model is clean for the next modifier
        state.model.apply(disable_quantization)

    def on_finalize(self, state: State, **kwargs) -> bool:
        if not self.ended_:
            self.on_end(state, None)

        # Clean up importance tensors so they don't end up in the checkpoint
        for _, module in match_named_modules(
            state.model, self.resolved_targets, self.ignore
        ):
            if hasattr(module, "_imatrix_importance"):
                del module._imatrix_importance

        return True
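
# ---------------------------------------------------------------------- #
# Usage sketch (illustrative only, not part of this module). Assumes
# llmcompressor's ``oneshot`` entrypoint; ``calibration_dataset`` and the
# model path are hypothetical placeholders.
#
#     from llmcompressor import oneshot
#     from llmcompressor.modifiers.quantization import GPTQModifier
#
#     oneshot(
#         model="path/to/model",
#         dataset=calibration_dataset,
#         recipe=[
#             IMatrixGatherer(ignore=["lm_head"]),
#             # config_groups must set weights.observer = "imatrix_mse",
#             # mirroring the YAML recipes in the class docstring
#             GPTQModifier(config_groups={...}),
#         ],
#     )
# ---------------------------------------------------------------------- #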