|
from typing import Dict, List, Union

from compressed_tensors.quantization import QuantizationArgs
from compressed_tensors.utils import match_named_modules
from pydantic import Field

from llmcompressor.core import Event, EventType, State
from llmcompressor.modifiers import Modifier
from llmcompressor.observers.base import Observer

__all__ = ["IMatrixGatherer"]

| 13 | + |
class IMatrixGatherer(Modifier):
    """
    Lifecycle trigger for iMatrix importance collection.

    Triggers a calibration pass so that ``IMatrixMSEObserver`` can collect
    E[x²] via its ``init()`` hook. Does **not** quantize weights — the
    actual quantization is done by the subsequent
    ``QuantizationModifier`` / ``GPTQModifier``.

    The observer's ``detach()`` method computes ``_imatrix_importance``
    from the accumulated statistics and leaves it on the module for the
    next quantization pass to consume.

    Example recipe::

        recipe:
          - IMatrixGatherer:
              ignore: ["lm_head"]
          - QuantizationModifier:
              config_groups:
                group_0:
                  targets: ["Linear"]
                  weights:
                    observer: imatrix_mse

    Or composed with GPTQ::

        recipe:
          - IMatrixGatherer:
              ignore: ["lm_head"]
          - GPTQModifier:
              config_groups:
                group_0:
                  targets: ["Linear"]
                  weights:
                    observer: imatrix_mse

    .. note::
        Auto-prepend (inserting the gatherer automatically when
        ``imatrix_mse`` is detected in a recipe) is planned for a
        follow-up PR.

    :param targets: module types to instrument (default: ``["Linear"]``)
    :param ignore: layer name patterns to skip (default: ``["lm_head"]``)
    :param weight_observer: observer to attach during calibration.
        Must be ``"imatrix_mse"`` (default).
    """

    targets: Union[str, List[str]] = Field(default_factory=lambda: ["Linear"])
    ignore: List[str] = Field(default_factory=lambda: ["lm_head"])
    weight_observer: str = "imatrix_mse"

    # ------------------------------------------------------------------ #
    # Lifecycle
    # ------------------------------------------------------------------ #

    def on_initialize(self, state: State, **kwargs) -> bool:
        """
        Attach iMatrix observers to target modules for E[x²] collection.

        :param state: session state holding the model to instrument
        :return: True on success
        """
        self._resolved_targets = (
            self.targets if isinstance(self.targets, list) else [self.targets]
        )
        self._observers: Dict[str, Observer] = {}

        # Minimal QuantizationArgs — only used to instantiate the observer,
        # no quantization config is applied to the model.
        observer_args = QuantizationArgs(observer=self.weight_observer)

        for name, module in match_named_modules(
            state.model, self._resolved_targets, self.ignore
        ):
            observer = Observer.load_from_registry(
                self.weight_observer,
                base_name="weight",
                args=observer_args,
                module=module,
            )
            module.register_module("weight_observer", observer)
            observer.init(module)
            self._observers[name] = observer

        return True

    def on_start(self, state: State, event: Event, **kwargs):
        self.started_ = True

    def on_event(self, state: State, event: Event, **kwargs):
        # Mirror the calibration epoch boundaries onto start/end exactly once.
        if event.type_ == EventType.CALIBRATION_EPOCH_START and not self.started_:
            self.on_start(state, event)

        if event.type_ == EventType.CALIBRATION_EPOCH_END and not self.ended_:
            self.on_end(state, event)

    def on_end(self, state: State, event: Event, **kwargs):
        """
        Detach all observers (which finalizes their importance statistics)
        and remove the ``weight_observer`` submodules from the model.
        """
        self.ended_ = True
        # Guard: on_initialize may never have run (e.g. an earlier failure),
        # in which case there is nothing to detach.
        # NOTE(review): assumes ``observer.module`` is a weakref-style
        # callable when set — confirm against the Observer base class.
        for observer in getattr(self, "_observers", {}).values():
            module = observer.module() if observer.module is not None else None
            if module is not None:
                observer.detach(module)
                if hasattr(module, "weight_observer"):
                    delattr(module, "weight_observer")
        if hasattr(self, "_observers"):
            self._observers.clear()

    def on_finalize(self, state: State, **kwargs) -> bool:
        """
        Clean up importance tensors so they don't end up in the checkpoint.

        :param state: session state holding the instrumented model
        :return: True on success
        """
        if not self.ended_:
            self.on_end(state, None)

        # Fall back to re-resolving targets if on_initialize never ran.
        resolved = getattr(
            self,
            "_resolved_targets",
            self.targets if isinstance(self.targets, list) else [self.targets],
        )
        for _, module in match_named_modules(state.model, resolved, self.ignore):
            if hasattr(module, "_imatrix_importance"):
                del module._imatrix_importance

        return True