remove hooks on calibration epoch end

kylesayrs · kylesayrs · commit b9c91e7e22f6 · 2025-04-22T14:41:49.000-04:00
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
diff --git a/src/llmcompressor/modifiers/obcq/base.py b/src/llmcompressor/modifiers/obcq/base.py
@@ -10,7 +10,7 @@
 from loguru import logger
 from pydantic import PrivateAttr
 
-from llmcompressor.core import Event, EventType, State
+from llmcompressor.core import State
 from llmcompressor.modifiers import Modifier
 from llmcompressor.modifiers.obcq.sgpt_mixin import SparsityModifierMixin
 from llmcompressor.modifiers.obcq.sgpt_sparsify import (
@@ -113,13 +113,6 @@ def calibrate_module(
                 self._num_samples[module],
             )
 
-    def on_event(self, state: State, event: Event, **kwargs):
-        if event.type_ in (
-            EventType.SEQUENTIAL_EPOCH_END,
-            EventType.CALIBRATION_EPOCH_END,
-        ):
-            self.compress_modules()
-
     def compress_modules(self):
         """
         Sparsify modules which have been calibrated
@@ -163,10 +156,13 @@ def _maybe_onload_hessian(self, module: torch.nn.Module):
                 self._hessians[module] = self._hessians[module].to(device="cpu")
 
     def on_finalize(self, state: State, **kwargs) -> bool:
+        # TODO: modify lifecycle to end on finalize
+        if not self.ended_:
+            self.on_end(state, None)  # remove hooks
+
         if len(self._num_samples) > 0:
             raise ValueError(f"Failed to compress {len(self._num_samples)} modules")
 
-        self.remove_hooks()
         self._hessians = dict()
         self._num_samples = dict()
         self._module_names = dict()
diff --git a/src/llmcompressor/modifiers/obcq/sgpt_mixin.py b/src/llmcompressor/modifiers/obcq/sgpt_mixin.py
@@ -9,7 +9,8 @@
 from loguru import logger
 from pydantic import Field, PrivateAttr, field_validator, model_validator
 
-from llmcompressor.core import State
+from llmcompressor.core import Event, EventType, State
+from llmcompressor.modifiers.modifier import Modifier
 from llmcompressor.modifiers.utils.hooks import HooksMixin
 from llmcompressor.pipelines.basic import run_pipeline as run_basic
 from llmcompressor.utils.pytorch.module import (
@@ -20,7 +21,7 @@
 )
 
 
-class SparsityModifierMixin(HooksMixin):
+class SparsityModifierMixin(Modifier):
     # modifier arguments
     sparsity: Optional[Union[float, List[float]]]
     sparsity_profile: Optional[str] = None
@@ -93,6 +94,10 @@ def calibrate_module(
     ):
         raise NotImplementedError()
 
+    @abstractmethod
+    def compress_modules(self):
+        raise NotImplementedError()
+
     def on_initialize(self, state: "State", **kwargs) -> bool:
         """
         Initialize and run the OBCQ algorithm on the current state
@@ -158,6 +163,21 @@ def on_initialize(self, state: "State", **kwargs) -> bool:
 
         return True
 
+    def on_event(self, state: State, event: Event, **kwargs):
+        if event.type_ == EventType.SEQUENTIAL_EPOCH_END:
+            self.compress_modules()
+
+        if event.type_ == EventType.CALIBRATION_EPOCH_END:
+            self.compress_modules()
+
+            # TODO: modify lifecycle to end on calibration epoch end
+            if not self.ended_:
+                self.on_end(state, None)
+
+    def on_end(self, state: State, event: Event, **kwargs):
+        self.ended_ = True  # TODO: move to super call
+        self.remove_hooks()
+
     def _infer_sequential_targets(
         self, model: torch.nn.Module
     ) -> Union[str, List[str]]:
diff --git a/src/llmcompressor/modifiers/pruning/wanda/base.py b/src/llmcompressor/modifiers/pruning/wanda/base.py
@@ -9,7 +9,7 @@
 from loguru import logger
 from pydantic import PrivateAttr
 
-from llmcompressor.core import Event, EventType, State
+from llmcompressor.core import State
 from llmcompressor.modifiers import Modifier
 from llmcompressor.modifiers.obcq.sgpt_mixin import SparsityModifierMixin
 from llmcompressor.modifiers.pruning.wanda.wanda_sparsify import (
@@ -99,13 +99,6 @@ def calibrate_module(
             self._num_samples[module],
         )
 
-    def on_event(self, state: State, event: Event, **kwargs):
-        if event.type_ in (
-            EventType.SEQUENTIAL_EPOCH_END,
-            EventType.CALIBRATION_EPOCH_END,
-        ):
-            self.compress_modules()
-
     def compress_modules(self):
         """
         Sparsify modules which have been calibrated
@@ -133,10 +126,13 @@ def compress_modules(self):
             del self._num_samples[module]
 
     def on_finalize(self, state: State, **kwargs) -> bool:
+        # TODO: modify lifecycle to end on finalize
+        if not self.ended_:
+            self.on_end(state, None)  # remove hooks
+
         if len(self._num_samples) > 0:
             raise ValueError(f"Failed to compress {len(self._num_samples)} modules")
 
-        self.remove_hooks()
         self._row_scalars = dict()
         self._num_samples = dict()
         self._module_names = dict()
diff --git a/src/llmcompressor/modifiers/quantization/gptq/base.py b/src/llmcompressor/modifiers/quantization/gptq/base.py
@@ -160,21 +160,41 @@ def on_initialize(self, state: State, **kwargs) -> bool:
 
         return True
 
+    def on_event(self, state: State, event: Event, **kwargs):
+        if event.type_ == EventType.SEQUENTIAL_EPOCH_END:
+            self.compress_modules()
+
+        if event.type_ == EventType.CALIBRATION_EPOCH_END:
+            self.compress_modules()
+
+            # TODO: modify lifecycle to end on calibration epoch end
+            if not self.ended_:
+                self.on_end(state, None)
+
+    def on_end(self, state: State, event: Event, **kwargs):
+        """
+        Finish calibrating by removing observers and calibration hooks
+        """
+        self.ended_ = True  # TODO: move to super call
+        state.model.apply(freeze_module_quantization)  # remove observers
+        self.remove_hooks()  # remove hooks
+
     def on_finalize(self, state: State, **kwargs) -> bool:
         """
         disable the quantization observers used by the OBCQ algorithm
 
         :param state: session state storing input model and calibration data
         """
+        # TODO: modify lifecycle to end on finalize
+        if not self.ended_:
+            self.on_end(state, None)
+
         if len(self._num_samples) > 0:
             raise ValueError(f"Failed to compress {len(self._num_samples)} modules")
 
         self._hessians = dict()
         self._num_samples = dict()
 
-        state.model.apply(freeze_module_quantization)  # remove observers
-        self.remove_hooks()  # remove hooks
-
         return True
 
     def calibrate_module(
@@ -211,13 +231,6 @@ def calibrate_module(
                 self._num_samples[module],
             )
 
-    def on_event(self, state: State, event: Event, **kwargs):
-        if event.type_ in (
-            EventType.SEQUENTIAL_EPOCH_END,
-            EventType.CALIBRATION_EPOCH_END,
-        ):
-            self.compress_modules()
-
     def compress_modules(self):
         """
         Quantize modules which have been calibrated
diff --git a/src/llmcompressor/modifiers/quantization/quantization/base.py b/src/llmcompressor/modifiers/quantization/quantization/base.py
@@ -1,7 +1,7 @@
 import tqdm
 from compressed_tensors.quantization import disable_quantization, enable_quantization
 
-from llmcompressor.core import Event, State
+from llmcompressor.core import Event, EventType, State
 from llmcompressor.modifiers import Modifier
 from llmcompressor.modifiers.quantization.calibration import (
     apply_calibration_status,
@@ -81,14 +81,21 @@ def on_start(self, state: State):
         for module in tqdm.tqdm(modules, desc="Calibrating weights"):
             update_weight_zp_scale(module)
 
+    def on_event(self, state: State, event: Event, **kwargs):
+        if event.type_ == EventType.CALIBRATION_EPOCH_END:
+            # TODO: modify lifecycle to end on calibration epoch end
+            if not self.ended_:
+                self.on_end(state, None)
+
     def on_end(self, state: State, event: Event, **kwargs):
         """
         Finish calibrating by removing observers and calibration hooks
         """
+        self.ended_ = True  # TODO: move to super call
         state.model.apply(freeze_module_quantization)  # remove observers
         self.remove_hooks()  # remove hooks
 
     def on_finalize(self, state: State, **kwargs) -> bool:
-        # TODO: modify lifecycle so modifiers end on finalize
+        # TODO: modify lifecycle to end on finalize
         if not self.ended_:
             self.on_end(state, None)
diff --git a/src/llmcompressor/modifiers/smoothquant/base.py b/src/llmcompressor/modifiers/smoothquant/base.py
@@ -137,27 +137,31 @@ def on_initialize(self, state: State, **kwargs) -> bool:
         return True
 
     def on_event(self, state: State, event: Event, **kwargs):
-        """
-        Sparsify modules which have been calibrated with samples
-        """
-        if event.type_ in (
-            EventType.SEQUENTIAL_EPOCH_END,
-            EventType.CALIBRATION_EPOCH_END,
-        ):
+        if event.type_ == EventType.SEQUENTIAL_EPOCH_END:
             self._apply_smoothing(state.model)
 
+        if event.type_ == EventType.CALIBRATION_EPOCH_END:
+            self._apply_smoothing(state.model)
+
+            # TODO: modify lifecycle to end on calibration epoch end
+            if not self.ended_:
+                self.on_end(state, None)
+
+    def on_end(self, state: State, event: Event, **kwargs):
+        self.ended_ = True  # TODO: move to super calls
+        self.remove_hooks()  # remove hooks
+
     def on_finalize(self, state: State, **kwargs) -> bool:
         """
         Clean up by clearing the scale and mapping data
-
-        :param state: unused
-        :return: True
         """
+        # TODO: modify lifecycle to end on finalize
+        if not self.ended_:
+            self.on_end(state, None)
+
         if len(self.scales_) > 0:
             raise ValueError(f"Failed to compress {len(self.scales_)} modules")
 
-        self.remove_hooks()
-
         if self.scales_ is not None:
             self.scales_.clear()
         if self.resolved_mappings_ is not None: