Skip to content

Commit 568b838

Browse files
authored
Merge pull request #72 from jkoelker/fix/issue-69-model-state-reset
Add model state reset using diffusers maybe_free_model_hooks() API
2 parents d06b3f9 + 05f7718 commit 568b838

File tree

3 files changed

+61
-6
lines changed

3 files changed

+61
-6
lines changed

src/oneiro/pipelines/base.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -180,18 +180,34 @@ def build_result(
180180
guidance_scale=guidance_scale,
181181
)
182182

183-
def post_generate(self, **kwargs: Any) -> None:
    """Hook invoked after a generation completes.

    The base implementation clears stateful model caches through the
    diffusers `maybe_free_model_hooks()` API, preventing state leakage
    between generations (e.g. KV cache, attention state, hook state).

    Subclasses overriding this hook should invoke
    super().post_generate(**kwargs) before performing any additional
    cleanup of their own (e.g. LoRA restore).

    Note: 'init_image' and 'strength' have already been stripped from
    kwargs by generate(); capture them in pre_generate() if a subclass
    needs those values here.
    """
    self._reset_model_state()
198+
199+
def _reset_model_state(self) -> None:
200+
"""Reset stateful model caches between generations.
201+
202+
Uses the diffusers `maybe_free_model_hooks()` API to reset:
203+
- Stateful caches (KV cache, attention state)
204+
- CPU offload hooks (if model offloading is enabled)
205+
206+
This is the canonical way to reset diffusers pipeline state.
207+
"""
208+
if self.pipe is None:
209+
return
210+
self.pipe.maybe_free_model_hooks()
195211

196212
def unload(self) -> None:
197213
"""Free GPU memory."""

src/oneiro/pipelines/civitai_checkpoint.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -854,7 +854,8 @@ def build_generation_kwargs(
854854
return gen_kwargs
855855

856856
def post_generate(self, **kwargs: Any) -> None:
    """Post-generation cleanup: reset model state and restore static LoRAs."""
    # Let the base class reset diffusers model state first.
    super().post_generate(**kwargs)
    # If this generation swapped in dynamic LoRAs, put the static set back.
    if self._has_dynamic_loras:
        self._restore_static_loras()
    self._has_dynamic_loras = False

tests/test_pipelines_base.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,44 @@ def test_configure_handles_none_cpu_count(
246246
assert result >= 1 # Should at least be 1
247247

248248

249+
class TestBasePipelinePostGenerate:
    """Tests for BasePipeline.post_generate() and _reset_model_state()."""

    @patch("oneiro.pipelines.base.torch.cuda.is_available", return_value=False)
    def test_post_generate_calls_reset_model_state(self, mock_cuda):
        """post_generate() calls _reset_model_state()."""
        pipeline = ConcretePipeline()
        reset_spy = Mock()
        pipeline._reset_model_state = reset_spy
        pipeline.post_generate()
        reset_spy.assert_called_once()

    @patch("oneiro.pipelines.base.torch.cuda.is_available", return_value=False)
    def test_reset_model_state_calls_maybe_free_model_hooks(self, mock_cuda):
        """_reset_model_state() calls pipe.maybe_free_model_hooks()."""
        pipeline = ConcretePipeline()
        pipeline.pipe = Mock()
        pipeline._reset_model_state()
        pipeline.pipe.maybe_free_model_hooks.assert_called_once()

    @patch("oneiro.pipelines.base.torch.cuda.is_available", return_value=False)
    def test_reset_model_state_handles_none_pipe(self, mock_cuda):
        """_reset_model_state() handles pipe being None."""
        pipeline = ConcretePipeline()
        pipeline.pipe = None
        pipeline._reset_model_state()  # must not raise

    @patch("oneiro.pipelines.base.torch.cuda.is_available", return_value=False)
    def test_post_generate_accepts_kwargs(self, mock_cuda):
        """post_generate() accepts arbitrary kwargs."""
        pipeline = ConcretePipeline()
        reset_spy = Mock()
        pipeline._reset_model_state = reset_spy
        pipeline.post_generate(some_kwarg="value", another=123)  # must not raise
        reset_spy.assert_called_once()
249287
class TestPipelineManagerLoraResolution:
250288
"""Tests for PipelineManager.generate() LoRA path resolution."""
251289

0 commit comments

Comments
 (0)