# limitations under the License.

from dataclasses import dataclass
-from typing import Callable, Optional, Protocol, Tuple
+from typing import Any, Callable, Optional, Tuple

import torch.nn as nn

from ..models.attention_processor import Attention
-from ..models.hooks import PyramidAttentionBroadcastHook, add_hook_to_module
+from ..models.hooks import ModelHook, add_hook_to_module
from ..utils import logging
from .pipeline_utils import DiffusionPipeline


_ATTENTION_CLASSES = (Attention,)

-_SPATIAL_ATTENTION_BLOCK_IDENTIFIERS = ("blocks", "transformer_blocks")
+_SPATIAL_ATTENTION_BLOCK_IDENTIFIERS = ("blocks", "transformer_blocks", "single_transformer_blocks")
_TEMPORAL_ATTENTION_BLOCK_IDENTIFIERS = ("temporal_transformer_blocks",)
_CROSS_ATTENTION_BLOCK_IDENTIFIERS = ("blocks", "transformer_blocks")

@@ -96,21 +96,15 @@ class PyramidAttentionBroadcastState:

    def __init__(self) -> None:
        self.iteration = 0
+        self.cache = None
+
+    def update_state(self, output: Any) -> None:
+        self.iteration += 1
+        self.cache = output

    def reset_state(self):
        self.iteration = 0
-
-
-class nnModulePAB(Protocol):
-    r"""
-    Type hint for a torch.nn.Module that contains a `_pyramid_attention_broadcast_state` attribute.
-
-    Attributes:
-        _pyramid_attention_broadcast_state (`PyramidAttentionBroadcastState`):
-            The state of Pyramid Attention Broadcast.
-    """
-
-    _pyramid_attention_broadcast_state: PyramidAttentionBroadcastState
+        self.cache = None


def apply_pyramid_attention_broadcast(
@@ -247,14 +241,15 @@ def _apply_pyramid_attention_broadcast_on_attention_class(
        )
        return

-    def skip_callback(module: nnModulePAB) -> bool:
+    def skip_callback(module: nn.Module) -> bool:
        pab_state = module._pyramid_attention_broadcast_state
-        current_timestep = pipeline._current_timestep
-        is_within_timestep_range = timestep_skip_range[0] < current_timestep < timestep_skip_range[1]
+        if pab_state.cache is None:
+            return False
+
+        is_within_timestep_range = timestep_skip_range[0] < pipeline._current_timestep < timestep_skip_range[1]

        if is_within_timestep_range:
            should_compute_attention = pab_state.iteration > 0 and pab_state.iteration % block_skip_range == 0
-            pab_state.iteration += 1
            return not should_compute_attention

        # We are still not in the phase of inference where skipping attention is possible without minimal quality
@@ -263,3 +258,24 @@ def skip_callback(module: nnModulePAB) -> bool:

    logger.debug(f"Enabling Pyramid Attention Broadcast ({block_type}) in layer: {name}")
    apply_pyramid_attention_broadcast_on_module(module, skip_callback)
+
+
+class PyramidAttentionBroadcastHook(ModelHook):
+    def __init__(self, skip_callback: Callable[[nn.Module], bool]) -> None:
+        super().__init__()
+
+        self.skip_callback = skip_callback
+
+    def new_forward(self, module: nn.Module, *args, **kwargs) -> Any:
+        args, kwargs = module._diffusers_hook.pre_forward(module, *args, **kwargs)
+
+        if self.skip_callback(module):
+            output = module._pyramid_attention_broadcast_state.cache
+        else:
+            output = module._old_forward(*args, **kwargs)
+
+        return module._diffusers_hook.post_forward(module, output)
+
+    def post_forward(self, module: nn.Module, output: Any) -> Any:
+        module._pyramid_attention_broadcast_state.update_state(output)
+        return output
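
For readers unfamiliar with the hook mechanism, here is a minimal usage sketch (not part of the diff) of how the hook, state, and skip callback above fit together. ToyAttention, the hand-attached state, and the iteration-only skip callback are illustrative assumptions, as is the behaviour of add_hook_to_module (assumed to store the hook on module._diffusers_hook, keep the original forward as module._old_forward, and route calls through hook.new_forward):

import torch
import torch.nn as nn

class ToyAttention(nn.Module):  # hypothetical stand-in for a real attention block
    def forward(self, hidden_states):
        return hidden_states * 2

module = ToyAttention()
# In the pipeline this state is attached by apply_pyramid_attention_broadcast_on_module;
# here it is set by hand for illustration.
module._pyramid_attention_broadcast_state = PyramidAttentionBroadcastState()

def skip_callback(m: nn.Module) -> bool:
    # Simplified stand-in for the timestep-aware callback above: never skip until a
    # cached output exists, then recompute only on every second call.
    state = m._pyramid_attention_broadcast_state
    if state.cache is None:
        return False
    return state.iteration % 2 != 0

add_hook_to_module(module, PyramidAttentionBroadcastHook(skip_callback))

x = torch.ones(1, 4)
out1 = module(x)  # attention computed, output cached via post_forward
out2 = module(x)  # cached output returned because skip_callback asks to skip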