@@ -137,20 +137,34 @@ class PyramidAttentionBroadcastHook(ModelHook):
 
     _is_stateful = True
 
-    def __init__(self, skip_callback: Callable[[torch.nn.Module], bool]) -> None:
+    def __init__(
+        self, timestep_skip_range: Tuple[int, int], block_skip_range: int, current_timestep_callback: Callable[[], int]
+    ) -> None:
         super().__init__()
 
-        self.skip_callback = skip_callback
+        self.timestep_skip_range = timestep_skip_range
+        self.block_skip_range = block_skip_range
+        self.current_timestep_callback = current_timestep_callback
 
     def initialize_hook(self, module):
         self.state = PyramidAttentionBroadcastState()
         return module
 
     def new_forward(self, module: torch.nn.Module, *args, **kwargs) -> Any:
-        if self.skip_callback(module):
-            output = self.state.cache
-        else:
+        is_within_timestep_range = (
+            self.timestep_skip_range[0] < self.current_timestep_callback() < self.timestep_skip_range[1]
+        )
+        should_compute_attention = (
+            self.state.cache is None
+            or self.state.iteration == 0
+            or not is_within_timestep_range
+            or self.state.iteration % self.block_skip_range == 0
+        )
+
+        if should_compute_attention:
             output = module._old_forward(*args, **kwargs)
+        else:
+            output = self.state.cache
 
         self.state.cache = output
         self.state.iteration += 1
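The refactored `new_forward` folds the old per-module `skip_callback` into the hook itself: attention is recomputed whenever the cache is empty, on the first iteration, when the current timestep falls outside `timestep_skip_range`, or whenever the iteration counter hits a multiple of `block_skip_range`; otherwise the cached output is broadcast. A minimal standalone sketch of that cadence (plain Python, no diffusers imports; the helper name and the example values are illustrative only, not taken from this diff):

```python
from typing import Tuple


def should_compute_attention(
    iteration: int,
    current_timestep: int,
    cache_is_empty: bool,
    timestep_skip_range: Tuple[int, int] = (100, 800),  # example window, not a default from this diff
    block_skip_range: int = 2,
) -> bool:
    # Mirrors the decision in PyramidAttentionBroadcastHook.new_forward: recompute when there is
    # nothing cached, on the very first call, outside the skip window, or every
    # `block_skip_range`-th iteration; otherwise reuse (broadcast) the cached attention states.
    is_within_timestep_range = timestep_skip_range[0] < current_timestep < timestep_skip_range[1]
    return (
        cache_is_empty
        or iteration == 0
        or not is_within_timestep_range
        or iteration % block_skip_range == 0
    )


# With block_skip_range=2 and a timestep inside the window, attention is recomputed on even
# iterations and the cached states are reused on odd ones (i.e. skipped N - 1 times for N = 2).
for i in range(6):
    print(i, should_compute_attention(iteration=i, current_timestep=500, cache_is_empty=(i == 0)))
# 0 True, 1 False, 2 True, 3 False, 4 True, 5 False
```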
@@ -266,44 +280,35 @@ def _apply_pyramid_attention_broadcast_on_attention_class(
         )
         return False
 
-    def skip_callback(module: torch.nn.Module) -> bool:
-        hook: PyramidAttentionBroadcastHook = module._diffusers_hook.get_hook("pyramid_attention_broadcast")
-        pab_state: PyramidAttentionBroadcastState = hook.state
-
-        if pab_state.cache is None:
-            return False
-
-        is_within_timestep_range = timestep_skip_range[0] < config.current_timestep_callback() < timestep_skip_range[1]
-        if not is_within_timestep_range:
-            # We are still not in the phase of inference where skipping attention is possible without minimal quality
-            # loss, as described in the paper. So, the attention computation cannot be skipped
-            return False
-
-        should_compute_attention = pab_state.iteration > 0 and pab_state.iteration % block_skip_range == 0
-        return not should_compute_attention
-
     logger.debug(f"Enabling Pyramid Attention Broadcast ({block_type}) in layer: {name}")
-    _apply_pyramid_attention_broadcast(module, skip_callback)
+    _apply_pyramid_attention_broadcast_hook(
+        module, timestep_skip_range, block_skip_range, config.current_timestep_callback
+    )
     return True
 
 
-def _apply_pyramid_attention_broadcast(
+def _apply_pyramid_attention_broadcast_hook(
     module: Union[Attention, MochiAttention],
-    skip_callback: Callable[[torch.nn.Module], bool],
+    timestep_skip_range: Tuple[int, int],
+    block_skip_range: int,
+    current_timestep_callback: Callable[[], int],
 ):
     r"""
     Apply [Pyramid Attention Broadcast](https://huggingface.co/papers/2408.12588) to a given torch.nn.Module.
 
     Args:
         module (`torch.nn.Module`):
             The module to apply Pyramid Attention Broadcast to.
-        skip_callback (`Callable[[nn.Module], bool]`):
-            A callback function that determines whether the attention computation should be skipped or not. The
-            callback function should return a boolean value, where `True` indicates that the attention computation
-            should be skipped, and `False` indicates that the attention computation should not be skipped. The callback
-            function will receive a torch.nn.Module containing a `_pyramid_attention_broadcast_state` attribute that
-            can should be used to retrieve and update the state of PAB for the given module.
+        timestep_skip_range (`Tuple[int, int]`):
+            The range of timesteps to skip in the attention layer. The attention computations will be conditionally
+            skipped if the current timestep is within the specified range.
+        block_skip_range (`int`):
+            The number of times a specific attention broadcast is skipped before computing the attention states to
+            re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times (i.e., old
+            attention states will be re-used) before computing the new attention states again.
+        current_timestep_callback (`Callable[[], int]`):
+            A callback function that returns the current inference timestep.
     """
     registry = HookRegistry.check_if_exists_or_initialize(module)
-    hook = PyramidAttentionBroadcastHook(skip_callback)
+    hook = PyramidAttentionBroadcastHook(timestep_skip_range, block_skip_range, current_timestep_callback)
     registry.register_hook(hook, "pyramid_attention_broadcast")