
Commit f92f45e

committed: update tests
1 parent ea18eb6 commit f92f45e

6 files changed: +110 −83 lines changed

src/diffusers/hooks/faster_cache.py

Lines changed: 13 additions & 17 deletions
@@ -31,11 +31,12 @@
 _FASTER_CACHE_BLOCK_HOOK = "faster_cache_block"
 _ATTENTION_CLASSES = (Attention, MochiAttention)
 _SPATIAL_ATTENTION_BLOCK_IDENTIFIERS = (
-    "blocks.*attn",
-    "transformer_blocks.*attn",
-    "single_transformer_blocks.*attn",
+    "^blocks.*attn",
+    "^transformer_blocks.*attn",
+    "^single_transformer_blocks.*attn"
 )
-_TEMPORAL_ATTENTION_BLOCK_IDENTIFIERS = ("temporal_transformer_blocks.*attn",)
+_TEMPORAL_ATTENTION_BLOCK_IDENTIFIERS = ("^temporal_transformer_blocks.*attn",)
+_TRANSFORMER_BLOCK_IDENTIFIERS = _SPATIAL_ATTENTION_BLOCK_IDENTIFIERS + _TEMPORAL_ATTENTION_BLOCK_IDENTIFIERS
 _UNCOND_COND_INPUT_KWARGS_IDENTIFIERS = (
     "hidden_states",
     "encoder_hidden_states",
@@ -276,9 +277,10 @@ def new_forward(self, module: torch.nn.Module, *args, **kwargs) -> Any:
             self.state.iteration > 0
             and is_within_timestep_range
             and self.state.iteration % self.unconditional_batch_skip_range != 0
+            and not self.is_guidance_distilled
         )
 
-        if should_skip_uncond and not self.is_guidance_distilled:
+        if should_skip_uncond:
             is_any_kwarg_uncond = any(k in self.uncond_cond_input_kwargs_identifiers for k in kwargs.keys())
             if is_any_kwarg_uncond:
                 logger.debug("FasterCache - Skipping unconditional branch computation")
@@ -483,7 +485,7 @@ def reset_state(self, module: torch.nn.Module) -> torch.nn.Module:
 
 def apply_faster_cache(
     module: torch.nn.Module,
-    config: Optional[FasterCacheConfig] = None,
+    config: FasterCacheConfig
 ) -> None:
     r"""
     Applies [FasterCache](https://huggingface.co/papers/2410.19355) to a given pipeline.
@@ -515,10 +517,6 @@ def apply_faster_cache(
     ```
     """
 
-    if config is None:
-        logger.warning("No FasterCacheConfig provided. Using default configuration.")
-        config = FasterCacheConfig()
-
     if config.attention_weight_callback is None:
         # If the user has not provided a weight callback, we default to 0.5 for all timesteps.
         # In the paper, they recommend using a gradually increasing weight from 0 to 1 as the inference progresses, but
@@ -568,7 +566,8 @@ def high_frequency_weight_callback(module: torch.nn.Module) -> float:
     for name, submodule in module.named_modules():
         if not isinstance(submodule, _ATTENTION_CLASSES):
             continue
-        _apply_faster_cache_on_attention_class(name, submodule, config)
+        if any(re.search(identifier, name) is not None for identifier in _TRANSFORMER_BLOCK_IDENTIFIERS):
+            _apply_faster_cache_on_attention_class(name, submodule, config)
 
 
 def _apply_faster_cache_on_denoiser(module: torch.nn.Module, config: FasterCacheConfig) -> None:
@@ -590,13 +589,10 @@ def _apply_faster_cache_on_attention_class(name: str, module: Attention, config:
     is_spatial_self_attention = (
         any(re.search(identifier, name) is not None for identifier in config.spatial_attention_block_identifiers)
         and config.spatial_attention_block_skip_range is not None
-        and not module.is_cross_attention
+        and not getattr(module, "is_cross_attention", False)
     )
     is_temporal_self_attention = (
-        any(
-            f"{identifier}." in name or identifier == name
-            for identifier in config.temporal_attention_block_identifiers
-        )
+        any(re.search(identifier, name) is not None for identifier in config.temporal_attention_block_identifiers)
         and config.temporal_attention_block_skip_range is not None
         and not module.is_cross_attention
     )
@@ -633,7 +629,7 @@ def _apply_faster_cache_on_attention_class(name: str, module: Attention, config:
     registry.register_hook(hook, _FASTER_CACHE_BLOCK_HOOK)
 
 
-# Reference: https://github.com/Vchitect/FasterCache/blob/fab32c15014636dc854948319c0a9a8d92c7acb4/scripts/latte/fastercache_sample_latte.py#L127C1-L143C39
+# Reference: https://github.com/Vchitect/FasterCache/blob/fab32c15014636dc854948319c0a9a8d92c7acb4/scripts/latte/faster_cache_sample_latte.py#L127C1-L143C39
 @torch.no_grad()
 def _split_low_high_freq(x):
     fft = torch.fft.fft2(x)
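With the config-is-None fallback removed (hunk at line 515 above), apply_faster_cache now requires an explicit FasterCacheConfig. A minimal usage sketch, importing the function from the module changed in this commit; the model id, dtype, and callback wiring are illustrative assumptions, not part of this commit:

import torch
from diffusers import FasterCacheConfig, FluxPipeline
from diffusers.hooks.faster_cache import apply_faster_cache

pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)

# The config must now be passed explicitly; there is no default fallback anymore.
config = FasterCacheConfig(
    spatial_attention_block_skip_range=2,
    spatial_attention_timestep_skip_range=(-1, 901),
    attention_weight_callback=lambda _: 0.5,
    is_guidance_distilled=True,  # Flux is guidance-distilled, so no separate unconditional branch
    current_timestep_callback=lambda: pipe.current_timestep,  # assumes the pipeline tracks its current timestep
)
apply_faster_cache(pipe.transformer, config)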

src/diffusers/hooks/pyramid_attention_broadcast.py

Lines changed: 1 addition & 1 deletion
@@ -177,7 +177,7 @@ def reset_state(self, module: torch.nn.Module) -> None:
 
 def apply_pyramid_attention_broadcast(
     module: torch.nn.Module,
-    config: PyramidAttentionBroadcastConfig,
+    config: PyramidAttentionBroadcastConfig
 ):
     r"""
     Apply [Pyramid Attention Broadcast](https://huggingface.co/papers/2408.12588) to a given pipeline.
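apply_pyramid_attention_broadcast keeps the same call shape. A minimal usage sketch, importing from the module changed in this commit; the pipeline, model id, and config values are illustrative assumptions:

import torch
from diffusers import CogVideoXPipeline, PyramidAttentionBroadcastConfig
from diffusers.hooks.pyramid_attention_broadcast import apply_pyramid_attention_broadcast

pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16)

config = PyramidAttentionBroadcastConfig(
    spatial_attention_block_skip_range=2,
    spatial_attention_timestep_skip_range=(100, 800),
    current_timestep_callback=lambda: pipe.current_timestep,
)
apply_pyramid_attention_broadcast(pipe.transformer, config)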

tests/pipelines/flux/test_pipeline_flux.py

Lines changed: 15 additions & 1 deletion
@@ -7,7 +7,13 @@
 from huggingface_hub import hf_hub_download
 from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
 
-from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
+from diffusers import (
+    AutoencoderKL,
+    FasterCacheConfig,
+    FlowMatchEulerDiscreteScheduler,
+    FluxPipeline,
+    FluxTransformer2DModel,
+)
 from diffusers.utils.testing_utils import (
     nightly,
     numpy_cosine_similarity_distance,
@@ -41,6 +47,14 @@ class FluxPipelineFastTests(
     test_xformers_attention = False
     test_layerwise_casting = True
 
+    faster_cache_config = FasterCacheConfig(
+        spatial_attention_block_skip_range=2,
+        spatial_attention_timestep_skip_range=(-1, 901),
+        unconditional_batch_skip_range=2,
+        attention_weight_callback=lambda _: 0.5,
+        is_guidance_distilled=True,
+    )
+
     def get_dummy_components(self, num_layers: int = 1, num_single_layers: int = 1):
         torch.manual_seed(0)
         transformer = FluxTransformer2DModel(
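The new class-level faster_cache_config is consumed by FasterCacheTesterMixin in tests/pipelines/test_pipelines_common.py, which is not shown in this commit. A hypothetical sketch of the kind of check such a mixin might run against the dummy pipeline; the method name, tolerance, and wiring are illustrative, not the mixin's actual code:

import numpy as np
from diffusers.hooks.faster_cache import apply_faster_cache
from diffusers.utils.testing_utils import torch_device

# Hypothetical test fragment: assumes it lives on a class providing pipeline_class,
# get_dummy_components, get_dummy_inputs, and the faster_cache_config attribute above.
def test_faster_cache_inference(self):
    pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device)
    pipe.set_progress_bar_config(disable=None)

    original_output = pipe(**self.get_dummy_inputs(torch_device))[0]

    # Register the FasterCache hooks on the denoiser with the class-level config.
    apply_faster_cache(pipe.transformer, self.faster_cache_config)
    cached_output = pipe(**self.get_dummy_inputs(torch_device))[0]

    # Caching trades some accuracy for speed, so only rough agreement is expected.
    assert np.abs(np.asarray(original_output) - np.asarray(cached_output)).max() < 0.5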

tests/pipelines/hunyuan_video/test_hunyuan_video.py

Lines changed: 18 additions & 2 deletions
@@ -21,6 +21,7 @@
 
 from diffusers import (
     AutoencoderKLHunyuanVideo,
+    FasterCacheConfig,
     FlowMatchEulerDiscreteScheduler,
     HunyuanVideoPipeline,
     HunyuanVideoTransformer3DModel,
@@ -30,13 +31,20 @@
     torch_device,
 )
 
-from ..test_pipelines_common import PipelineTesterMixin, PyramidAttentionBroadcastTesterMixin, to_np
+from ..test_pipelines_common import (
+    FasterCacheTesterMixin,
+    PipelineTesterMixin,
+    PyramidAttentionBroadcastTesterMixin,
+    to_np,
+)
 
 
 enable_full_determinism()
 
 
-class HunyuanVideoPipelineFastTests(PipelineTesterMixin, PyramidAttentionBroadcastTesterMixin, unittest.TestCase):
+class HunyuanVideoPipelineFastTests(
+    PipelineTesterMixin, PyramidAttentionBroadcastTesterMixin, FasterCacheTesterMixin, unittest.TestCase
+):
     pipeline_class = HunyuanVideoPipeline
     params = frozenset(["prompt", "height", "width", "guidance_scale", "prompt_embeds", "pooled_prompt_embeds"])
     batch_params = frozenset(["prompt"])
@@ -55,6 +63,14 @@ class HunyuanVideoPipelineFastTests(PipelineTesterMixin, PyramidAttentionBroadca
     test_xformers_attention = False
     test_layerwise_casting = True
 
+    faster_cache_config = FasterCacheConfig(
+        spatial_attention_block_skip_range=2,
+        spatial_attention_timestep_skip_range=(-1, 901),
+        unconditional_batch_skip_range=2,
+        attention_weight_callback=lambda _: 0.5,
+        is_guidance_distilled=True,
+    )
+
     def get_dummy_components(self, num_layers: int = 1, num_single_layers: int = 1):
         torch.manual_seed(0)
         transformer = HunyuanVideoTransformer3DModel(

tests/pipelines/latte/test_latte.py

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@ class LattePipelineFastTests(
         cross_attention_block_identifiers=["transformer_blocks"],
     )
 
-    fastercache_config = FasterCacheConfig(
+    faster_cache_config = FasterCacheConfig(
         spatial_attention_block_skip_range=2,
         temporal_attention_block_skip_range=2,
         spatial_attention_timestep_skip_range=(-1, 901),
