Improve PAB log message; fix inverted cross-attention check; fix Latte test

a-r-r-o-w · a-r-r-o-w · commit 0b2629db040f · 2024-12-09T15:13:06.000+01:00
diff --git a/src/diffusers/pipelines/pyramid_attention_broadcast_utils.py b/src/diffusers/pipelines/pyramid_attention_broadcast_utils.py
@@ -220,7 +220,7 @@ def _apply_pyramid_attention_broadcast_on_attention_class(
     is_cross_attention = (
         any(f"{identifier}." in name or identifier == name for identifier in config.cross_attention_block_identifiers)
         and config.cross_attention_block_skip_range is not None
-        and not module.is_cross_attention
+        and module.is_cross_attention
     )
 
     block_skip_range, timestep_skip_range, block_type = None, None, None
@@ -238,7 +238,13 @@ def _apply_pyramid_attention_broadcast_on_attention_class(
         block_type = "cross"
 
     if block_skip_range is None or timestep_skip_range is None:
-        logger.warning(f"Unable to apply Pyramid Attention Broadcast to the selected layer: {name}.")
+        logger.info(
+            f'Unable to apply Pyramid Attention Broadcast to the selected layer: "{name}" because it does '
+            f"not match any of the required criteria for spatial, temporal or cross attention layers. Note, "
+            f"however, that this layer may still be valid for applying PAB. Please specify the correct "
+            f"block identifiers in the configuration or use the specialized `apply_pyramid_attention_broadcast_on_module` "
+            f"function to apply PAB to this layer."
+        )
         return
 
     def skip_callback(module: nnModulePAB) -> bool:
diff --git a/tests/pipelines/latte/test_latte.py b/tests/pipelines/latte/test_latte.py
@@ -22,11 +22,10 @@
 import torch
 from transformers import AutoTokenizer, T5EncoderModel
 
-from diffusers import (
-    AutoencoderKL,
-    DDIMScheduler,
-    LattePipeline,
-    LatteTransformer3DModel,
+from diffusers import AutoencoderKL, DDIMScheduler, LattePipeline, LatteTransformer3DModel
+from diffusers.pipelines.pyramid_attention_broadcast_utils import (
+    PyramidAttentionBroadcastConfig,
+    apply_pyramid_attention_broadcast,
 )
 from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import (
@@ -277,33 +276,24 @@ def test_pyramid_attention_broadcast(self):
         frames = pipe(**inputs).frames  # [B, F, C, H, W]
         original_image_slice = frames[0, -2:, -1, -3:, -3:]
 
-        pipe.enable_pyramid_attention_broadcast(spatial_attn_skip_range=2, spatial_attn_timestep_range=(100, 800))
-        assert pipe.pyramid_attention_broadcast_enabled
+        config = PyramidAttentionBroadcastConfig(
+            spatial_attention_block_skip_range=2,
+            temporal_attention_block_skip_range=3,
+            spatial_attention_timestep_skip_range=(100, 800),
+            temporal_attention_timestep_skip_range=(100, 800),
+        )
+        apply_pyramid_attention_broadcast(pipe, config)
 
         inputs = self.get_dummy_inputs(device)
         inputs["num_inference_steps"] = 4
         frames = pipe(**inputs).frames
         image_slice_pab_enabled = frames[0, -2:, -1, -3:, -3:]
 
-        pipe.disable_pyramid_attention_broadcast()
-        assert not pipe.pyramid_attention_broadcast_enabled
-
-        inputs = self.get_dummy_inputs(device)
-        frames = pipe(**inputs).frames
-        image_slice_pab_disabled = frames[0, -2:, -1, -3:, -3:]
-
         # We need to use higher tolerance because we are using a random model. With a converged/trained
         # model, the tolerance can be lower.
         assert np.allclose(
-            original_image_slice, image_slice_pab_enabled, atol=0.25
+            original_image_slice, image_slice_pab_enabled, atol=0.2
         ), "PAB outputs should not differ much in specified timestep range."
-        print((image_slice_pab_disabled - image_slice_pab_enabled).abs().max())
-        assert np.allclose(
-            image_slice_pab_enabled, image_slice_pab_disabled, atol=0.25
-        ), "Outputs, with PAB enabled, shouldn't differ much when PAB is disabled in specified timestep range."
-        assert np.allclose(
-            original_image_slice, image_slice_pab_disabled, atol=0.25
-        ), "Original outputs should match when PAB is disabled."
 
 
 @slow