Commit 0a05bec ("improvements")

Parent: 37f8826

3 files changed: +71, -33 lines

src/diffusers/hooks/_helpers.py

Lines changed: 21 additions & 1 deletion
@@ -169,7 +169,7 @@ def _register_attention_processors_metadata():
 
 
 def _register_transformer_blocks_metadata():
-    from ..models.attention import BasicTransformerBlock
+    from ..models.attention import BasicTransformerBlock, JointTransformerBlock
     from ..models.transformers.cogvideox_transformer_3d import CogVideoXBlock
     from ..models.transformers.transformer_bria import BriaTransformerBlock
     from ..models.transformers.transformer_cogview4 import CogView4TransformerBlock
@@ -189,6 +189,7 @@ def _register_transformer_blocks_metadata():
     from ..models.transformers.transformer_qwenimage import QwenImageTransformerBlock
     from ..models.transformers.transformer_wan import WanTransformerBlock
     from ..models.transformers.transformer_z_image import ZImageTransformerBlock
+    from ..models.transformers.transformer_kandinsky import Kandinsky5TransformerDecoderBlock
 
     # BasicTransformerBlock
     TransformerBlockRegistry.register(
@@ -332,6 +333,25 @@ def _register_transformer_blocks_metadata():
     )
 
 
+    TransformerBlockRegistry.register(
+        model_class=JointTransformerBlock,
+        metadata=TransformerBlockMetadata(
+            return_hidden_states_index=1,
+            return_encoder_hidden_states_index=0,
+        ),
+    )
+
+
+    # Kandinsky 5.0 (Kandinsky5TransformerDecoderBlock)
+    TransformerBlockRegistry.register(
+        model_class=Kandinsky5TransformerDecoderBlock,
+        metadata=TransformerBlockMetadata(
+            return_hidden_states_index=0,
+            return_encoder_hidden_states_index=None,
+        ),
+    )
+
+
 # fmt: off
 def _skip_attention___ret___hidden_states(self, *args, **kwargs):
     hidden_states = kwargs.get("hidden_states", None)
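For context, the two new registry entries describe where a block's forward output places `hidden_states` and `encoder_hidden_states` in its return value. Below is a minimal sketch of the same pattern for a hypothetical custom block (`MyJointStyleBlock` is illustrative and not part of this commit):

```python
import torch

from diffusers.hooks._helpers import TransformerBlockMetadata, TransformerBlockRegistry


class MyJointStyleBlock(torch.nn.Module):
    """Hypothetical block that returns (encoder_hidden_states, hidden_states)."""

    def forward(self, hidden_states, encoder_hidden_states):
        return encoder_hidden_states, hidden_states


# The metadata indices tell the hooks which element of the return tuple is which:
# hidden_states is element 1, encoder_hidden_states is element 0 (as for JointTransformerBlock).
TransformerBlockRegistry.register(
    model_class=MyJointStyleBlock,
    metadata=TransformerBlockMetadata(
        return_hidden_states_index=1,
        return_encoder_hidden_states_index=0,
    ),
)
```

A block that returns only `hidden_states`, like `Kandinsky5TransformerDecoderBlock` above, would instead use `return_hidden_states_index=0` and `return_encoder_hidden_states_index=None`.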

src/diffusers/hooks/mag_cache.py

Lines changed: 30 additions & 13 deletions
@@ -115,7 +115,7 @@ class MagCacheConfig:
     calibrate: bool = False
 
     def __post_init__(self):
-        # Strict validation: User MUST provide ratios OR enable calibration.
+        # User MUST provide ratios OR enable calibration.
         if self.mag_ratios is None and not self.calibrate:
             raise ValueError(
                 " `mag_ratios` must be provided for MagCache inference because these ratios are model-dependent.\n"
@@ -151,7 +151,7 @@ def __init__(self) -> None:
 
         # Current step counter (timestep index)
         self.step_index: int = 0
-        
+
         # Calibration storage
         self.calibration_ratios: List[float] = []
 
@@ -179,6 +179,9 @@ def initialize_hook(self, module):
         return module
 
     def new_forward(self, module: torch.nn.Module, *args, **kwargs):
+        if self.state_manager._current_context is None:
+            self.state_manager.set_context("inference")
+
         # Capture input hidden_states
         hidden_states = self._metadata._get_parameter_from_args_kwargs("hidden_states", args, kwargs)
 
@@ -225,6 +228,9 @@ def new_forward(self, module: torch.nn.Module, *args, **kwargs):
             output = hidden_states
             res = state.previous_residual
 
+            if res.device != output.device:
+                res = res.to(output.device)
+
             # Attempt to apply residual handling shape mismatches (e.g., text+image vs image only)
             if res.shape == output.shape:
                 output = output + res
@@ -320,7 +326,7 @@ def new_forward(self, module: torch.nn.Module, *args, **kwargs):
             out_hidden = output
 
             in_hidden = state.head_block_input
-            
+
             # Determine residual
             if out_hidden.shape == in_hidden.shape:
                 residual = out_hidden - in_hidden
@@ -345,28 +351,28 @@
     def _perform_calibration_step(self, state: MagCacheState, current_residual: torch.Tensor):
         if state.previous_residual is None:
             # First step has no previous residual to compare against.
-            # We log 1.0 as a neutral starting point.
+            # log 1.0 as a neutral starting point.
             ratio = 1.0
         else:
             # MagCache Calibration Formula: mean(norm(curr) / norm(prev))
             # norm(dim=-1) gives magnitude of each token vector
             curr_norm = torch.linalg.norm(current_residual.float(), dim=-1)
             prev_norm = torch.linalg.norm(state.previous_residual.float(), dim=-1)
-            
+
             # Avoid division by zero
             ratio = (curr_norm / (prev_norm + 1e-8)).mean().item()
-        
+
         state.calibration_ratios.append(ratio)
-    
+
     def _advance_step(self, state: MagCacheState):
         state.step_index += 1
         if state.step_index >= self.config.num_inference_steps:
             # End of inference loop
             if self.config.calibrate:
-                print(f"\n[MagCache] Calibration Complete. Copy these values to MagCacheConfig(mag_ratios=...):")
+                print("\n[MagCache] Calibration Complete. Copy these values to MagCacheConfig(mag_ratios=...):")
                 print(f"{state.calibration_ratios}\n")
                 logger.info(f"MagCache Calibration Results: {state.calibration_ratios}")
-            
+
             # Reset state
             state.step_index = 0
             state.accumulated_ratio = 1.0
@@ -386,6 +392,9 @@ def apply_mag_cache(module: torch.nn.Module, config: MagCacheConfig) -> None:
         config (`MagCacheConfig`):
             The configuration for MagCache.
     """
+    # Initialize registry on the root module so the Pipeline can set context.
+    HookRegistry.check_if_exists_or_initialize(module)
+
     state_manager = StateManager(MagCacheState, (), {})
     remaining_blocks = []
 
@@ -399,13 +408,11 @@ def apply_mag_cache(module: torch.nn.Module, config: MagCacheConfig) -> None:
         logger.warning("MagCache: No transformer blocks found to apply hooks.")
         return
 
+    # Handle single-block models
     if len(remaining_blocks) == 1:
-        # Single block case: It acts as both Head (Decision) and Tail (Residual Calc)
         name, block = remaining_blocks[0]
         logger.info(f"MagCache: Applying Head+Tail Hooks to single block '{name}'")
-        # Apply BlockHook (Tail) FIRST so it is the INNER wrapper
         _apply_mag_cache_block_hook(block, state_manager, config, is_tail=True)
-        # Apply HeadHook SECOND so it is the OUTER wrapper (controls flow)
         _apply_mag_cache_head_hook(block, state_manager, config)
         return
 
@@ -426,6 +433,11 @@ def _apply_mag_cache_head_hook(
     block: torch.nn.Module, state_manager: StateManager, config: MagCacheConfig
 ) -> None:
     registry = HookRegistry.check_if_exists_or_initialize(block)
+
+    # Automatically remove existing hook to allow re-application (e.g. switching modes)
+    if registry.get_hook(_MAG_CACHE_LEADER_BLOCK_HOOK) is not None:
+        registry.remove_hook(_MAG_CACHE_LEADER_BLOCK_HOOK)
+
     hook = MagCacheHeadHook(state_manager, config)
     registry.register_hook(hook, _MAG_CACHE_LEADER_BLOCK_HOOK)
 
@@ -437,5 +449,10 @@ def _apply_mag_cache_block_hook(
     is_tail: bool = False,
 ) -> None:
     registry = HookRegistry.check_if_exists_or_initialize(block)
+
+    # Automatically remove existing hook to allow re-application
+    if registry.get_hook(_MAG_CACHE_BLOCK_HOOK) is not None:
+        registry.remove_hook(_MAG_CACHE_BLOCK_HOOK)
+
     hook = MagCacheBlockHook(state_manager, is_tail, config)
-    registry.register_hook(hook, _MAG_CACHE_BLOCK_HOOK)
+    registry.register_hook(hook, _MAG_CACHE_BLOCK_HOOK)
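The calibration hunk above reduces to one formula: the per-step magnitude ratio is the mean over tokens of norm(current_residual) / norm(previous_residual), with a small epsilon guarding against division by zero. A standalone sketch of that computation follows; the `magnitude_ratio` helper and the `(batch, tokens, dim)` shapes are assumptions for illustration, not code from the commit:

```python
import torch


def magnitude_ratio(current_residual: torch.Tensor, previous_residual: torch.Tensor) -> float:
    # Per-token magnitudes: L2 norm over the feature dimension.
    curr_norm = torch.linalg.norm(current_residual.float(), dim=-1)
    prev_norm = torch.linalg.norm(previous_residual.float(), dim=-1)
    # Mean ratio across batch and tokens; epsilon avoids division by zero.
    return (curr_norm / (prev_norm + 1e-8)).mean().item()


# Example with assumed (batch, tokens, dim) shapes.
curr = torch.randn(2, 16, 64)
prev = torch.randn(2, 16, 64)
print(magnitude_ratio(curr, prev))  # one scalar per step, appended to calibration_ratios
```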

tests/hooks/test_mag_cache.py

Lines changed: 20 additions & 19 deletions
@@ -13,8 +13,9 @@
 # limitations under the License.
 
 import unittest
-import torch
+
 import numpy as np
+import torch
 
 from diffusers import MagCacheConfig, apply_mag_cache
 from diffusers.hooks._helpers import TransformerBlockMetadata, TransformerBlockRegistry
@@ -46,7 +47,7 @@ def forward(self, hidden_states, encoder_hidden_states=None):
 class TupleOutputBlock(torch.nn.Module):
     def __init__(self):
         super().__init__()
-        
+
     def forward(self, hidden_states, encoder_hidden_states=None, **kwargs):
         # Returns a tuple
         return hidden_states * 2.0, encoder_hidden_states
@@ -88,7 +89,7 @@ def _set_context(self, model, context_name):
         for module in model.modules():
             if hasattr(module, "_diffusers_hook"):
                 module._diffusers_hook._set_context(context_name)
-    
+
     def _get_calibration_data(self, model):
         for module in model.modules():
             if hasattr(module, "_diffusers_hook"):
@@ -143,25 +144,25 @@ def test_mag_cache_retention(self):
         """Test that retention_ratio prevents skipping even if error is low."""
         model = DummyTransformer()
         # Ratios that imply 0 error, so it *would* skip if retention allowed it
-        ratios = np.array([1.0, 1.0]) 
-        
+        ratios = np.array([1.0, 1.0])
+
         config = MagCacheConfig(
             threshold=100.0,
             num_inference_steps=2,
             retention_ratio=1.0,  # Force retention for ALL steps
             mag_ratios=ratios
         )
-        
+
         apply_mag_cache(model, config)
         self._set_context(model, "test_context")
-        
+
         # Step 0
         model(torch.tensor([[[10.0]]]))
-        
+
         # Step 1: Should COMPUTE (44.0) not SKIP (41.0) because of retention
         input_t1 = torch.tensor([[[11.0]]])
         output_t1 = model(input_t1)
-        
+
         self.assertTrue(
             torch.allclose(output_t1, torch.tensor([[[44.0]]])),
             f"Expected Compute (44.0) due to retention, got {output_t1.item()}"
@@ -171,29 +172,29 @@ def test_mag_cache_tuple_outputs(self):
         """Test compatibility with models returning (hidden, encoder_hidden) like Flux."""
         model = TupleTransformer()
         ratios = np.array([1.0, 1.0])
-        
+
         config = MagCacheConfig(
             threshold=100.0,
             num_inference_steps=2,
             retention_ratio=0.0,
             mag_ratios=ratios
         )
-        
+
         apply_mag_cache(model, config)
         self._set_context(model, "test_context")
-        
+
         # Step 0: Compute. Input 10.0 -> Output 20.0 (1 block * 2x)
         # Residual = 10.0
         input_t0 = torch.tensor([[[10.0]]])
         enc_t0 = torch.tensor([[[1.0]]])
         out_0, _ = model(input_t0, encoder_hidden_states=enc_t0)
         self.assertTrue(torch.allclose(out_0, torch.tensor([[[20.0]]])))
-        
+
         # Step 1: Skip. Input 11.0.
         # Skipped Output = 11 + 10 = 21.0
         input_t1 = torch.tensor([[[11.0]]])
         out_1, _ = model(input_t1, encoder_hidden_states=enc_t0)
-        
+
         self.assertTrue(
             torch.allclose(out_1, torch.tensor([[[21.0]]])),
             f"Tuple skip failed. Expected 21.0, got {out_1.item()}"
@@ -203,8 +204,8 @@ def test_mag_cache_reset(self):
         """Test that state resets correctly after num_inference_steps."""
         model = DummyTransformer()
         config = MagCacheConfig(
-            threshold=100.0, 
-            num_inference_steps=2, 
+            threshold=100.0,
+            num_inference_steps=2,
             retention_ratio=0.0,
             mag_ratios=np.array([1.0, 1.0])
         )
@@ -237,7 +238,7 @@ def test_mag_cache_calibration(self):
         # HeadInput = 10. Output = 40. Residual = 30.
         # Ratio 0 is placeholder 1.0
         model(torch.tensor([[[10.0]]]))
-        
+
         # Check intermediate state
         ratios = self._get_calibration_data(model)
         self.assertEqual(len(ratios), 1)
@@ -248,10 +249,10 @@ def test_mag_cache_calibration(self):
         # PrevResidual = 30. CurrResidual = 30.
         # Ratio = 30/30 = 1.0
         model(torch.tensor([[[10.0]]]))
-        
+
         # Verify it computes fully (no skip)
         # If it skipped, output would be 41.0. It should be 40.0
         # Actually in test setup, input is same (10.0) so output 40.0.
         # Let's ensure list is empty after reset (end of step 1)
         ratios_after = self._get_calibration_data(model)
-        self.assertEqual(ratios_after, [])
+        self.assertEqual(ratios_after, [])
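Taken together, the three files support a calibrate-then-infer workflow. The following is a hedged usage sketch built from the tests above; `DummyTransformer` is the toy model from this test file standing in for any transformer with registered blocks, and the config values marked as assumed are illustrative rather than recommended defaults:

```python
import numpy as np
import torch

from diffusers import MagCacheConfig, apply_mag_cache

model = DummyTransformer()

# Pass 1: calibration. No mag_ratios are given, so calibrate=True is required;
# the collected ratios are printed after the final step and the state resets.
apply_mag_cache(model, MagCacheConfig(num_inference_steps=2, calibrate=True))
for _ in range(2):
    model(torch.tensor([[[10.0]]]))

# Pass 2: inference with the calibrated ratios. The new hook-removal logic lets
# apply_mag_cache be called again on the same model to switch modes, and the
# context now defaults to "inference" if the pipeline never sets one.
apply_mag_cache(
    model,
    MagCacheConfig(
        threshold=0.06,                    # assumed value; model-dependent
        num_inference_steps=2,
        retention_ratio=0.2,               # assumed value
        mag_ratios=np.array([1.0, 1.0]),   # paste the printed calibration output here
    ),
)
for _ in range(2):
    output = model(torch.tensor([[[10.0]]]))
```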
