
Commit 0305b5a

Merge branch 'main' into enable-compilation

2 parents 20e30cb + 4b17fa2

File tree

10 files changed: +366 −3 lines

src/diffusers/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -139,6 +139,7 @@
     "AutoGuidance",
     "ClassifierFreeGuidance",
     "ClassifierFreeZeroStarGuidance",
+    "FrequencyDecoupledGuidance",
     "PerturbedAttentionGuidance",
     "SkipLayerGuidance",
     "SmoothedEnergyGuidance",
@@ -804,6 +805,7 @@
     AutoGuidance,
     ClassifierFreeGuidance,
     ClassifierFreeZeroStarGuidance,
+    FrequencyDecoupledGuidance,
     PerturbedAttentionGuidance,
     SkipLayerGuidance,
     SmoothedEnergyGuidance,

src/diffusers/guiders/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -22,6 +22,7 @@
 from .auto_guidance import AutoGuidance
 from .classifier_free_guidance import ClassifierFreeGuidance
 from .classifier_free_zero_star_guidance import ClassifierFreeZeroStarGuidance
+from .frequency_decoupled_guidance import FrequencyDecoupledGuidance
 from .perturbed_attention_guidance import PerturbedAttentionGuidance
 from .skip_layer_guidance import SkipLayerGuidance
 from .smoothed_energy_guidance import SmoothedEnergyGuidance
@@ -32,6 +33,7 @@
     AutoGuidance,
     ClassifierFreeGuidance,
     ClassifierFreeZeroStarGuidance,
+    FrequencyDecoupledGuidance,
     PerturbedAttentionGuidance,
     SkipLayerGuidance,
     SmoothedEnergyGuidance,

src/diffusers/guiders/frequency_decoupled_guidance.py

Lines changed: 327 additions & 0 deletions
Large diffs are not rendered by default.
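As its name suggests, frequency-decoupled guidance applies separate guidance scales to low- and high-frequency components of the prediction, which is also why this PR wires up a kornia availability check. The sketch below is a conceptual illustration only, not the contents of this 327-line file; the band split, filter, and scales are assumptions:

    # Conceptual sketch, NOT the actual implementation in this file.
    import torch
    from kornia.filters import gaussian_blur2d

    def fdg_combine(pred_cond, pred_uncond, w_low=3.0, w_high=7.5):
        # split the guidance direction into low/high frequency bands
        diff = pred_cond - pred_uncond
        low = gaussian_blur2d(diff, kernel_size=(9, 9), sigma=(2.0, 2.0))
        high = diff - low
        # apply a separate guidance scale to each band
        return pred_uncond + w_low * low + w_high * high

    out = fdg_combine(torch.randn(1, 4, 64, 64), torch.randn(1, 4, 64, 64))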

src/diffusers/models/transformers/transformer_flux.py

Lines changed: 1 addition & 1 deletion

@@ -384,7 +384,7 @@ def forward(
         temb: torch.Tensor,
         image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
         joint_attention_kwargs: Optional[Dict[str, Any]] = None,
-    ) -> torch.Tensor:
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
         text_seq_len = encoder_hidden_states.shape[1]
         hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1)

src/diffusers/utils/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -82,6 +82,7 @@
     is_k_diffusion_available,
     is_k_diffusion_version,
     is_kernels_available,
+    is_kornia_available,
     is_librosa_available,
     is_matplotlib_available,
     is_nltk_available,

src/diffusers/utils/dummy_pt_objects.py

Lines changed: 15 additions & 0 deletions

@@ -62,6 +62,21 @@ def from_pretrained(cls, *args, **kwargs):
         requires_backends(cls, ["torch"])


+class FrequencyDecoupledGuidance(metaclass=DummyObject):
+    _backends = ["torch"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch"])
+
+
 class PerturbedAttentionGuidance(metaclass=DummyObject):
     _backends = ["torch"]
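For reference, DummyObject placeholders like the one added above keep "from diffusers import ..." working when torch is absent and defer the failure to first use. A sketch of that behavior (a torch-less environment is assumed; with torch installed the real class is exported and this prints nothing):

    from diffusers import FrequencyDecoupledGuidance
    from diffusers.utils import is_torch_available

    if not is_torch_available():
        try:
            FrequencyDecoupledGuidance()
        except ImportError as err:
            # requires_backends raises with install instructions for "torch"
            print(err)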

src/diffusers/utils/import_utils.py

Lines changed: 5 additions & 0 deletions

@@ -224,6 +224,7 @@ def _is_package_available(pkg_name: str, get_dist_name: bool = False) -> Tuple[b
 _sageattention_available, _sageattention_version = _is_package_available("sageattention")
 _flash_attn_available, _flash_attn_version = _is_package_available("flash_attn")
 _flash_attn_3_available, _flash_attn_3_version = _is_package_available("flash_attn_3")
+_kornia_available, _kornia_version = _is_package_available("kornia")


 def is_torch_available():
@@ -398,6 +399,10 @@ def is_flash_attn_3_available():
     return _flash_attn_3_available


+def is_kornia_available():
+    return _kornia_available
+
+
 # docstyle-ignore
 FLAX_IMPORT_ERROR = """
 {0} requires the FLAX library but it was not found in your environment. Checkout the instructions on the
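A minimal sketch of how such a helper typically guards an optional dependency at import time (the guarded usage here is illustrative, not taken from this diff):

    from diffusers.utils import is_kornia_available

    if is_kornia_available():
        import kornia  # only imported when the package is actually installed
    else:
        kornia = None  # kornia-backed features should raise a clear error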

tests/quantization/bnb/test_4bit.py

Lines changed: 1 addition & 0 deletions

@@ -886,6 +886,7 @@ def quantization_config(self):
         components_to_quantize=["transformer", "text_encoder_2"],
     )

+    @require_bitsandbytes_version_greater("0.46.1")
     def test_torch_compile(self):
         torch._dynamo.config.capture_dynamic_output_shape_ops = True
         super().test_torch_compile()

tests/quantization/bnb/test_mixed_int8.py

Lines changed: 4 additions & 0 deletions

@@ -847,6 +847,10 @@ def quantization_config(self):
         components_to_quantize=["transformer", "text_encoder_2"],
     )

+    @pytest.mark.xfail(
+        reason="Test fails because of an offloading problem from Accelerate with confusion in hooks."
+        " Test passes without recompilation context manager. Refer to https://github.com/huggingface/diffusers/pull/12002/files#r2240462757 for details."
+    )
     def test_torch_compile(self):
         torch._dynamo.config.capture_dynamic_output_shape_ops = True
         super()._test_torch_compile(torch_dtype=torch.float16)

tests/quantization/test_torch_compile_utils.py

Lines changed: 8 additions & 2 deletions

@@ -56,12 +56,18 @@ def _test_torch_compile(self, torch_dtype=torch.bfloat16):
         pipe.transformer.compile(fullgraph=True)

         # small resolutions to ensure speedy execution.
-        pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
+        with torch._dynamo.config.patch(error_on_recompile=True):
+            pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)

     def _test_torch_compile_with_cpu_offload(self, torch_dtype=torch.bfloat16):
         pipe = self._init_pipeline(self.quantization_config, torch_dtype)
         pipe.enable_model_cpu_offload()
-        pipe.transformer.compile()
+        # regional compilation is better for offloading.
+        # see: https://pytorch.org/blog/torch-compile-and-diffusers-a-hands-on-guide-to-peak-performance/
+        if getattr(pipe.transformer, "_repeated_blocks"):
+            pipe.transformer.compile_repeated_blocks(fullgraph=True)
+        else:
+            pipe.transformer.compile()

         # small resolutions to ensure speedy execution.
         pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
