Commit 7d47364
Message: fixes
Parent: a799ba8

3 files changed: +21 additions, -13 deletions

src/diffusers/pipelines/pipeline_utils.py (7 additions, 9 deletions)

```diff
@@ -392,6 +392,13 @@ def to(self, *args, **kwargs):
 
         device = device or device_arg
 
+        pipeline_has_bnb = any(any((_check_bnb_status(module))) for _, module in self.components.items())
+        # PR: https://github.com/huggingface/accelerate/pull/3223/
+        if pipeline_has_bnb and torch.device(device).type == "cuda" and is_accelerate_version("<", "1.1.0.dev0"):
+            raise ValueError(
+                "You are trying to call `.to('cuda')` on a pipeline that has models quantized with `bitsandbytes`. Your current `accelerate` installation does not support it. Please upgrade the installation."
+            )
+
         # throw warning if pipeline is in "offloaded"-mode but user tries to manually set to GPU.
         def module_is_sequentially_offloaded(module):
             if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
@@ -424,15 +431,6 @@ def module_is_offloaded(module):
                 "It seems like you have activated a device mapping strategy on the pipeline which doesn't allow explicit device placement using `to()`. You can call `reset_device_map()` first and then call `to()`."
             )
 
-        pipeline_has_bnb = any(
-            (_check_bnb_status(module)[1] or _check_bnb_status(module)[-1]) for _, module in self.components.items()
-        )
-        # PR: https://github.com/huggingface/accelerate/pull/3223/
-        if pipeline_has_bnb and torch.device(device).type == "cuda" and is_accelerate_version("<", "1.1.0.dev0"):
-            raise ValueError(
-                "You are trying to call `.to('cuda')` on a pipeline that has models quantized with `bitsandbytes`. Your current `accelerate` installation does not support it. Please upgrade the installation."
-            )
-
         # Display a warning in this case (the operation succeeds but the benefits are lost)
         pipeline_is_offloaded = any(module_is_offloaded(module) for _, module in self.components.items())
         if pipeline_is_offloaded and device and torch.device(device).type == "cuda":
```
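Note: this moves the bitsandbytes guard so it runs before the offloading and device-map checks in `to()`, and it simplifies the predicate: rather than calling `_check_bnb_status(module)` twice and testing only its 4-bit and 8-bit flags, it calls the helper once per module and fires if any element of the returned status tuple is truthy. A minimal sketch of the user-facing behavior the guard enforces; the checkpoint ID and the choice of quantized component are illustrative assumptions, not part of this diff:

```python
# Sketch only: assumes bitsandbytes is installed and accelerate < 1.1.0.dev0.
import torch
from diffusers import BitsAndBytesConfig, DiffusionPipeline, SD3Transformer2DModel

model_id = "stabilityai/stable-diffusion-3-medium-diffusers"  # assumed checkpoint
nf4_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4")
transformer = SD3Transformer2DModel.from_pretrained(
    model_id, subfolder="transformer", quantization_config=nf4_config
)
pipe = DiffusionPipeline.from_pretrained(
    model_id, transformer=transformer, torch_dtype=torch.float16
)

# With an old accelerate, the guard now raises here, before any offload checks run:
try:
    pipe.to("cuda")
except ValueError as err:
    print(err)  # "You are trying to call `.to('cuda')` on a pipeline that has models quantized ..."
```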

tests/quantization/bnb/test_4bit.py (7 additions, 2 deletions)

```diff
@@ -18,19 +18,20 @@
 import unittest
 
 import numpy as np
+import pytest
 import safetensors.torch
 
 from diffusers import BitsAndBytesConfig, DiffusionPipeline, FluxTransformer2DModel, SD3Transformer2DModel
 from diffusers.utils import logging
 from diffusers.utils.testing_utils import (
     CaptureLogger,
+    is_accelerate_version,
     is_bitsandbytes_available,
     is_torch_available,
     is_transformers_available,
     load_pt,
     numpy_cosine_similarity_distance,
     require_accelerate,
-    require_accelerate_version_greater,
     require_bitsandbytes_version_greater,
     require_torch,
     require_torch_gpu,
@@ -485,7 +486,11 @@ def test_moving_to_cpu_throws_warning(self):
 
         assert "Pipelines loaded with `dtype=torch.float16`" in cap_logger.out
 
-    @require_accelerate_version_greater("1.0.0")
+    @pytest.mark.xfail(
+        condition=is_accelerate_version("<=", "1.1.1"),
+        reason="Test will pass after https://github.com/huggingface/accelerate/pull/3223 is in a release.",
+        strict=True,
+    )
     def test_pipeline_cuda_placement_works_with_nf4(self):
         transformer_nf4_config = BitsAndBytesConfig(
             load_in_4bit=True,
```
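Note: swapping `@require_accelerate_version_greater("1.0.0")` for a strict `xfail` changes the semantics: on released `accelerate` versions up to 1.1.1 the test is expected to fail and is reported as XFAIL, and `strict=True` turns an unexpected pass into a test failure, so CI flags the moment a release containing the fix is picked up and the mark can be removed. The same decorator change is applied to the mixed-int8 test below. A standalone sketch of strict-xfail behavior, with an illustrative `HAS_FIX` flag standing in for the version check:

```python
# Minimal pytest example; HAS_FIX is a stand-in for is_accelerate_version(">", "1.1.1").
import pytest

HAS_FIX = False

@pytest.mark.xfail(condition=not HAS_FIX, reason="needs accelerate#3223", strict=True)
def test_cuda_placement():
    # Fails today -> counted as XFAIL; if it starts passing while still
    # marked, pytest reports XPASS(strict) as a test failure.
    assert HAS_FIX
```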

tests/quantization/bnb/test_mixed_int8.py (7 additions, 2 deletions)

```diff
@@ -17,17 +17,18 @@
 import unittest
 
 import numpy as np
+import pytest
 
 from diffusers import BitsAndBytesConfig, DiffusionPipeline, FluxTransformer2DModel, SD3Transformer2DModel, logging
 from diffusers.utils.testing_utils import (
     CaptureLogger,
+    is_accelerate_version,
     is_bitsandbytes_available,
     is_torch_available,
     is_transformers_available,
     load_pt,
     numpy_cosine_similarity_distance,
     require_accelerate,
-    require_accelerate_version_greater,
     require_bitsandbytes_version_greater,
     require_torch,
     require_torch_gpu,
@@ -434,7 +435,11 @@ def test_generate_quality_dequantize(self):
             output_type="np",
         ).images
 
-    @require_accelerate_version_greater("1.0.0")
+    @pytest.mark.xfail(
+        condition=is_accelerate_version("<=", "1.1.1"),
+        reason="Test will pass after https://github.com/huggingface/accelerate/pull/3223 is in a release.",
+        strict=True,
+    )
     def test_pipeline_cuda_placement_works_with_mixed_int8(self):
         transformer_8bit_config = BitsAndBytesConfig(load_in_8bit=True)
         transformer_8bit = SD3Transformer2DModel.from_pretrained(
```
