Commit 0ae70fe

fix dtype checks in pipeline.
1 parent 700b0f3

2 files changed: +5 additions, -2 deletions

src/diffusers/pipelines/pipeline_utils.py

Lines changed: 1 addition & 1 deletion
@@ -450,7 +450,7 @@ def module_is_offloaded(module):
             # https://github.com/huggingface/transformers/pull/33122. So, we guard this accordingly.
             if is_loaded_in_4bit_bnb and device is not None and is_transformers_version(">", "4.44.0"):
                 module.to(device=device)
-            elif not is_loaded_in_8bit_bnb:
+            elif not is_loaded_in_4bit_bnb and not is_loaded_in_8bit_bnb:
                 module.to(device, dtype)

             module_has_int_weights = any(

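Why the guard changed: with the old `elif not is_loaded_in_8bit_bnb:`, a 4-bit bnb module that skipped the first branch (for instance when `device` is None, or on transformers <= 4.44.0) still reached `module.to(device, dtype)` and was dtype-cast, which bitsandbytes does not allow. Below is a minimal sketch of the corrected branching, with the transformers version guard omitted and the two flags passed in explicitly; the helper name `move_module` is illustrative, not from the codebase.

# Minimal sketch of the corrected control flow; the real code also gates
# the first branch on is_transformers_version(">", "4.44.0").
def move_module(module, device, dtype, is_loaded_in_4bit_bnb, is_loaded_in_8bit_bnb):
    if is_loaded_in_4bit_bnb and device is not None:
        # 4-bit bnb modules may change device, but must keep their dtype.
        module.to(device=device)
    elif not is_loaded_in_4bit_bnb and not is_loaded_in_8bit_bnb:
        # Only unquantized modules accept a combined device/dtype move.
        module.to(device, dtype)
    # Before the fix, a 4-bit module that fell past the first branch
    # (e.g. device=None) matched `not is_loaded_in_8bit_bnb` and was
    # cast to `dtype`, which bitsandbytes forbids.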
tests/quantization/bnb/test_4bit.py

Lines changed: 4 additions & 1 deletion
@@ -260,7 +260,7 @@ def test_device_assignment(self):
     def test_device_and_dtype_assignment(self):
         r"""
         Test whether trying to cast (or assigning a device to) a model after converting it in 4-bit will throw an error.
-        Checks also if other models are casted correctly.
+        Checks also if other models are casted correctly. Device placement, however, is supported.
         """
         with self.assertRaises(ValueError):
             # Tries with a `dtype`
@@ -278,6 +278,9 @@ def test_device_and_dtype_assignment(self):
             # Tries with a cast
             self.model_4bit.half()

+            # This should work
+            self.model_4bit.to("cuda")
+
             # Test if we did not break anything
             self.model_fp16 = self.model_fp16.to(dtype=torch.float32, device=torch_device)
             input_dict_for_transformer = self.get_dummy_inputs()

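In user terms, the test now pins down the behavior this commit restores: moving a 4-bit model to a device succeeds, while any dtype cast still raises. A hedged sketch mirroring what the test exercises; the checkpoint id is a placeholder, so substitute any model diffusers can load with a 4-bit BitsAndBytesConfig.

import torch
from diffusers import BitsAndBytesConfig, SD3Transformer2DModel

# Placeholder checkpoint; any model diffusers can load in 4-bit works here.
model_4bit = SD3Transformer2DModel.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",
    subfolder="transformer",
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)

model_4bit.to("cuda")  # device placement: allowed after this commit
try:
    model_4bit.to(torch.float16)  # dtype cast on 4-bit weights
except ValueError as err:
    print(err)  # raises, as the test asserts with assertRaises(ValueError)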