
Commit 12eeb25

update
1 parent 1ddf3f3 commit 12eeb25

3 files changed: +8 -44 lines changed


src/diffusers/models/model_loading_utils.py
Lines changed: 2 additions & 1 deletion

@@ -264,7 +264,8 @@ def load_model_dict_into_meta(
         old_param = None
 
         if old_param is not None:
-            if dtype is None:
+            # Do not cast parameters if the model is quantized
+            if dtype is None and hf_quantizer is None:
                 param = param.to(old_param.dtype)
 
             if old_param.is_contiguous():
src/diffusers/utils/testing_utils.py
Lines changed: 0 additions & 40 deletions

@@ -320,21 +320,6 @@ def require_torch_multi_gpu(test_case):
     return unittest.skipUnless(torch.cuda.device_count() > 1, "test requires multiple GPUs")(test_case)
 
 
-def require_torch_multi_accelerator(test_case):
-    """
-    Decorator marking a test that requires a multi-accelerator setup (in PyTorch). These tests are skipped on a machine
-    without multiple hardware accelerators.
-    """
-    if not is_torch_available():
-        return unittest.skip("test requires PyTorch")(test_case)
-
-    import torch
-
-    return unittest.skipUnless(
-        torch.cuda.device_count() > 1 or torch.xpu.device_count() > 1, "test requires multiple hardware accelerators"
-    )(test_case)
-
-
 def require_torch_accelerator_with_fp16(test_case):
     """Decorator marking a test that requires an accelerator with support for the FP16 data type."""
     return unittest.skipUnless(_is_torch_fp16_available(torch_device), "test requires accelerator with fp16 support")(

@@ -369,31 +354,6 @@ def require_big_gpu_with_torch_cuda(test_case):
     )(test_case)
 
 
-def require_big_accelerator(test_case):
-    """
-    Decorator marking a test that requires a bigger hardware accelerator (24GB) for execution. Some example pipelines:
-    Flux, SD3, Cog, etc.
-    """
-    if not is_torch_available():
-        return unittest.skip("test requires PyTorch")(test_case)
-
-    import torch
-
-    if not (torch.cuda.is_available() or torch.xpu.is_available()):
-        return unittest.skip("test requires PyTorch CUDA")(test_case)
-
-    if torch.xpu.is_available():
-        device_properties = torch.xpu.get_device_properties(0)
-    else:
-        device_properties = torch.cuda.get_device_properties(0)
-
-    total_memory = device_properties.total_memory / (1024**3)
-    return unittest.skipUnless(
-        total_memory >= BIG_GPU_MEMORY,
-        f"test requires a hardware accelerator with at least {BIG_GPU_MEMORY} GB memory",
-    )(test_case)
-
-
 def require_torch_accelerator_with_training(test_case):
     """Decorator marking a test that requires an accelerator with support for training."""
     return unittest.skipUnless(
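Downstream test suites that still reference the removed decorator can recreate it locally. A minimal stand-in mirroring the deleted skip condition (the `hasattr` guard for PyTorch builds without `torch.xpu` is an addition, not from the original helper):

```python
import unittest

import torch


def require_torch_multi_accelerator(test_case):
    # Local re-creation of the removed helper; no longer part of
    # diffusers.utils.testing_utils after this commit.
    has_multi = torch.cuda.device_count() > 1 or (
        hasattr(torch, "xpu") and torch.xpu.device_count() > 1
    )
    return unittest.skipUnless(has_multi, "test requires multiple hardware accelerators")(test_case)
```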

tests/quantization/bnb/test_mixed_int8.py
Lines changed: 6 additions & 3 deletions

@@ -90,13 +90,16 @@ class Base8bitTests(unittest.TestCase):
 
     def get_dummy_inputs(self):
         prompt_embeds = load_pt(
-            "https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/prompt_embeds.pt"
+            "https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/prompt_embeds.pt",
+            map_location="cpu",
         )
         pooled_prompt_embeds = load_pt(
-            "https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/pooled_prompt_embeds.pt"
+            "https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/pooled_prompt_embeds.pt",
+            map_location="cpu",
         )
         latent_model_input = load_pt(
-            "https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/latent_model_input.pt"
+            "https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/latent_model_input.pt",
+            map_location="cpu",
         )
 
         input_dict_for_transformer = {
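The `map_location="cpu"` argument presumably flows through to `torch.load` (the exact `load_pt` signature is not shown in this diff), which remaps accelerator-saved storages to host memory at deserialization time. A self-contained sketch of that semantics, independent of the helper:

```python
import io

import torch

# Save a tensor, then load it with map_location="cpu": any storages saved from
# an accelerator are remapped to host memory, so the test inputs deserialize
# without requiring a GPU to be present.
buffer = io.BytesIO()
torch.save(torch.ones(2, 2), buffer)
buffer.seek(0)
tensor = torch.load(buffer, map_location="cpu")
assert tensor.device.type == "cpu"
```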
