Commit c062b08

Commit message: fix conflicts.
Merge commit with 2 parents: 5ad508f + a7e9f85

4 files changed: +40 −20 lines


src/diffusers/utils/testing_utils.py

Lines changed: 11 additions & 0 deletions
@@ -1186,6 +1186,13 @@ def _is_torch_fp64_available(device):
     "mps": 0,
     "default": 0,
 }
+BACKEND_SYNCHRONIZE = {
+    "cuda": torch.cuda.synchronize,
+    "xpu": getattr(torch.xpu, "synchronize", None),
+    "cpu": None,
+    "mps": None,
+    "default": None,
+}
 
 
 # This dispatches a defined function according to the accelerator from the function definitions.
@@ -1208,6 +1215,10 @@ def backend_manual_seed(device: str, seed: int):
     return _device_agnostic_dispatch(device, BACKEND_MANUAL_SEED, seed)
 
 
+def backend_synchronize(device: str):
+    return _device_agnostic_dispatch(device, BACKEND_SYNCHRONIZE)
+
+
 def backend_empty_cache(device: str):
     return _device_agnostic_dispatch(device, BACKEND_EMPTY_CACHE)
 
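The new BACKEND_SYNCHRONIZE table plugs into the same device-agnostic dispatch used by the other backend_* helpers: the test passes a device string, the table supplies the matching accelerator call, and a None entry means "nothing to do" for that backend. Below is a minimal sketch of that pattern; the names mirror the diff, but the dispatcher is a simplified stand-in for diffusers' internal _device_agnostic_dispatch, not the library code.

from typing import Any, Callable, Dict, Optional

import torch

# Sketch of the dispatch-table pattern used by the testing utilities.
BACKEND_SYNCHRONIZE: Dict[str, Optional[Callable]] = {
    "cuda": torch.cuda.synchronize,
    "xpu": getattr(torch.xpu, "synchronize", None) if hasattr(torch, "xpu") else None,
    "cpu": None,
    "mps": None,
    "default": None,
}


def _dispatch(device: str, table: Dict[str, Optional[Callable]], *args: Any) -> Any:
    backend = device.split(":")[0]      # "cuda:0" -> "cuda"
    fn = table.get(backend, table["default"])
    if fn is None:
        return None                     # no-op for backends with nothing to synchronize
    return fn(*args)


def backend_synchronize(device: str) -> None:
    _dispatch(device, BACKEND_SYNCHRONIZE)

With this in place, a test can call backend_synchronize(torch_device) unconditionally: on CUDA or XPU it waits for queued kernels, while on CPU and MPS it degrades to a no-op, which is what the memory tests below rely on.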

tests/models/test_modeling_common.py

Lines changed: 11 additions & 8 deletions
@@ -59,6 +59,9 @@
 from diffusers.utils.testing_utils import (
     CaptureLogger,
     backend_empty_cache,
+    backend_max_memory_allocated,
+    backend_reset_peak_memory_stats,
+    backend_synchronize,
     get_python_version,
     is_torch_compile,
     numpy_cosine_similarity_distance,
@@ -340,7 +343,7 @@ def test_weight_overwrite(self):
 
         assert model.config.in_channels == 9
 
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_keep_modules_in_fp32(self):
         r"""
         A simple tests to check if the modules under `_keep_in_fp32_modules` are kept in fp32 when we load the model in fp16/bf16
@@ -1479,16 +1482,16 @@ def test_layerwise_casting(storage_dtype, compute_dtype):
         test_layerwise_casting(torch.float8_e5m2, torch.float32)
         test_layerwise_casting(torch.float8_e4m3fn, torch.bfloat16)
 
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_layerwise_casting_memory(self):
         MB_TOLERANCE = 0.2
         LEAST_COMPUTE_CAPABILITY = 8.0
 
         def reset_memory_stats():
             gc.collect()
-            torch.cuda.synchronize()
-            torch.cuda.empty_cache()
-            torch.cuda.reset_peak_memory_stats()
+            backend_synchronize(torch_device)
+            backend_empty_cache(torch_device)
+            backend_reset_peak_memory_stats(torch_device)
 
         def get_memory_usage(storage_dtype, compute_dtype):
             torch.manual_seed(0)
@@ -1501,7 +1504,7 @@ def get_memory_usage(storage_dtype, compute_dtype):
             reset_memory_stats()
             model(**inputs_dict)
             model_memory_footprint = model.get_memory_footprint()
-            peak_inference_memory_allocated_mb = torch.cuda.max_memory_allocated() / 1024**2
+            peak_inference_memory_allocated_mb = backend_max_memory_allocated(torch_device) / 1024**2
 
             return model_memory_footprint, peak_inference_memory_allocated_mb
 
@@ -1511,7 +1514,7 @@ def get_memory_usage(storage_dtype, compute_dtype):
             torch.float8_e4m3fn, torch.bfloat16
         )
 
-        compute_capability = get_torch_cuda_device_capability()
+        compute_capability = get_torch_cuda_device_capability() if torch_device == "cuda" else None
         self.assertTrue(fp8_e4m3_bf16_memory_footprint < fp8_e4m3_fp32_memory_footprint < fp32_memory_footprint)
         # NOTE: the following assertion would fail on our CI (running Tesla T4) due to bf16 using more memory than fp32.
         # On other devices, such as DGX (Ampere) and Audace (Ada), the test passes. So, we conditionally check it.
@@ -1526,7 +1529,7 @@ def get_memory_usage(storage_dtype, compute_dtype):
         )
 
     @parameterized.expand([False, True])
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_group_offloading(self, record_stream):
        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
        torch.manual_seed(0)
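The memory test now follows one device-agnostic recipe: collect garbage, synchronize, empty the allocator cache, reset peak statistics, run inference, then read back the peak allocation through the dispatcher. A standalone sketch of that flow is below; measure_peak_inference_memory is a hypothetical helper name, the imports are the testing_utils helpers the test switches to, and, as in the test, an accelerator backend is assumed to be active.

import gc

import torch

from diffusers.utils.testing_utils import (
    backend_empty_cache,
    backend_max_memory_allocated,
    backend_reset_peak_memory_stats,
    backend_synchronize,
    torch_device,
)


def measure_peak_inference_memory(model, inputs):
    """Hypothetical helper: peak inference memory in MB on the active accelerator."""
    gc.collect()
    backend_synchronize(torch_device)             # wait for pending kernels
    backend_empty_cache(torch_device)             # release cached allocator blocks
    backend_reset_peak_memory_stats(torch_device)

    with torch.no_grad():
        model(**inputs)

    # Peak bytes recorded since the reset, converted to mebibytes.
    return backend_max_memory_allocated(torch_device) / 1024**2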

tests/pipelines/test_pipelines_common.py

Lines changed: 2 additions & 2 deletions
@@ -53,7 +53,7 @@
     require_accelerator,
     require_hf_hub_version_greater,
     require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
     require_transformers_version_greater,
     skip_mps,
     torch_device,
@@ -2212,7 +2212,7 @@ def test_layerwise_casting_inference(self):
         inputs = self.get_dummy_inputs(torch_device)
         _ = pipe(**inputs)[0]
 
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_group_offloading_inference(self):
         if not self.test_group_offloading:
             return
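The only change here is widening the gating decorator from CUDA-only to any supported accelerator. Conceptually, such a decorator skips the test unless a non-CPU torch backend is usable; the snippet below is a simplified illustrative stand-in (require_accelerator_sketch and _accelerator_available are hypothetical names, not the diffusers API).

import unittest

import torch


def _accelerator_available() -> bool:
    # Hypothetical check: is any non-CPU torch backend usable on this machine?
    if torch.cuda.is_available():
        return True
    if getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
        return True
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        return True
    return False


def require_accelerator_sketch(test_case):
    # Simplified stand-in for a decorator like require_torch_accelerator.
    return unittest.skipUnless(_accelerator_available(), "test requires a torch accelerator")(test_case)


class ExampleTest(unittest.TestCase):
    @require_accelerator_sketch
    def test_only_runs_with_an_accelerator(self):
        self.assertTrue(_accelerator_available())

Swapping @require_torch_gpu for the accelerator-level decorator is what lets the group-offloading inference test run on non-CUDA runners such as XPU instead of being skipped outright.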

tests/quantization/quanto/test_quanto.py

Lines changed: 16 additions & 10 deletions
@@ -6,10 +6,13 @@
 from diffusers.models.attention_processor import Attention
 from diffusers.utils import is_optimum_quanto_available, is_torch_available
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    backend_reset_peak_memory_stats,
+    enable_full_determinism,
     nightly,
     numpy_cosine_similarity_distance,
     require_accelerate,
-    require_big_gpu_with_torch_cuda,
+    require_big_accelerator,
     require_torch_cuda_compatibility,
     torch_device,
 )
@@ -23,9 +26,11 @@
 
 from ..utils import LoRALayer, get_memory_consumption_stat
 
+enable_full_determinism()
+
 
 @nightly
-@require_big_gpu_with_torch_cuda
+@require_big_accelerator
 @require_accelerate
 class QuantoBaseTesterMixin:
     model_id = None
@@ -39,13 +44,13 @@ class QuantoBaseTesterMixin:
     _test_torch_compile = False
 
     def setUp(self):
-        torch.cuda.reset_peak_memory_stats()
-        torch.cuda.empty_cache()
+        backend_reset_peak_memory_stats(torch_device)
+        backend_empty_cache(torch_device)
         gc.collect()
 
     def tearDown(self):
-        torch.cuda.reset_peak_memory_stats()
-        torch.cuda.empty_cache()
+        backend_reset_peak_memory_stats(torch_device)
+        backend_empty_cache(torch_device)
         gc.collect()
 
     def get_dummy_init_kwargs(self):
@@ -89,7 +94,7 @@ def test_keep_modules_in_fp32(self):
         self.model_cls._keep_in_fp32_modules = self.keep_in_fp32_module
 
         model = self.model_cls.from_pretrained(**self.get_dummy_model_init_kwargs())
-        model.to("cuda")
+        model.to(torch_device)
 
         for name, module in model.named_modules():
             if isinstance(module, torch.nn.Linear):
@@ -107,7 +112,7 @@ def test_modules_to_not_convert(self):
         init_kwargs.update({"quantization_config": quantization_config})
 
         model = self.model_cls.from_pretrained(**init_kwargs)
-        model.to("cuda")
+        model.to(torch_device)
 
         for name, module in model.named_modules():
             if name in self.modules_to_not_convert:
@@ -122,7 +127,8 @@ def test_dtype_assignment(self):
 
         with self.assertRaises(ValueError):
             # Tries with a `device` and `dtype`
-            model.to(device="cuda:0", dtype=torch.float16)
+            device_0 = f"{torch_device}:0"
+            model.to(device=device_0, dtype=torch.float16)
 
         with self.assertRaises(ValueError):
             # Tries with a cast
@@ -133,7 +139,7 @@ def test_dtype_assignment(self):
             model.half()
 
         # This should work
-        model.to("cuda")
+        model.to(torch_device)
 
     def test_serialization(self):
         model = self.model_cls.from_pretrained(**self.get_dummy_model_init_kwargs())
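The quanto changes repeat the same recipe at class scope: setUp/tearDown reset accelerator memory state through the backend_* dispatchers, models move to torch_device instead of a hard-coded "cuda", and an ordinal is appended only where the test needs an explicit device index. A condensed sketch of that pattern follows; the test class and its body are placeholders for illustration, while the imports match the ones used in the diff.

import gc
import unittest

import torch

from diffusers.utils.testing_utils import (
    backend_empty_cache,
    backend_reset_peak_memory_stats,
    torch_device,
)


class DeviceAgnosticQuantoStyleTest(unittest.TestCase):
    """Placeholder class illustrating the setUp/tearDown pattern from the diff."""

    def setUp(self):
        backend_reset_peak_memory_stats(torch_device)
        backend_empty_cache(torch_device)
        gc.collect()

    def tearDown(self):
        # Same cleanup after each test so peak-memory stats never leak between cases.
        backend_reset_peak_memory_stats(torch_device)
        backend_empty_cache(torch_device)
        gc.collect()

    def test_move_to_indexed_device(self):
        model = torch.nn.Linear(4, 4)
        # Append an ordinal only for backends that accept one; plain "cpu" stays as-is.
        device_0 = f"{torch_device}:0" if torch_device != "cpu" else torch_device
        model.to(device_0)
        self.assertEqual(next(model.parameters()).device.type, torch.device(device_0).type)

Together with the enable_full_determinism() call added at import time, this is intended to keep the nightly quanto suite reproducible while letting it run on CUDA, XPU, and other accelerators.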
