Commit f3a519f

Commit message: update
1 parent 2f3ad32 commit f3a519f

File tree: 4 files changed (+31, -37 lines)


src/diffusers/utils/testing_utils.py

Lines changed: 2 additions & 2 deletions
@@ -1085,8 +1085,8 @@ def _is_torch_fp64_available(device):
     "default": torch.manual_seed,
 }
 BACKEND_RESET_PEAK_MEMORY_STATS = {
-    "cuda": torch.cuda.reset_peak_memory_stats(),
-    "xpu": torch.xpu.reset_peak_memory_stats(),
+    "cuda": torch.cuda.reset_peak_memory_stats,
+    "xpu": torch.xpu.reset_peak_memory_stats,
     "default": None,
 }
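
This two-character fix is consequential: the old entries called torch.cuda.reset_peak_memory_stats() at import time (note the trailing parentheses), so the dict stored the calls' None return values instead of the functions themselves. A minimal sketch of how callers might dispatch through this map; the helper name backend_reset_peak_memory_stats and its no-op fallback are assumptions, not shown in this commit:

import torch

# Backend -> function that resets that backend's peak-memory counters.
# Storing bound functions (no parentheses) defers the call until needed.
BACKEND_RESET_PEAK_MEMORY_STATS = {
    "cuda": torch.cuda.reset_peak_memory_stats,
    "xpu": torch.xpu.reset_peak_memory_stats,
    "default": None,
}

def backend_reset_peak_memory_stats(device: str) -> None:
    # Hypothetical dispatcher: reset stats for `device`, no-op for unknown backends.
    fn = BACKEND_RESET_PEAK_MEMORY_STATS.get(device, BACKEND_RESET_PEAK_MEMORY_STATS["default"])
    if fn is not None:
        fn()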

tests/pipelines/deepfloyd_if/test_if_inpainting.py

Lines changed: 7 additions & 11 deletions
@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import gc
 import random
 import unittest
 
@@ -24,9 +23,10 @@
 from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import (
     floats_tensor,
+    flush_memory,
     load_numpy,
     require_accelerator,
-    require_torch_gpu,
+    require_torch_accelerator,
     skip_mps,
     slow,
     torch_device,
@@ -99,30 +99,26 @@ def test_inference_batch_single_identical(self):
 
 
 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class IFInpaintingPipelineSlowTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
-        gc.collect()
-        torch.cuda.empty_cache()
+        flush_memory(torch_device, gc_collect=True)
 
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
-        gc.collect()
-        torch.cuda.empty_cache()
+        flush_memory(torch_device, gc_collect=True)
 
     def test_if_inpainting(self):
         pipe = IFInpaintingPipeline.from_pretrained(
             "DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16
         )
         pipe.unet.set_attn_processor(AttnAddedKVProcessor())
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
 
-        torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
+        flush_memory(torch_device, reset_mem_stats=True)
 
         image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device)
         mask_image = floats_tensor((1, 3, 64, 64), rng=random.Random(1)).to(torch_device)
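
flush_memory replaces the per-test gc.collect()/torch.cuda.empty_cache() boilerplate, but its implementation is not part of this commit. A plausible sketch, assuming it lives in diffusers.utils.testing_utils and accepts the keyword flags the tests pass:

import gc
import torch

def flush_memory(device: str, gc_collect: bool = False, reset_mem_stats: bool = False):
    # Sketch only: free cached allocator blocks on the given accelerator and,
    # optionally, run the garbage collector and reset peak-memory counters.
    if gc_collect:
        gc.collect()
    if device == "cuda":
        if reset_mem_stats:
            torch.cuda.reset_peak_memory_stats()
        torch.cuda.empty_cache()
    elif device == "xpu":
        if reset_mem_stats:
            torch.xpu.reset_peak_memory_stats()
        torch.xpu.empty_cache()

Routing through torch_device this way is what lets the same tests run under @require_torch_accelerator on either CUDA or XPU hardware.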

tests/pipelines/deepfloyd_if/test_if_inpainting_superresolution.py

Lines changed: 11 additions & 12 deletions
@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import gc
 import random
 import unittest
 
@@ -24,9 +23,10 @@
 from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import (
     floats_tensor,
+    flush_memory,
     load_numpy,
     require_accelerator,
-    require_torch_gpu,
+    require_torch_accelerator,
     skip_mps,
     slow,
     torch_device,
@@ -101,31 +101,27 @@ def test_inference_batch_single_identical(self):
 
 
 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class IFInpaintingSuperResolutionPipelineSlowTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
-        gc.collect()
-        torch.cuda.empty_cache()
+        flush_memory(torch_device, gc_collect=True)
 
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
-        gc.collect()
-        torch.cuda.empty_cache()
+        flush_memory(torch_device, gc_collect=True)
 
     def test_if_inpainting_superresolution(self):
         pipe = IFInpaintingSuperResolutionPipeline.from_pretrained(
             "DeepFloyd/IF-II-L-v1.0", variant="fp16", torch_dtype=torch.float16
         )
         pipe.unet.set_attn_processor(AttnAddedKVProcessor())
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
 
         # Super resolution test
-        torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
+        flush_memory(torch_device, reset_mem_stats=True)
 
         generator = torch.Generator(device="cpu").manual_seed(0)
 
@@ -147,7 +143,10 @@ def test_if_inpainting_superresolution(self):
 
         assert image.shape == (256, 256, 3)
 
-        mem_bytes = torch.cuda.max_memory_allocated()
+        if torch_device == "cuda":
+            mem_bytes = torch.cuda.max_memory_allocated()
+        elif torch_device == "xpu":
+            mem_bytes = torch.xpu.max_memory_allocated()
         assert mem_bytes < 12 * 10**9
 
         expected_image = load_numpy(
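
The if/elif that populates mem_bytes is duplicated across both superresolution tests; a hypothetical helper could centralize it (the name max_memory_allocated_for is an illustration, not part of this commit):

import torch

def max_memory_allocated_for(device: str) -> int:
    # Mirrors the branch in the tests above; raises rather than leaving
    # mem_bytes unbound when the device has no peak-memory counter.
    if device == "cuda":
        return torch.cuda.max_memory_allocated()
    if device == "xpu":
        return torch.xpu.max_memory_allocated()
    raise ValueError(f"no peak-memory counter for device {device!r}")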

tests/pipelines/deepfloyd_if/test_if_superresolution.py

Lines changed: 11 additions & 12 deletions
@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import gc
 import random
 import unittest
 
@@ -24,9 +23,10 @@
 from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import (
     floats_tensor,
+    flush_memory,
     load_numpy,
     require_accelerator,
-    require_torch_gpu,
+    require_torch_accelerator,
     skip_mps,
     slow,
     torch_device,
@@ -94,31 +94,27 @@ def test_inference_batch_single_identical(self):
 
 
 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class IFSuperResolutionPipelineSlowTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
-        gc.collect()
-        torch.cuda.empty_cache()
+        flush_memory(torch_device, gc_collect=True)
 
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
-        gc.collect()
-        torch.cuda.empty_cache()
+        flush_memory(torch_device, gc_collect=True)
 
     def test_if_superresolution(self):
         pipe = IFSuperResolutionPipeline.from_pretrained(
             "DeepFloyd/IF-II-L-v1.0", variant="fp16", torch_dtype=torch.float16
         )
         pipe.unet.set_attn_processor(AttnAddedKVProcessor())
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
 
         # Super resolution test
-        torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
+        flush_memory(torch_device, reset_mem_stats=True)
 
         image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device)
         generator = torch.Generator(device="cpu").manual_seed(0)
 
@@ -134,7 +130,10 @@ def test_if_superresolution(self):
 
         assert image.shape == (256, 256, 3)
 
-        mem_bytes = torch.cuda.max_memory_allocated()
+        if torch_device == "cuda":
+            mem_bytes = torch.cuda.max_memory_allocated()
+        elif torch_device == "xpu":
+            mem_bytes = torch.xpu.max_memory_allocated()
         assert mem_bytes < 12 * 10**9
 
         expected_image = load_numpy(
