Commit 8804d74

raise errors if multiple offloading strategies used; add relevant tests
1 parent 840576a · commit 8804d74

3 files changed: 78 additions & 0 deletions

src/diffusers/hooks/group_offloading.py

Lines changed: 17 additions & 0 deletions
@@ -22,6 +22,7 @@
 
 
 if is_accelerate_available():
+    from accelerate.hooks import AlignDevicesHook, CpuOffload
     from accelerate.utils import send_to_device
 
 
@@ -341,6 +342,8 @@ def apply_group_offloading(
     else:
         raise ValueError("Using streams for data transfer requires a CUDA device.")
 
+    _raise_error_if_accelerate_model_or_sequential_hook_present(module)
+
     if offload_type == "block_level":
         if num_blocks_per_group is None:
             raise ValueError("num_blocks_per_group must be provided when using offload_type='block_level'.")
@@ -645,3 +648,17 @@ def _find_parent_module_in_module_dict(name: str, module_dict: Dict[str, torch.nn.Module]) -> str:
             return parent_name
         atoms.pop()
     return ""
+
+
+def _raise_error_if_accelerate_model_or_sequential_hook_present(module: torch.nn.Module) -> None:
+    if not is_accelerate_available():
+        return
+    for name, submodule in module.named_modules():
+        if not hasattr(submodule, "_hf_hook"):
+            continue
+        if isinstance(submodule._hf_hook, (AlignDevicesHook, CpuOffload)):
+            raise ValueError(
+                f"Cannot apply group offloading to a module that is already applying an alternative "
+                f"offloading strategy from Accelerate. If you want to apply group offloading, please "
+                f"disable the existing offloading strategy first. Offending module: {name} ({type(submodule)})"
+            )
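In practice, this new guard makes the conflict explicit on the model side. A minimal sketch of the failure mode it catches, assuming a CUDA device; `AutoencoderKL` is used purely as an illustrative model (any `ModelMixin` subclass that supports group offloading behaves the same):

import torch
from accelerate import cpu_offload

from diffusers import AutoencoderKL

# Attach one of Accelerate's offloading strategies first; cpu_offload
# installs its hooks (CpuOffload/AlignDevicesHook) on the module tree.
model = AutoencoderKL()
cpu_offload(model, execution_device=torch.device("cuda"))

# apply_group_offloading() now walks named_modules(), finds the Accelerate
# hook on `_hf_hook`, and fails fast instead of silently stacking strategies:
model.enable_group_offload(
    onload_device=torch.device("cuda"),
    offload_type="block_level",
    num_blocks_per_group=1,
)  # raises ValueError("Cannot apply group offloading ...")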

src/diffusers/pipelines/pipeline_utils.py

Lines changed: 22 additions & 0 deletions
@@ -1075,6 +1075,8 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
                 The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
                 default to "cuda".
         """
+        self._check_group_offloading_inactive_or_raise_error()
+
         is_pipeline_device_mapped = self.hf_device_map is not None and len(self.hf_device_map) > 1
         if is_pipeline_device_mapped:
             raise ValueError(
@@ -1186,6 +1188,8 @@ def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
                 The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
                 default to "cuda".
         """
+        self._check_group_offloading_inactive_or_raise_error()
+
         if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
@@ -1910,6 +1914,24 @@ def from_pipe(cls, pipeline, **kwargs):
 
         return new_pipeline
 
+    def _check_group_offloading_inactive_or_raise_error(self) -> None:
+        from ..hooks import HookRegistry
+        from ..hooks.group_offloading import _GROUP_OFFLOADING
+
+        for name, component in self.components.items():
+            if not isinstance(component, torch.nn.Module):
+                continue
+            for module in component.modules():
+                if not hasattr(module, "_diffusers_hook"):
+                    continue
+                registry: HookRegistry = module._diffusers_hook
+                if registry.get_hook(_GROUP_OFFLOADING) is not None:
+                    raise ValueError(
+                        f"You are trying to apply model/sequential CPU offloading to a pipeline that contains "
+                        f"components with group offloading enabled. This is not supported. Please disable group "
+                        f"offloading for the '{name}' component of the pipeline to use other offloading methods."
+                    )
+
 
 class StableDiffusionMixin:
     r"""

tests/hooks/test_group_offloading.py

Lines changed: 39 additions & 0 deletions
@@ -18,6 +18,7 @@
 import torch
 
 from diffusers.models import ModelMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline
 from diffusers.utils.testing_utils import require_torch_gpu, torch_device
 
 
@@ -56,6 +57,20 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return x
 
 
+class DummyPipeline(DiffusionPipeline):
+    model_cpu_offload_seq = "model"
+
+    def __init__(self, model: torch.nn.Module) -> None:
+        super().__init__()
+
+        self.register_modules(model=model)
+
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        for _ in range(2):
+            x = x + 0.1 * self.model(x)
+        return x
+
+
 @require_torch_gpu
 class GroupOffloadTests(unittest.TestCase):
     in_features = 64
@@ -151,3 +166,27 @@ def test_error_raised_if_supports_group_offloading_false(self):
         self.model._supports_group_offloading = False
         with self.assertRaisesRegex(ValueError, "does not support group offloading"):
             self.model.enable_group_offload(onload_device=torch.device("cuda"))
+
+    def test_error_raised_if_model_offloading_applied_on_group_offloaded_module(self):
+        pipe = DummyPipeline(self.model)
+        pipe.model.enable_group_offload(torch_device, offload_type="block_level", num_blocks_per_group=3)
+        with self.assertRaisesRegex(ValueError, "You are trying to apply model/sequential CPU offloading"):
+            pipe.enable_model_cpu_offload()
+
+    def test_error_raised_if_sequential_offloading_applied_on_group_offloaded_module(self):
+        pipe = DummyPipeline(self.model)
+        pipe.model.enable_group_offload(torch_device, offload_type="block_level", num_blocks_per_group=3)
+        with self.assertRaisesRegex(ValueError, "You are trying to apply model/sequential CPU offloading"):
+            pipe.enable_sequential_cpu_offload()
+
+    def test_error_raised_if_group_offloading_applied_on_model_offloaded_module(self):
+        pipe = DummyPipeline(self.model)
+        pipe.enable_model_cpu_offload()
+        with self.assertRaisesRegex(ValueError, "Cannot apply group offloading"):
+            pipe.model.enable_group_offload(torch_device, offload_type="block_level", num_blocks_per_group=3)
+
+    def test_error_raised_if_group_offloading_applied_on_sequential_offloaded_module(self):
+        pipe = DummyPipeline(self.model)
+        pipe.enable_sequential_cpu_offload()
+        with self.assertRaisesRegex(ValueError, "Cannot apply group offloading"):
+            pipe.model.enable_group_offload(torch_device, offload_type="block_level", num_blocks_per_group=3)
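The four new tests cover both directions of the conflict (group offloading applied first vs. second). On a CUDA machine, something like `pytest tests/hooks/test_group_offloading.py -k offloaded_module -v` should select just these tests; the whole `GroupOffloadTests` class is gated by `@require_torch_gpu`, so they are skipped on CPU-only runners.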
