[LoRA] fix: lora loading when using with a device_mapped model. #9449
Changes from 5 commits
@@ -387,6 +387,11 @@ def to(self, *args, **kwargs):
         device = device or device_arg

+        def model_has_device_map(model):
+            if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
+                return False
+            return getattr(model, "hf_device_map", None) is not None
+
         # throw warning if pipeline is in "offloaded"-mode but user tries to manually set to GPU.
         def module_is_sequentially_offloaded(module):
             if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):

@@ -404,6 +409,13 @@ def module_is_offloaded(module):
             return hasattr(module, "_hf_hook") and isinstance(module._hf_hook, accelerate.hooks.CpuOffload)

+        # device-mapped modules should not go through any device placements.
+        pipeline_has_device_mapped_modules = any(model_has_device_map(module) for _, module in self.components.items())
+        if pipeline_has_device_mapped_modules:
+            raise ValueError(
+                "It seems like you have device-mapped modules in the pipeline which doesn't allow explicit device placement using `to()`."
+            )
+
         # .to("cuda") would raise an error if the pipeline is sequentially offloaded, so we raise our own to make it clearer
         pipeline_is_sequentially_offloaded = any(
             module_is_sequentially_offloaded(module) for _, module in self.components.items()
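For context, this guard makes explicit device placement fail fast whenever any pipeline component already carries an accelerate device map. A minimal sketch of the behavior, assuming a diffusers version that accepts a model-level `device_map` in `from_pretrained`; the checkpoint name and `device_map` value are illustrative and not taken from this PR:

```python
import torch
from diffusers import DiffusionPipeline, FluxTransformer2DModel

# Illustrative setup: one component is loaded with its own device map and then
# handed to the pipeline, which is the scenario this PR targets.
transformer = FluxTransformer2DModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    subfolder="transformer",
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
pipe = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    transformer=transformer,
    torch_dtype=torch.bfloat16,
)

# With the check above, explicit placement is rejected up front with a clear message
# instead of surfacing confusing device-mismatch errors later.
try:
    pipe.to("cuda")
except ValueError as err:
    print(err)
```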
@@ -976,6 +988,19 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t
                 The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
                 default to "cuda".
         """

+        def model_has_device_map(model):
+            if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
+                return False
+            return getattr(model, "hf_device_map", None) is not None
+
+        # device-mapped modules should not go through any device placements.
+        pipeline_has_device_mapped_modules = any(model_has_device_map(module) for _, module in self.components.items())
+        if pipeline_has_device_mapped_modules:
+            raise ValueError(
+                "It seems like you have device-mapped modules in the pipeline which doesn't allow explicit device placement using `to()`."
+            )
+
         is_pipeline_device_mapped = self.hf_device_map is not None and len(self.hf_device_map) > 1
         if is_pipeline_device_mapped:
             raise ValueError(
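The helper is duplicated as a local function in each entry point rather than shared. A standalone sketch of what it checks, with the accelerate version guard dropped for brevity; the attribute assignment on `mapped` only simulates what accelerate records when it dispatches a model with a device map:

```python
import torch

def model_has_device_map(model):
    # Mirrors the nested helper from the diff: accelerate stores the placement it
    # applied on the module as `hf_device_map`, so its presence marks a mapped model.
    return getattr(model, "hf_device_map", None) is not None

plain = torch.nn.Linear(4, 4)
print(model_has_device_map(plain))   # False: no device map was ever applied

mapped = torch.nn.Linear(4, 4)
mapped.hf_device_map = {"": 0}       # simulated accelerate bookkeeping
print(model_has_device_map(mapped))  # True
```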
@@ -1069,6 +1094,19 @@ def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Un
                 The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
                 default to "cuda".
         """

+        def model_has_device_map(model):
+            if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
+                return False
+            return getattr(model, "hf_device_map", None) is not None
+
+        # device-mapped modules should not go through any device placements.
+        pipeline_has_device_mapped_modules = any(model_has_device_map(module) for _, module in self.components.items())
+        if pipeline_has_device_mapped_modules:
+            raise ValueError(
+                "It seems like you have device-mapped modules in the pipeline which doesn't allow explicit device placement using `to()`."
+            )
+
         if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
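With these two hunks, both offloading entry points fail fast in the same way as `to()`. A short sketch of the resulting behavior, using the same illustrative device-mapped setup as the first example (checkpoint name and `device_map` value are assumptions):

```python
import torch
from diffusers import DiffusionPipeline, FluxTransformer2DModel

transformer = FluxTransformer2DModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev", subfolder="transformer", device_map="auto", torch_dtype=torch.bfloat16
)
pipe = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", transformer=transformer, torch_dtype=torch.bfloat16
)

# Each call now raises before any offloading hooks are installed, because the
# pipeline contains a device-mapped component.
for enable in (pipe.enable_model_cpu_offload, pipe.enable_sequential_cpu_offload):
    try:
        enable()
    except ValueError as err:
        print(f"{enable.__name__}: {err}")
```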