Skip to content

Commit 6be43b8

Browse files
committed
handle .cuda()
1 parent a872e84 commit 6be43b8

File tree

1 file changed

+10
-0
lines changed

1 file changed

+10
-0
lines changed

src/diffusers/models/modeling_utils.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1229,6 +1229,8 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
12291229
# Adapted from `transformers`.
12301230
@wraps(torch.nn.Module.cuda)
12311231
def cuda(self, *args, **kwargs):
1232+
from ..hooks.group_offloading import _is_group_offload_enabled
1233+
12321234
# Checks if the model has been loaded in 4-bit or 8-bit with BNB
12331235
if getattr(self, "quantization_method", None) == QuantizationMethod.BITS_AND_BYTES:
12341236
if getattr(self, "is_loaded_in_8bit", False):
@@ -1241,6 +1243,14 @@ def cuda(self, *args, **kwargs):
12411243
"Calling `cuda()` is not supported for `4-bit` quantized models with the installed version of bitsandbytes. "
12421244
f"The current device is `{self.device}`. If you intended to move the model, please install bitsandbytes >= 0.43.2."
12431245
)
1246+
1247+
# Checks if group offloading is enabled
1248+
if _is_group_offload_enabled(self):
1249+
logger.warning(
1250+
f"The module '{self.__class__.__name__}' is group offloaded and moving it using `.cuda()` is not supported."
1251+
)
1252+
return self
1253+
12441254
return super().cuda(*args, **kwargs)
12451255

12461256
# Adapted from `transformers`.

0 commit comments

Comments (0)