16 changes: 16 additions & 0 deletions src/diffusers/pipelines/pipeline_loading_utils.py
@@ -36,6 +36,7 @@
deprecate,
get_class_from_dynamic_module,
is_accelerate_available,
is_accelerate_version,
is_peft_available,
is_transformers_available,
logging,
@@ -947,3 +948,18 @@ def _get_ignore_patterns(
)

return ignore_patterns


def model_has_device_map(model):
if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
return False

# Check if the model has a device map that is not exclusively CPU
# `device_map` can only contain CPU when a model has sharded checkpoints.
# See here: https://github.com/huggingface/diffusers/blob/41e4779d988ead99e7acd78dc8e752de88777d0f/src/diffusers/models/modeling_utils.py#L883
device_map = getattr(model, "hf_device_map", None)
if device_map is not None:
unique_devices = set(device_map.values())
return len(unique_devices) > 1 or unique_devices != {"cpu"}

return False
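
A minimal usage sketch of the helper above (not part of the diff; the stand-in object and the toy device maps are hypothetical, and accelerate >= 0.14.0 is assumed to be installed so the version guard passes):

from types import SimpleNamespace

from diffusers.pipelines.pipeline_loading_utils import model_has_device_map

model = SimpleNamespace()  # no `hf_device_map` attribute at all
assert model_has_device_map(model) is False

model.hf_device_map = {"": "cpu"}  # sharded checkpoint loaded entirely on CPU
assert model_has_device_map(model) is False  # exclusively-CPU maps don't count

model.hf_device_map = {"encoder": 0, "decoder": "cpu"}  # genuinely dispatched
assert model_has_device_map(model) is True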
37 changes: 36 additions & 1 deletion tests/pipelines/kandinsky/test_kandinsky_prior.py
@@ -13,6 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import tempfile
import unittest

import numpy as np
@@ -28,11 +30,16 @@
)

from diffusers import KandinskyPriorPipeline, PriorTransformer, UnCLIPScheduler
from diffusers.utils.testing_utils import enable_full_determinism, skip_mps, torch_device
from diffusers.models.modeling_utils import ModelMixin
from diffusers.utils import SAFE_WEIGHTS_INDEX_NAME
from diffusers.utils.testing_utils import enable_full_determinism, is_accelerate_available, skip_mps, torch_device

from ..test_pipelines_common import PipelineTesterMixin


if is_accelerate_available():
from accelerate.utils import compute_module_sizes

enable_full_determinism()


@@ -236,3 +243,31 @@ def test_attention_slicing_forward_pass(self):
test_max_difference=test_max_difference,
test_mean_pixel_difference=test_mean_pixel_difference,
)

    # This pipeline's components need a different sharding ratio (0.45) than the standard 0.75, so we override the common test.
def test_sharded_components_can_be_device_placed(self):
components = self.get_dummy_components()

component_selected = None
for component_name in components:
if isinstance(components[component_name], ModelMixin) and hasattr(
components[component_name], "load_config"
):
component_to_be_sharded = components[component_name]
component_cls = component_to_be_sharded.__class__
component_selected = component_name
break

assert component_selected, "No component selected that can be sharded."

model_size = compute_module_sizes(component_to_be_sharded)[""]
max_shard_size = int((model_size * 0.45) / (2**10))

with tempfile.TemporaryDirectory() as tmp_dir:
component_to_be_sharded.cpu().save_pretrained(tmp_dir, max_shard_size=f"{max_shard_size}KB")
self.assertTrue(os.path.exists(os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)))

loaded_sharded_component = component_cls.from_pretrained(tmp_dir)
_ = components.pop(component_selected)
components.update({component_selected: loaded_sharded_component})
_ = self.pipeline_class(**components).to(torch_device)
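
For reference, a worked example of the shard-size arithmetic in the override above (the byte count is made up; accelerate's compute_module_sizes(model)[""] reports the module's total size in bytes):

model_size = 200_000  # bytes, as reported by compute_module_sizes
max_shard_size = int((model_size * 0.45) / (2**10))  # 87 -> passed as "87KB"
# Capping each shard at ~45% of the total size forces the checkpoint into at
# least three shard files, so save_pretrained also writes the
# SAFE_WEIGHTS_INDEX_NAME index file that the test asserts on.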
37 changes: 36 additions & 1 deletion tests/pipelines/kandinsky2_2/test_kandinsky_prior.py
@@ -14,6 +14,8 @@
# limitations under the License.

import inspect
import os
import tempfile
import unittest

import numpy as np
@@ -29,11 +31,16 @@
)

from diffusers import KandinskyV22PriorPipeline, PriorTransformer, UnCLIPScheduler
from diffusers.utils.testing_utils import enable_full_determinism, skip_mps, torch_device
from diffusers.models.modeling_utils import ModelMixin
from diffusers.utils import SAFE_WEIGHTS_INDEX_NAME
from diffusers.utils.testing_utils import enable_full_determinism, is_accelerate_available, skip_mps, torch_device

from ..test_pipelines_common import PipelineTesterMixin


if is_accelerate_available():
from accelerate.utils import compute_module_sizes

enable_full_determinism()


@@ -277,3 +284,31 @@ def callback_inputs_test(pipe, i, t, callback_kwargs):

output = pipe(**inputs)[0]
assert output.abs().sum() == 0

    # This pipeline's components need a different sharding ratio (0.45) than the standard 0.75, so we override the common test.
def test_sharded_components_can_be_device_placed(self):
components = self.get_dummy_components()

component_selected = None
for component_name in components:
if isinstance(components[component_name], ModelMixin) and hasattr(
components[component_name], "load_config"
):
component_to_be_sharded = components[component_name]
component_cls = component_to_be_sharded.__class__
component_selected = component_name
break

assert component_selected, "No component selected that can be sharded."

model_size = compute_module_sizes(component_to_be_sharded)[""]
max_shard_size = int((model_size * 0.45) / (2**10))

with tempfile.TemporaryDirectory() as tmp_dir:
component_to_be_sharded.cpu().save_pretrained(tmp_dir, max_shard_size=f"{max_shard_size}KB")
self.assertTrue(os.path.exists(os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)))

loaded_sharded_component = component_cls.from_pretrained(tmp_dir)
_ = components.pop(component_selected)
components.update({component_selected: loaded_sharded_component})
_ = self.pipeline_class(**components).to(torch_device)
36 changes: 36 additions & 0 deletions tests/pipelines/kandinsky2_2/test_kandinsky_prior_emb2emb.py
@@ -13,7 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import random
import tempfile
import unittest

import numpy as np
@@ -30,16 +32,22 @@
)

from diffusers import KandinskyV22PriorEmb2EmbPipeline, PriorTransformer, UnCLIPScheduler
from diffusers.models.modeling_utils import ModelMixin
from diffusers.utils import SAFE_WEIGHTS_INDEX_NAME
from diffusers.utils.testing_utils import (
enable_full_determinism,
floats_tensor,
is_accelerate_available,
skip_mps,
torch_device,
)

from ..test_pipelines_common import PipelineTesterMixin


if is_accelerate_available():
from accelerate.utils import compute_module_sizes

enable_full_determinism()


@@ -240,3 +248,31 @@ def test_attention_slicing_forward_pass(self):
test_max_difference=test_max_difference,
test_mean_pixel_difference=test_mean_pixel_difference,
)

    # This pipeline's components need a different sharding ratio (0.45) than the standard 0.75, so we override the common test.
def test_sharded_components_can_be_device_placed(self):
components = self.get_dummy_components()

component_selected = None
for component_name in components:
if isinstance(components[component_name], ModelMixin) and hasattr(
components[component_name], "load_config"
):
component_to_be_sharded = components[component_name]
component_cls = component_to_be_sharded.__class__
component_selected = component_name
break

assert component_selected, "No component selected that can be sharded."

model_size = compute_module_sizes(component_to_be_sharded)[""]
max_shard_size = int((model_size * 0.45) / (2**10))

with tempfile.TemporaryDirectory() as tmp_dir:
component_to_be_sharded.cpu().save_pretrained(tmp_dir, max_shard_size=f"{max_shard_size}KB")
self.assertTrue(os.path.exists(os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)))

loaded_sharded_component = component_cls.from_pretrained(tmp_dir)
_ = components.pop(component_selected)
components.update({component_selected: loaded_sharded_component})
_ = self.pipeline_class(**components).to(torch_device)
57 changes: 56 additions & 1 deletion tests/pipelines/stable_unclip/test_stable_unclip.py
@@ -1,4 +1,6 @@
import gc
import os
import tempfile
import unittest

import torch
@@ -12,8 +14,17 @@
StableUnCLIPPipeline,
UNet2DConditionModel,
)
from diffusers.models.modeling_utils import ModelMixin
from diffusers.pipelines.stable_diffusion.stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
from diffusers.utils.testing_utils import enable_full_determinism, load_numpy, nightly, require_torch_gpu, torch_device
from diffusers.utils import SAFE_WEIGHTS_INDEX_NAME
from diffusers.utils.testing_utils import (
enable_full_determinism,
is_accelerate_available,
load_numpy,
nightly,
require_torch_gpu,
torch_device,
)

from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
from ..test_pipelines_common import (
@@ -24,6 +35,10 @@
)


if is_accelerate_available():
from accelerate.utils import compute_module_sizes


enable_full_determinism()


@@ -184,6 +199,46 @@ def test_attention_slicing_forward_pass(self):
def test_inference_batch_single_identical(self):
self._test_inference_batch_single_identical(expected_max_diff=1e-3)

@unittest.skip("Test not supported.")
def test_calling_mco_raises_error_device_mapped_components(self):
pass

@unittest.skip("Test not supported.")
def test_calling_to_raises_error_device_mapped_components(self):
pass

@unittest.skip("Test not supported.")
def test_calling_sco_raises_error_device_mapped_components(self):
pass

    # This pipeline's components need a different sharding ratio (0.45) than the standard 0.75, so we override the common test.
def test_sharded_components_can_be_device_placed(self):
components = self.get_dummy_components()

component_selected = None
for component_name in components:
if isinstance(components[component_name], ModelMixin) and hasattr(
components[component_name], "load_config"
):
component_to_be_sharded = components[component_name]
component_cls = component_to_be_sharded.__class__
component_selected = component_name
break

assert component_selected, "No component selected that can be sharded."

model_size = compute_module_sizes(component_to_be_sharded)[""]
max_shard_size = int((model_size * 0.45) / (2**10))

with tempfile.TemporaryDirectory() as tmp_dir:
component_to_be_sharded.cpu().save_pretrained(tmp_dir, max_shard_size=f"{max_shard_size}KB")
self.assertTrue(os.path.exists(os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)))

loaded_sharded_component = component_cls.from_pretrained(tmp_dir)
_ = components.pop(component_selected)
components.update({component_selected: loaded_sharded_component})
_ = self.pipeline_class(**components).to(torch_device)


@nightly
@require_torch_gpu