16 changes: 16 additions & 0 deletions src/diffusers/pipelines/pipeline_loading_utils.py
@@ -36,6 +36,7 @@
deprecate,
get_class_from_dynamic_module,
is_accelerate_available,
is_accelerate_version,
is_peft_available,
is_transformers_available,
logging,
@@ -947,3 +948,18 @@ def _get_ignore_patterns(
)

return ignore_patterns


def model_has_device_map(model):
if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
return False

# Check if the model has a device map that is not exclusively CPU
# `device_map` can only contain CPU when a model has sharded checkpoints.
# See here: https://github.com/huggingface/diffusers/blob/41e4779d988ead99e7acd78dc8e752de88777d0f/src/diffusers/models/modeling_utils.py#L883
device_map = getattr(model, "hf_device_map", None)
if device_map is not None:
unique_devices = set(device_map.values())
return len(unique_devices) > 1 or unique_devices != {"cpu"}

return False
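
A minimal usage sketch of the helper above (not part of the diff; the stand-in object and the toy device maps are hypothetical, and accelerate >= 0.14.0 is assumed to be installed so the version guard passes):

from types import SimpleNamespace

from diffusers.pipelines.pipeline_loading_utils import model_has_device_map

model = SimpleNamespace()  # no `hf_device_map` attribute at all
assert model_has_device_map(model) is False

model.hf_device_map = {"": "cpu"}  # sharded checkpoint loaded entirely on CPU
assert model_has_device_map(model) is False  # exclusively-CPU maps don't count

model.hf_device_map = {"encoder": 0, "decoder": "cpu"}  # genuinely dispatched
assert model_has_device_map(model) is True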
37 changes: 36 additions & 1 deletion tests/pipelines/kandinsky/test_kandinsky_prior.py
@@ -13,6 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import tempfile
import unittest

import numpy as np
@@ -28,11 +30,16 @@
)

from diffusers import KandinskyPriorPipeline, PriorTransformer, UnCLIPScheduler
from diffusers.utils.testing_utils import enable_full_determinism, skip_mps, torch_device
from diffusers.models.modeling_utils import ModelMixin
from diffusers.utils import SAFE_WEIGHTS_INDEX_NAME
from diffusers.utils.testing_utils import enable_full_determinism, is_accelerate_available, skip_mps, torch_device

from ..test_pipelines_common import PipelineTesterMixin


if is_accelerate_available():
from accelerate.utils import compute_module_sizes

enable_full_determinism()


@@ -236,3 +243,31 @@ def test_attention_slicing_forward_pass(self):
test_max_difference=test_max_difference,
test_mean_pixel_difference=test_mean_pixel_difference,
)

    # This pipeline's components need a different sharding ratio (0.45) than the standard 0.75, so we override the common test.
def test_sharded_components_can_be_device_placed(self):
components = self.get_dummy_components()

component_selected = None
for component_name in components:
if isinstance(components[component_name], ModelMixin) and hasattr(
components[component_name], "load_config"
):
component_to_be_sharded = components[component_name]
component_cls = component_to_be_sharded.__class__
component_selected = component_name
break

assert component_selected, "No component selected that can be sharded."

model_size = compute_module_sizes(component_to_be_sharded)[""]
max_shard_size = int((model_size * 0.45) / (2**10))

with tempfile.TemporaryDirectory() as tmp_dir:
component_to_be_sharded.cpu().save_pretrained(tmp_dir, max_shard_size=f"{max_shard_size}KB")
self.assertTrue(os.path.exists(os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)))

loaded_sharded_component = component_cls.from_pretrained(tmp_dir)
_ = components.pop(component_selected)
components.update({component_selected: loaded_sharded_component})
_ = self.pipeline_class(**components).to(torch_device)
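
For reference, a worked example of the shard-size arithmetic in the override above (the byte count is made up; accelerate's compute_module_sizes(model)[""] reports the module's total size in bytes):

model_size = 200_000  # bytes, as reported by compute_module_sizes
max_shard_size = int((model_size * 0.45) / (2**10))  # 87 -> passed as "87KB"
# Capping each shard at ~45% of the total size forces the checkpoint into at
# least three shard files, so save_pretrained also writes the
# SAFE_WEIGHTS_INDEX_NAME index file that the test asserts on.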
37 changes: 36 additions & 1 deletion tests/pipelines/kandinsky2_2/test_kandinsky_prior.py
@@ -14,6 +14,8 @@
# limitations under the License.

import inspect
import os
import tempfile
import unittest

import numpy as np
@@ -29,11 +31,16 @@
)

from diffusers import KandinskyV22PriorPipeline, PriorTransformer, UnCLIPScheduler
from diffusers.utils.testing_utils import enable_full_determinism, skip_mps, torch_device
from diffusers.models.modeling_utils import ModelMixin
from diffusers.utils import SAFE_WEIGHTS_INDEX_NAME
from diffusers.utils.testing_utils import enable_full_determinism, is_accelerate_available, skip_mps, torch_device

from ..test_pipelines_common import PipelineTesterMixin


if is_accelerate_available():
from accelerate.utils import compute_module_sizes

enable_full_determinism()


@@ -277,3 +284,31 @@ def callback_inputs_test(pipe, i, t, callback_kwargs):

output = pipe(**inputs)[0]
assert output.abs().sum() == 0

    # This pipeline's components need a different sharding ratio (0.45) than the standard 0.75, so we override the common test.
def test_sharded_components_can_be_device_placed(self):
components = self.get_dummy_components()

component_selected = None
for component_name in components:
if isinstance(components[component_name], ModelMixin) and hasattr(
components[component_name], "load_config"
):
component_to_be_sharded = components[component_name]
component_cls = component_to_be_sharded.__class__
component_selected = component_name
break

assert component_selected, "No component selected that can be sharded."

model_size = compute_module_sizes(component_to_be_sharded)[""]
max_shard_size = int((model_size * 0.45) / (2**10))

with tempfile.TemporaryDirectory() as tmp_dir:
component_to_be_sharded.cpu().save_pretrained(tmp_dir, max_shard_size=f"{max_shard_size}KB")
self.assertTrue(os.path.exists(os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)))

loaded_sharded_component = component_cls.from_pretrained(tmp_dir)
_ = components.pop(component_selected)
components.update({component_selected: loaded_sharded_component})
_ = self.pipeline_class(**components).to(torch_device)
36 changes: 36 additions & 0 deletions tests/pipelines/kandinsky2_2/test_kandinsky_prior_emb2emb.py
@@ -13,7 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import random
import tempfile
import unittest

import numpy as np
@@ -30,16 +32,22 @@
)

from diffusers import KandinskyV22PriorEmb2EmbPipeline, PriorTransformer, UnCLIPScheduler
from diffusers.models.modeling_utils import ModelMixin
from diffusers.utils import SAFE_WEIGHTS_INDEX_NAME
from diffusers.utils.testing_utils import (
enable_full_determinism,
floats_tensor,
is_accelerate_available,
skip_mps,
torch_device,
)

from ..test_pipelines_common import PipelineTesterMixin


if is_accelerate_available():
from accelerate.utils import compute_module_sizes

enable_full_determinism()


@@ -240,3 +248,31 @@ def test_attention_slicing_forward_pass(self):
test_max_difference=test_max_difference,
test_mean_pixel_difference=test_mean_pixel_difference,
)

    # This pipeline's components need a different sharding ratio (0.45) than the standard 0.75, so we override the common test.
def test_sharded_components_can_be_device_placed(self):
components = self.get_dummy_components()

component_selected = None
for component_name in components:
if isinstance(components[component_name], ModelMixin) and hasattr(
components[component_name], "load_config"
):
component_to_be_sharded = components[component_name]
component_cls = component_to_be_sharded.__class__
component_selected = component_name
break

assert component_selected, "No component selected that can be sharded."

model_size = compute_module_sizes(component_to_be_sharded)[""]
max_shard_size = int((model_size * 0.45) / (2**10))

with tempfile.TemporaryDirectory() as tmp_dir:
component_to_be_sharded.cpu().save_pretrained(tmp_dir, max_shard_size=f"{max_shard_size}KB")
self.assertTrue(os.path.exists(os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)))

loaded_sharded_component = component_cls.from_pretrained(tmp_dir)
_ = components.pop(component_selected)
components.update({component_selected: loaded_sharded_component})
_ = self.pipeline_class(**components).to(torch_device)
57 changes: 56 additions & 1 deletion tests/pipelines/stable_unclip/test_stable_unclip.py
@@ -1,4 +1,6 @@
import gc
import os
import tempfile
import unittest

import torch
@@ -12,8 +14,17 @@
StableUnCLIPPipeline,
UNet2DConditionModel,
)
from diffusers.models.modeling_utils import ModelMixin
from diffusers.pipelines.stable_diffusion.stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
from diffusers.utils.testing_utils import enable_full_determinism, load_numpy, nightly, require_torch_gpu, torch_device
from diffusers.utils import SAFE_WEIGHTS_INDEX_NAME
from diffusers.utils.testing_utils import (
enable_full_determinism,
is_accelerate_available,
load_numpy,
nightly,
require_torch_gpu,
torch_device,
)

from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
from ..test_pipelines_common import (
@@ -24,6 +35,10 @@
)


if is_accelerate_available():
from accelerate.utils import compute_module_sizes


enable_full_determinism()


@@ -184,6 +199,46 @@ def test_attention_slicing_forward_pass(self):
def test_inference_batch_single_identical(self):
self._test_inference_batch_single_identical(expected_max_diff=1e-3)

@unittest.skip("Test not supported.")
def test_calling_mco_raises_error_device_mapped_components(self):
pass

@unittest.skip("Test not supported.")
def test_calling_to_raises_error_device_mapped_components(self):
pass

@unittest.skip("Test not supported.")
def test_calling_sco_raises_error_device_mapped_components(self):
pass

    # This pipeline's components need a different sharding ratio (0.45) than the standard 0.75, so we override the common test.
def test_sharded_components_can_be_device_placed(self):
components = self.get_dummy_components()

component_selected = None
for component_name in components:
if isinstance(components[component_name], ModelMixin) and hasattr(
components[component_name], "load_config"
):
component_to_be_sharded = components[component_name]
component_cls = component_to_be_sharded.__class__
component_selected = component_name
break

assert component_selected, "No component selected that can be sharded."

model_size = compute_module_sizes(component_to_be_sharded)[""]
max_shard_size = int((model_size * 0.45) / (2**10))

with tempfile.TemporaryDirectory() as tmp_dir:
component_to_be_sharded.cpu().save_pretrained(tmp_dir, max_shard_size=f"{max_shard_size}KB")
self.assertTrue(os.path.exists(os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)))

loaded_sharded_component = component_cls.from_pretrained(tmp_dir)
_ = components.pop(component_selected)
components.update({component_selected: loaded_sharded_component})
_ = self.pipeline_class(**components).to(torch_device)


@nightly
@require_torch_gpu