Skip to content

Commit 8913d59

Browse files
committed
Add `to` method to modular loader (copied from DiffusionPipeline; not tested yet)
1 parent 5a8c1b5 commit 8913d59

File tree

1 file changed

+190
-3
lines changed

1 file changed

+190
-3
lines changed

src/diffusers/modular_pipelines/modular_pipeline.py

Lines changed: 190 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from collections import OrderedDict
2020
from dataclasses import dataclass, field
2121
from typing import Any, Dict, List, Tuple, Union, Optional
22+
from typing_extensions import Self
2223
from copy import deepcopy
2324

2425

@@ -2012,9 +2013,195 @@ def load(self, component_names: Optional[List[str]] = None, **kwargs):
20122013
# Register all components at once
20132014
self.register_components(**components_to_register)
20142015

2015-
# YiYi TODO: should support to method
2016-
def to(self, *args, **kwargs):
2017-
pass
2016+
# Copied from diffusers.pipelines.pipeline_utils.DiffusionPipeline.to
2017+
def to(self, *args, **kwargs) -> Self:
2018+
r"""
2019+
Performs Pipeline dtype and/or device conversion. A torch.dtype and torch.device are inferred from the
2020+
arguments of `self.to(*args, **kwargs)`.
2021+
2022+
<Tip>
2023+
2024+
The conversion happens in place: the pipeline's `torch.nn.Module` components are moved and/or cast via
2025+
`module.to(...)` and `self` is returned. No copy of the pipeline is created.
2026+
2027+
</Tip>
2028+
2029+
2030+
Here are the ways to call `to`:
2031+
2032+
- `to(dtype, silence_dtype_warnings=False) → DiffusionPipeline` to return a pipeline with the specified
2033+
[`dtype`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.dtype)
2034+
- `to(device, silence_dtype_warnings=False) → DiffusionPipeline` to return a pipeline with the specified
2035+
[`device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.device)
2036+
- `to(device=None, dtype=None, silence_dtype_warnings=False) → DiffusionPipeline` to return a pipeline with the
2037+
specified [`device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.device) and
2038+
[`dtype`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.dtype)
2039+
2040+
Arguments:
2041+
dtype (`torch.dtype`, *optional*):
2042+
Returns a pipeline with the specified
2043+
[`dtype`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.dtype)
2044+
device (`torch.device`, *optional*):
2045+
Returns a pipeline with the specified
2046+
[`device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.device)
2047+
silence_dtype_warnings (`bool`, *optional*, defaults to `False`):
2048+
Whether to omit warnings if the target `dtype` is not compatible with the target `device`.
2049+
2050+
Returns:
2051+
[`DiffusionPipeline`]: The pipeline converted to the specified `device` and/or `dtype`.
2052+
"""
2053+
dtype = kwargs.pop("dtype", None)
2054+
device = kwargs.pop("device", None)
2055+
silence_dtype_warnings = kwargs.pop("silence_dtype_warnings", False)
2056+
2057+
dtype_arg = None
2058+
device_arg = None
2059+
if len(args) == 1:
2060+
if isinstance(args[0], torch.dtype):
2061+
dtype_arg = args[0]
2062+
else:
2063+
device_arg = torch.device(args[0]) if args[0] is not None else None
2064+
elif len(args) == 2:
2065+
if isinstance(args[0], torch.dtype):
2066+
raise ValueError(
2067+
"When passing two arguments, make sure the first corresponds to `device` and the second to `dtype`."
2068+
)
2069+
device_arg = torch.device(args[0]) if args[0] is not None else None
2070+
dtype_arg = args[1]
2071+
elif len(args) > 2:
2072+
raise ValueError("Please make sure to pass at most two arguments (`device` and `dtype`) `.to(...)`")
2073+
2074+
if dtype is not None and dtype_arg is not None:
2075+
raise ValueError(
2076+
"You have passed `dtype` both as an argument and as a keyword argument. Please only pass one of the two."
2077+
)
2078+
2079+
dtype = dtype or dtype_arg
2080+
2081+
if device is not None and device_arg is not None:
2082+
raise ValueError(
2083+
"You have passed `device` both as an argument and as a keyword argument. Please only pass one of the two."
2084+
)
2085+
2086+
device = device or device_arg
2087+
device_type = torch.device(device).type if device is not None else None
2088+
pipeline_has_bnb = any(any((_check_bnb_status(module))) for _, module in self.components.items())
2089+
2090+
# throw warning if pipeline is in "offloaded"-mode but user tries to manually set to GPU.
2091+
def module_is_sequentially_offloaded(module):
2092+
if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
2093+
return False
2094+
2095+
_, _, is_loaded_in_8bit_bnb = _check_bnb_status(module)
2096+
2097+
if is_loaded_in_8bit_bnb:
2098+
return False
2099+
2100+
return hasattr(module, "_hf_hook") and (
2101+
isinstance(module._hf_hook, accelerate.hooks.AlignDevicesHook)
2102+
or hasattr(module._hf_hook, "hooks")
2103+
and isinstance(module._hf_hook.hooks[0], accelerate.hooks.AlignDevicesHook)
2104+
)
2105+
2106+
def module_is_offloaded(module):
2107+
if not is_accelerate_available() or is_accelerate_version("<", "0.17.0.dev0"):
2108+
return False
2109+
2110+
return hasattr(module, "_hf_hook") and isinstance(module._hf_hook, accelerate.hooks.CpuOffload)
2111+
2112+
# .to("cuda") would raise an error if the pipeline is sequentially offloaded, so we raise our own to make it clearer
2113+
pipeline_is_sequentially_offloaded = any(
2114+
module_is_sequentially_offloaded(module) for _, module in self.components.items()
2115+
)
2116+
2117+
is_pipeline_device_mapped = self.hf_device_map is not None and len(self.hf_device_map) > 1
2118+
if is_pipeline_device_mapped:
2119+
raise ValueError(
2120+
"It seems like you have activated a device mapping strategy on the pipeline which doesn't allow explicit device placement using `to()`. You can call `reset_device_map()` to remove the existing device map from the pipeline."
2121+
)
2122+
2123+
if device_type in ["cuda", "xpu"]:
2124+
if pipeline_is_sequentially_offloaded and not pipeline_has_bnb:
2125+
raise ValueError(
2126+
"It seems like you have activated sequential model offloading by calling `enable_sequential_cpu_offload`, but are now attempting to move the pipeline to GPU. This is not compatible with offloading. Please, move your pipeline `.to('cpu')` or consider removing the move altogether if you use sequential offloading."
2127+
)
2128+
# PR: https://github.com/huggingface/accelerate/pull/3223/
2129+
elif pipeline_has_bnb and is_accelerate_version("<", "1.1.0.dev0"):
2130+
raise ValueError(
2131+
"You are trying to call `.to('cuda')` on a pipeline that has models quantized with `bitsandbytes`. Your current `accelerate` installation does not support it. Please upgrade the installation."
2132+
)
2133+
2134+
# Display a warning in this case (the operation succeeds but the benefits are lost)
2135+
pipeline_is_offloaded = any(module_is_offloaded(module) for _, module in self.components.items())
2136+
if pipeline_is_offloaded and device_type in ["cuda", "xpu"]:
2137+
logger.warning(
2138+
f"It seems like you have activated model offloading by calling `enable_model_cpu_offload`, but are now manually moving the pipeline to GPU. It is strongly recommended against doing so as memory gains from offloading are likely to be lost. Offloading automatically takes care of moving the individual components {', '.join(self.components.keys())} to GPU when needed. To make sure offloading works as expected, you should consider moving the pipeline back to CPU: `pipeline.to('cpu')` or removing the move altogether if you use offloading."
2139+
)
2140+
2141+
# Enable generic support for Intel Gaudi accelerator using GPU/HPU migration
2142+
if device_type == "hpu" and kwargs.pop("hpu_migration", True) and is_hpu_available():
2143+
os.environ["PT_HPU_GPU_MIGRATION"] = "1"
2144+
logger.debug("Environment variable set: PT_HPU_GPU_MIGRATION=1")
2145+
2146+
import habana_frameworks.torch # noqa: F401
2147+
2148+
# HPU hardware check
2149+
if not (hasattr(torch, "hpu") and torch.hpu.is_available()):
2150+
raise ValueError("You are trying to call `.to('hpu')` but HPU device is unavailable.")
2151+
2152+
os.environ["PT_HPU_MAX_COMPOUND_OP_SIZE"] = "1"
2153+
logger.debug("Environment variable set: PT_HPU_MAX_COMPOUND_OP_SIZE=1")
2154+
2155+
module_names, _ = self._get_signature_keys(self)
2156+
modules = [getattr(self, n, None) for n in module_names]
2157+
modules = [m for m in modules if isinstance(m, torch.nn.Module)]
2158+
2159+
is_offloaded = pipeline_is_offloaded or pipeline_is_sequentially_offloaded
2160+
for module in modules:
2161+
_, is_loaded_in_4bit_bnb, is_loaded_in_8bit_bnb = _check_bnb_status(module)
2162+
is_group_offloaded = self._maybe_raise_error_if_group_offload_active(module=module)
2163+
2164+
if (is_loaded_in_4bit_bnb or is_loaded_in_8bit_bnb) and dtype is not None:
2165+
logger.warning(
2166+
f"The module '{module.__class__.__name__}' has been loaded in `bitsandbytes` {'4bit' if is_loaded_in_4bit_bnb else '8bit'} and conversion to {dtype} is not supported. Module is still in {'4bit' if is_loaded_in_4bit_bnb else '8bit'} precision."
2167+
)
2168+
2169+
if is_loaded_in_8bit_bnb and device is not None:
2170+
logger.warning(
2171+
f"The module '{module.__class__.__name__}' has been loaded in `bitsandbytes` 8bit and moving it to {device} via `.to()` is not supported. Module is still on {module.device}."
2172+
)
2173+
2174+
# Note: we also handle this at the ModelMixin level. The reason for doing it here too is that modeling
2175+
# components can be from outside diffusers too, but still have group offloading enabled.
2176+
if (
2177+
self._maybe_raise_error_if_group_offload_active(raise_error=False, module=module)
2178+
and device is not None
2179+
):
2180+
logger.warning(
2181+
f"The module '{module.__class__.__name__}' is group offloaded and moving it to {device} via `.to()` is not supported."
2182+
)
2183+
2184+
# This can happen for `transformers` models. CPU placement was added in
2185+
# https://github.com/huggingface/transformers/pull/33122. So, we guard this accordingly.
2186+
if is_loaded_in_4bit_bnb and device is not None and is_transformers_version(">", "4.44.0"):
2187+
module.to(device=device)
2188+
elif not is_loaded_in_4bit_bnb and not is_loaded_in_8bit_bnb and not is_group_offloaded:
2189+
module.to(device, dtype)
2190+
2191+
if (
2192+
module.dtype == torch.float16
2193+
and str(device) in ["cpu"]
2194+
and not silence_dtype_warnings
2195+
and not is_offloaded
2196+
):
2197+
logger.warning(
2198+
"Pipelines loaded with `dtype=torch.float16` cannot run with `cpu` device. It"
2199+
" is not recommended to move them to `cpu` as running them will fail. Please make"
2200+
" sure to use an accelerator to run the pipeline in inference, due to the lack of"
2201+
" support for`float16` operations on this device in PyTorch. Please, remove the"
2202+
" `torch_dtype=torch.float16` argument, or use another device for inference."
2203+
)
2204+
return self
20182205

20192206
# YiYi TODO:
20202207
# 1. should support save some components too! currently only modular_model_index.json is saved

0 commit comments

Comments
 (0)