Skip to content

Commit b46b7c8

Browse files
committed
add `to` method to modular loader, copied from DiffusionPipeline; not tested yet
1 parent fc9168f commit b46b7c8

File tree

1 file changed

+190
-3
lines changed

1 file changed

+190
-3
lines changed

src/diffusers/modular_pipelines/modular_pipeline.py

Lines changed: 190 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from collections import OrderedDict
2020
from dataclasses import dataclass, field
2121
from typing import Any, Dict, List, Tuple, Union, Optional
22+
from typing_extensions import Self
2223
from copy import deepcopy
2324

2425

@@ -1997,9 +1998,195 @@ def load(self, component_names: Optional[List[str]] = None, **kwargs):
19971998
# Register all components at once
19981999
self.register_components(**components_to_register)
19992000

2000-
# YiYi TODO: should support to method
2001-
def to(self, *args, **kwargs):
2002-
pass
2001+
# Copied from diffusers.pipelines.pipeline_utils.DiffusionPipeline.to
def to(self, *args, **kwargs) -> Self:
    r"""
    Performs Pipeline dtype and/or device conversion. A torch.dtype and torch.device are inferred from the
    arguments of `self.to(*args, **kwargs).`

    <Tip>

    If the pipeline already has the correct torch.dtype and torch.device, then it is returned as is. Otherwise,
    the returned pipeline is a copy of self with the desired torch.dtype and torch.device.

    </Tip>


    Here are the ways to call `to`:

    - `to(dtype, silence_dtype_warnings=False) → DiffusionPipeline` to return a pipeline with the specified
      [`dtype`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.dtype)
    - `to(device, silence_dtype_warnings=False) → DiffusionPipeline` to return a pipeline with the specified
      [`device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.device)
    - `to(device=None, dtype=None, silence_dtype_warnings=False) → DiffusionPipeline` to return a pipeline with the
      specified [`device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.device) and
      [`dtype`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.dtype)

    Arguments:
        dtype (`torch.dtype`, *optional*):
            Returns a pipeline with the specified
            [`dtype`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.dtype)
        device (`torch.Device`, *optional*):
            Returns a pipeline with the specified
            [`device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.device)
        silence_dtype_warnings (`bool`, *optional*, defaults to `False`):
            Whether to omit warnings if the target `dtype` is not compatible with the target `device`.

    Returns:
        [`DiffusionPipeline`]: The pipeline converted to specified `dtype` and/or `device`.
    """
    # Keyword forms of the target dtype/device; positional forms are parsed below.
    dtype = kwargs.pop("dtype", None)
    device = kwargs.pop("device", None)
    silence_dtype_warnings = kwargs.pop("silence_dtype_warnings", False)

    # `to(...)` accepts positional arguments as (dtype), (device) or (device, dtype),
    # mirroring `torch.nn.Module.to`. Anything else is rejected.
    dtype_arg = None
    device_arg = None
    if len(args) == 1:
        if isinstance(args[0], torch.dtype):
            dtype_arg = args[0]
        else:
            device_arg = torch.device(args[0]) if args[0] is not None else None
    elif len(args) == 2:
        if isinstance(args[0], torch.dtype):
            raise ValueError(
                "When passing two arguments, make sure the first corresponds to `device` and the second to `dtype`."
            )
        device_arg = torch.device(args[0]) if args[0] is not None else None
        dtype_arg = args[1]
    elif len(args) > 2:
        raise ValueError("Please make sure to pass at most two arguments (`device` and `dtype`) `.to(...)`")

    # Specifying the same option both positionally and by keyword is ambiguous — refuse it.
    if dtype is not None and dtype_arg is not None:
        raise ValueError(
            "You have passed `dtype` both as an argument and as a keyword argument. Please only pass one of the two."
        )

    dtype = dtype or dtype_arg

    if device is not None and device_arg is not None:
        raise ValueError(
            "You have passed `device` both as an argument and as a keyword argument. Please only pass one of the two."
        )

    device = device or device_arg
    device_type = torch.device(device).type if device is not None else None
    # True when any component reports bitsandbytes quantization via `_check_bnb_status`
    # (presumably 4-bit or 8-bit — the tuple elements are checked individually below).
    pipeline_has_bnb = any(any((_check_bnb_status(module))) for _, module in self.components.items())

    # throw warning if pipeline is in "offloaded"-mode but user tries to manually set to GPU.
    def module_is_sequentially_offloaded(module):
        # Sequential offload detection relies on accelerate's AlignDevicesHook (>= 0.14.0).
        if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
            return False

        _, _, is_loaded_in_8bit_bnb = _check_bnb_status(module)

        # 8-bit bnb modules are never treated as sequentially offloaded here.
        if is_loaded_in_8bit_bnb:
            return False

        # The hook may be attached directly or nested inside a hook container's first slot.
        return hasattr(module, "_hf_hook") and (
            isinstance(module._hf_hook, accelerate.hooks.AlignDevicesHook)
            or hasattr(module._hf_hook, "hooks")
            and isinstance(module._hf_hook.hooks[0], accelerate.hooks.AlignDevicesHook)
        )

    def module_is_offloaded(module):
        # Model (whole-module) CPU offload uses accelerate's CpuOffload hook (>= 0.17.0.dev0).
        if not is_accelerate_available() or is_accelerate_version("<", "0.17.0.dev0"):
            return False

        return hasattr(module, "_hf_hook") and isinstance(module._hf_hook, accelerate.hooks.CpuOffload)

    # .to("cuda") would raise an error if the pipeline is sequentially offloaded, so we raise our own to make it clearer
    pipeline_is_sequentially_offloaded = any(
        module_is_sequentially_offloaded(module) for _, module in self.components.items()
    )

    # A multi-entry hf_device_map means components were placed by a device-mapping
    # strategy; explicit `.to()` placement would conflict with it.
    is_pipeline_device_mapped = self.hf_device_map is not None and len(self.hf_device_map) > 1
    if is_pipeline_device_mapped:
        raise ValueError(
            "It seems like you have activated a device mapping strategy on the pipeline which doesn't allow explicit device placement using `to()`. You can call `reset_device_map()` to remove the existing device map from the pipeline."
        )

    if device_type in ["cuda", "xpu"]:
        if pipeline_is_sequentially_offloaded and not pipeline_has_bnb:
            raise ValueError(
                "It seems like you have activated sequential model offloading by calling `enable_sequential_cpu_offload`, but are now attempting to move the pipeline to GPU. This is not compatible with offloading. Please, move your pipeline `.to('cpu')` or consider removing the move altogether if you use sequential offloading."
            )
        # PR: https://github.com/huggingface/accelerate/pull/3223/
        elif pipeline_has_bnb and is_accelerate_version("<", "1.1.0.dev0"):
            raise ValueError(
                "You are trying to call `.to('cuda')` on a pipeline that has models quantized with `bitsandbytes`. Your current `accelerate` installation does not support it. Please upgrade the installation."
            )

    # Display a warning in this case (the operation succeeds but the benefits are lost)
    pipeline_is_offloaded = any(module_is_offloaded(module) for _, module in self.components.items())
    if pipeline_is_offloaded and device_type in ["cuda", "xpu"]:
        logger.warning(
            f"It seems like you have activated model offloading by calling `enable_model_cpu_offload`, but are now manually moving the pipeline to GPU. It is strongly recommended against doing so as memory gains from offloading are likely to be lost. Offloading automatically takes care of moving the individual components {', '.join(self.components.keys())} to GPU when needed. To make sure offloading works as expected, you should consider moving the pipeline back to CPU: `pipeline.to('cpu')` or removing the move altogether if you use offloading."
        )

    # Enable generic support for Intel Gaudi accelerator using GPU/HPU migration
    if device_type == "hpu" and kwargs.pop("hpu_migration", True) and is_hpu_available():
        os.environ["PT_HPU_GPU_MIGRATION"] = "1"
        logger.debug("Environment variable set: PT_HPU_GPU_MIGRATION=1")

        # Importing habana_frameworks.torch registers the hpu backend with torch.
        import habana_frameworks.torch  # noqa: F401

        # HPU hardware check
        if not (hasattr(torch, "hpu") and torch.hpu.is_available()):
            raise ValueError("You are trying to call `.to('hpu')` but HPU device is unavailable.")

        os.environ["PT_HPU_MAX_COMPOUND_OP_SIZE"] = "1"
        logger.debug("Environment variable set: PT_HPU_MAX_COMPOUND_OP_SIZE=1")

    # Collect every registered component that is an nn.Module; only those get moved/cast.
    module_names, _ = self._get_signature_keys(self)
    modules = [getattr(self, n, None) for n in module_names]
    modules = [m for m in modules if isinstance(m, torch.nn.Module)]

    is_offloaded = pipeline_is_offloaded or pipeline_is_sequentially_offloaded
    for module in modules:
        _, is_loaded_in_4bit_bnb, is_loaded_in_8bit_bnb = _check_bnb_status(module)
        is_group_offloaded = self._maybe_raise_error_if_group_offload_active(module=module)

        # bnb-quantized modules cannot change dtype; warn and leave them as-is.
        if (is_loaded_in_4bit_bnb or is_loaded_in_8bit_bnb) and dtype is not None:
            logger.warning(
                f"The module '{module.__class__.__name__}' has been loaded in `bitsandbytes` {'4bit' if is_loaded_in_4bit_bnb else '8bit'} and conversion to {dtype} is not supported. Module is still in {'4bit' if is_loaded_in_4bit_bnb else '8bit'} precision."
            )

        # 8-bit bnb modules cannot be moved between devices via `.to()`; warn only.
        if is_loaded_in_8bit_bnb and device is not None:
            logger.warning(
                f"The module '{module.__class__.__name__}' has been loaded in `bitsandbytes` 8bit and moving it to {device} via `.to()` is not supported. Module is still on {module.device}."
            )

        # Note: we also handle this at the ModelMixin level. The reason for doing it here too is that modeling
        # components can be from outside diffusers too, but still have group offloading enabled.
        if (
            self._maybe_raise_error_if_group_offload_active(raise_error=False, module=module)
            and device is not None
        ):
            logger.warning(
                f"The module '{module.__class__.__name__}' is group offloaded and moving it to {device} via `.to()` is not supported."
            )

        # This can happen for `transformer` models. CPU placement was added in
        # https://github.com/huggingface/transformers/pull/33122. So, we guard this accordingly.
        if is_loaded_in_4bit_bnb and device is not None and is_transformers_version(">", "4.44.0"):
            module.to(device=device)
        elif not is_loaded_in_4bit_bnb and not is_loaded_in_8bit_bnb and not is_group_offloaded:
            module.to(device, dtype)

        # fp16 weights cannot be run on CPU; warn unless the caller silenced it or
        # the pipeline is offloaded (in which case CPU placement is expected).
        if (
            module.dtype == torch.float16
            and str(device) in ["cpu"]
            and not silence_dtype_warnings
            and not is_offloaded
        ):
            logger.warning(
                "Pipelines loaded with `dtype=torch.float16` cannot run with `cpu` device. It"
                " is not recommended to move them to `cpu` as running them will fail. Please make"
                " sure to use an accelerator to run the pipeline in inference, due to the lack of"
                " support for`float16` operations on this device in PyTorch. Please, remove the"
                " `torch_dtype=torch.float16` argument, or use another device for inference."
            )
    # Conversion is in-place on the components; return self for chaining.
    return self
20032190

20042191
# YiYi TODO:
20052192
# 1. should support save some components too! currently only modular_model_index.json is saved

0 commit comments

Comments
 (0)