|
19 | 19 | from collections import OrderedDict |
20 | 20 | from dataclasses import dataclass, field |
21 | 21 | from typing import Any, Dict, List, Tuple, Union, Optional |
| 22 | +from typing_extensions import Self |
22 | 23 | from copy import deepcopy |
23 | 24 |
|
24 | 25 |
|
@@ -1997,9 +1998,195 @@ def load(self, component_names: Optional[List[str]] = None, **kwargs): |
1997 | 1998 | # Register all components at once |
1998 | 1999 | self.register_components(**components_to_register) |
1999 | 2000 |
|
2000 | | - # YiYi TODO: should support to method |
2001 | | - def to(self, *args, **kwargs): |
2002 | | - pass |
| 2001 | + # Copied from diffusers.pipelines.pipeline_utils.DiffusionPipeline.to |
| 2002 | + def to(self, *args, **kwargs) -> Self: |
| 2003 | + r""" |
| 2004 | + Performs Pipeline dtype and/or device conversion. A torch.dtype and torch.device are inferred from the |
| 2005 | + arguments of `self.to(*args, **kwargs)`. |
| 2006 | +
|
| 2007 | + <Tip> |
| 2008 | +
|
| 2009 | + If the pipeline already has the correct torch.dtype and torch.device, then it is returned as is. Otherwise, |
| 2010 | + the returned pipeline is a copy of self with the desired torch.dtype and torch.device. |
| 2011 | +
|
| 2012 | + </Tip> |
| 2013 | +
|
| 2014 | +
|
| 2015 | + Here are the ways to call `to`: |
| 2016 | +
|
| 2017 | + - `to(dtype, silence_dtype_warnings=False) → DiffusionPipeline` to return a pipeline with the specified |
| 2018 | + [`dtype`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.dtype) |
| 2019 | + - `to(device, silence_dtype_warnings=False) → DiffusionPipeline` to return a pipeline with the specified |
| 2020 | + [`device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.device) |
| 2021 | + - `to(device=None, dtype=None, silence_dtype_warnings=False) → DiffusionPipeline` to return a pipeline with the |
| 2022 | + specified [`device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.device) and |
| 2023 | + [`dtype`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.dtype) |
| 2024 | +
|
| 2025 | + Arguments: |
| 2026 | + dtype (`torch.dtype`, *optional*): |
| 2027 | + Returns a pipeline with the specified |
| 2028 | + [`dtype`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.dtype) |
| 2029 | + device (`torch.device`, *optional*): |
| 2030 | + Returns a pipeline with the specified |
| 2031 | + [`device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.device) |
| 2032 | + silence_dtype_warnings (`bool`, *optional*, defaults to `False`): |
| 2033 | + Whether to omit warnings if the target `dtype` is not compatible with the target `device`. |
| 2034 | +
|
| 2035 | + Returns: |
| 2036 | + [`DiffusionPipeline`]: The pipeline converted to the specified `dtype` and/or `device`. |
| 2037 | + """ |
| 2038 | + dtype = kwargs.pop("dtype", None) |
| 2039 | + device = kwargs.pop("device", None) |
| 2040 | + silence_dtype_warnings = kwargs.pop("silence_dtype_warnings", False) |
| 2041 | + |
| 2042 | + dtype_arg = None |
| 2043 | + device_arg = None |
| 2044 | + if len(args) == 1: |
| 2045 | + if isinstance(args[0], torch.dtype): |
| 2046 | + dtype_arg = args[0] |
| 2047 | + else: |
| 2048 | + device_arg = torch.device(args[0]) if args[0] is not None else None |
| 2049 | + elif len(args) == 2: |
| 2050 | + if isinstance(args[0], torch.dtype): |
| 2051 | + raise ValueError( |
| 2052 | + "When passing two arguments, make sure the first corresponds to `device` and the second to `dtype`." |
| 2053 | + ) |
| 2054 | + device_arg = torch.device(args[0]) if args[0] is not None else None |
| 2055 | + dtype_arg = args[1] |
| 2056 | + elif len(args) > 2: |
| 2057 | + raise ValueError("Please make sure to pass at most two arguments (`device` and `dtype`) `.to(...)`") |
| 2058 | + |
| 2059 | + if dtype is not None and dtype_arg is not None: |
| 2060 | + raise ValueError( |
| 2061 | + "You have passed `dtype` both as an argument and as a keyword argument. Please only pass one of the two." |
| 2062 | + ) |
| 2063 | + |
| 2064 | + dtype = dtype or dtype_arg |
| 2065 | + |
| 2066 | + if device is not None and device_arg is not None: |
| 2067 | + raise ValueError( |
| 2068 | + "You have passed `device` both as an argument and as a keyword argument. Please only pass one of the two." |
| 2069 | + ) |
| 2070 | + |
| 2071 | + device = device or device_arg |
| 2072 | + device_type = torch.device(device).type if device is not None else None |
| 2073 | + pipeline_has_bnb = any(any((_check_bnb_status(module))) for _, module in self.components.items()) |
| 2074 | + |
| 2075 | + # throw warning if pipeline is in "offloaded"-mode but user tries to manually set to GPU. |
| 2076 | + def module_is_sequentially_offloaded(module): |
| 2077 | + if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"): |
| 2078 | + return False |
| 2079 | + |
| 2080 | + _, _, is_loaded_in_8bit_bnb = _check_bnb_status(module) |
| 2081 | + |
| 2082 | + if is_loaded_in_8bit_bnb: |
| 2083 | + return False |
| 2084 | + |
| 2085 | + return hasattr(module, "_hf_hook") and ( |
| 2086 | + isinstance(module._hf_hook, accelerate.hooks.AlignDevicesHook) |
| 2087 | + or hasattr(module._hf_hook, "hooks") |
| 2088 | + and isinstance(module._hf_hook.hooks[0], accelerate.hooks.AlignDevicesHook) |
| 2089 | + ) |
| 2090 | + |
| 2091 | + def module_is_offloaded(module): |
| 2092 | + if not is_accelerate_available() or is_accelerate_version("<", "0.17.0.dev0"): |
| 2093 | + return False |
| 2094 | + |
| 2095 | + return hasattr(module, "_hf_hook") and isinstance(module._hf_hook, accelerate.hooks.CpuOffload) |
| 2096 | + |
| 2097 | + # .to("cuda") would raise an error if the pipeline is sequentially offloaded, so we raise our own to make it clearer |
| 2098 | + pipeline_is_sequentially_offloaded = any( |
| 2099 | + module_is_sequentially_offloaded(module) for _, module in self.components.items() |
| 2100 | + ) |
| 2101 | + |
| 2102 | + is_pipeline_device_mapped = self.hf_device_map is not None and len(self.hf_device_map) > 1 |
| 2103 | + if is_pipeline_device_mapped: |
| 2104 | + raise ValueError( |
| 2105 | + "It seems like you have activated a device mapping strategy on the pipeline which doesn't allow explicit device placement using `to()`. You can call `reset_device_map()` to remove the existing device map from the pipeline." |
| 2106 | + ) |
| 2107 | + |
| 2108 | + if device_type in ["cuda", "xpu"]: |
| 2109 | + if pipeline_is_sequentially_offloaded and not pipeline_has_bnb: |
| 2110 | + raise ValueError( |
| 2111 | + "It seems like you have activated sequential model offloading by calling `enable_sequential_cpu_offload`, but are now attempting to move the pipeline to GPU. This is not compatible with offloading. Please, move your pipeline `.to('cpu')` or consider removing the move altogether if you use sequential offloading." |
| 2112 | + ) |
| 2113 | + # PR: https://github.com/huggingface/accelerate/pull/3223/ |
| 2114 | + elif pipeline_has_bnb and is_accelerate_version("<", "1.1.0.dev0"): |
| 2115 | + raise ValueError( |
| 2116 | + "You are trying to call `.to('cuda')` on a pipeline that has models quantized with `bitsandbytes`. Your current `accelerate` installation does not support it. Please upgrade the installation." |
| 2117 | + ) |
| 2118 | + |
| 2119 | + # Display a warning in this case (the operation succeeds but the benefits are lost) |
| 2120 | + pipeline_is_offloaded = any(module_is_offloaded(module) for _, module in self.components.items()) |
| 2121 | + if pipeline_is_offloaded and device_type in ["cuda", "xpu"]: |
| 2122 | + logger.warning( |
| 2123 | + f"It seems like you have activated model offloading by calling `enable_model_cpu_offload`, but are now manually moving the pipeline to GPU. It is strongly recommended against doing so as memory gains from offloading are likely to be lost. Offloading automatically takes care of moving the individual components {', '.join(self.components.keys())} to GPU when needed. To make sure offloading works as expected, you should consider moving the pipeline back to CPU: `pipeline.to('cpu')` or removing the move altogether if you use offloading." |
| 2124 | + ) |
| 2125 | + |
| 2126 | + # Enable generic support for Intel Gaudi accelerator using GPU/HPU migration |
| 2127 | + if device_type == "hpu" and kwargs.pop("hpu_migration", True) and is_hpu_available(): |
| 2128 | + os.environ["PT_HPU_GPU_MIGRATION"] = "1" |
| 2129 | + logger.debug("Environment variable set: PT_HPU_GPU_MIGRATION=1") |
| 2130 | + |
| 2131 | + import habana_frameworks.torch # noqa: F401 |
| 2132 | + |
| 2133 | + # HPU hardware check |
| 2134 | + if not (hasattr(torch, "hpu") and torch.hpu.is_available()): |
| 2135 | + raise ValueError("You are trying to call `.to('hpu')` but HPU device is unavailable.") |
| 2136 | + |
| 2137 | + os.environ["PT_HPU_MAX_COMPOUND_OP_SIZE"] = "1" |
| 2138 | + logger.debug("Environment variable set: PT_HPU_MAX_COMPOUND_OP_SIZE=1") |
| 2139 | + |
| 2140 | + module_names, _ = self._get_signature_keys(self) |
| 2141 | + modules = [getattr(self, n, None) for n in module_names] |
| 2142 | + modules = [m for m in modules if isinstance(m, torch.nn.Module)] |
| 2143 | + |
| 2144 | + is_offloaded = pipeline_is_offloaded or pipeline_is_sequentially_offloaded |
| 2145 | + for module in modules: |
| 2146 | + _, is_loaded_in_4bit_bnb, is_loaded_in_8bit_bnb = _check_bnb_status(module) |
| 2147 | + is_group_offloaded = self._maybe_raise_error_if_group_offload_active(module=module) |
| 2148 | + |
| 2149 | + if (is_loaded_in_4bit_bnb or is_loaded_in_8bit_bnb) and dtype is not None: |
| 2150 | + logger.warning( |
| 2151 | + f"The module '{module.__class__.__name__}' has been loaded in `bitsandbytes` {'4bit' if is_loaded_in_4bit_bnb else '8bit'} and conversion to {dtype} is not supported. Module is still in {'4bit' if is_loaded_in_4bit_bnb else '8bit'} precision." |
| 2152 | + ) |
| 2153 | + |
| 2154 | + if is_loaded_in_8bit_bnb and device is not None: |
| 2155 | + logger.warning( |
| 2156 | + f"The module '{module.__class__.__name__}' has been loaded in `bitsandbytes` 8bit and moving it to {device} via `.to()` is not supported. Module is still on {module.device}." |
| 2157 | + ) |
| 2158 | + |
| 2159 | + # Note: we also handle this at the ModelMixin level. The reason for doing it here too is that modeling |
| 2160 | + # components can be from outside diffusers too, but still have group offloading enabled. |
| 2161 | + if ( |
| 2162 | + self._maybe_raise_error_if_group_offload_active(raise_error=False, module=module) |
| 2163 | + and device is not None |
| 2164 | + ): |
| 2165 | + logger.warning( |
| 2166 | + f"The module '{module.__class__.__name__}' is group offloaded and moving it to {device} via `.to()` is not supported." |
| 2167 | + ) |
| 2168 | + |
| 2169 | + # This can happen for `transformer` models. CPU placement was added in |
| 2170 | + # https://github.com/huggingface/transformers/pull/33122. So, we guard this accordingly. |
| 2171 | + if is_loaded_in_4bit_bnb and device is not None and is_transformers_version(">", "4.44.0"): |
| 2172 | + module.to(device=device) |
| 2173 | + elif not is_loaded_in_4bit_bnb and not is_loaded_in_8bit_bnb and not is_group_offloaded: |
| 2174 | + module.to(device, dtype) |
| 2175 | + |
| 2176 | + if ( |
| 2177 | + module.dtype == torch.float16 |
| 2178 | + and str(device) in ["cpu"] |
| 2179 | + and not silence_dtype_warnings |
| 2180 | + and not is_offloaded |
| 2181 | + ): |
| 2182 | + logger.warning( |
| 2183 | + "Pipelines loaded with `dtype=torch.float16` cannot run with `cpu` device. It" |
| 2184 | + " is not recommended to move them to `cpu` as running them will fail. Please make" |
| 2185 | + " sure to use an accelerator to run the pipeline in inference, due to the lack of" |
| 2186 | + " support for`float16` operations on this device in PyTorch. Please, remove the" |
| 2187 | + " `torch_dtype=torch.float16` argument, or use another device for inference." |
| 2188 | + ) |
| 2189 | + return self |
2003 | 2190 |
|
2004 | 2191 | # YiYi TODO: |
2005 | 2192 | # 1. should support save some components too! currently only modular_model_index.json is saved |
|
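For reference, a minimal usage sketch of the call forms documented in the docstring above. Since this method is copied from `DiffusionPipeline.to`, the sketch uses `DiffusionPipeline.from_pretrained`; the repo id is a placeholder, not a real checkpoint.

```py
import torch
from diffusers import DiffusionPipeline

# Placeholder repo id; substitute any pipeline checkpoint available locally or on the Hub.
pipe = DiffusionPipeline.from_pretrained("some-org/some-pipeline")

# 1. dtype only: cast the pipeline's modules to half precision.
pipe = pipe.to(torch.float16)

# 2. device only: move the pipeline's modules to the first CUDA device.
pipe = pipe.to("cuda")

# 3. device and dtype together, as keyword arguments.
pipe = pipe.to(device="cuda", dtype=torch.float16)

# Positional form: device first, then dtype (passing dtype first raises a ValueError).
pipe = pipe.to("cuda", torch.float16)
```

As the method enforces, `.to("cuda")` is rejected when sequential CPU offloading is active (unless the pipeline holds `bitsandbytes`-quantized modules on a recent `accelerate`), and a pipeline with an explicit device map must call `reset_device_map()` before `to()` can place devices.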