[LoRA] fix: lora loading when using with a device_mapped model. #9449
Changes from 22 commits
Changes to the LoRA loader module:

@@ -39,6 +39,7 @@
     get_adapter_name,
     get_peft_kwargs,
     is_accelerate_available,
+    is_accelerate_version,
     is_peft_version,
     is_torch_version,
     logging,
@@ -398,9 +399,18 @@ def _optionally_disable_offloading(cls, _pipeline):
         is_model_cpu_offload = False
         is_sequential_cpu_offload = False

+        def model_has_device_map(model):
+            if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
+                return False
+            return getattr(model, "hf_device_map", None) is not None
+
         if _pipeline is not None and _pipeline.hf_device_map is None:
             for _, component in _pipeline.components.items():
-                if isinstance(component, nn.Module) and hasattr(component, "_hf_hook"):
+                if (
+                    isinstance(component, nn.Module)
+                    and hasattr(component, "_hf_hook")
+                    and not model_has_device_map(component)
+                ):
                     if not is_model_cpu_offload:
                         is_model_cpu_offload = isinstance(component._hf_hook, CpuOffload)
                     if not is_sequential_cpu_offload:
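For reference, the helper introduced here keys off the `hf_device_map` attribute that Accelerate attaches to a model loaded with a `device_map`. A minimal, self-contained sketch of the check (the toy `nn.Linear` modules and the hand-set attribute are illustrative only; the real helper also gates on the installed Accelerate version):

```python
import torch.nn as nn


def model_has_device_map(model):
    # A module counts as "device-mapped" only if Accelerate has attached a
    # non-None `hf_device_map` to it during `from_pretrained(..., device_map=...)`.
    return getattr(model, "hf_device_map", None) is not None


plain = nn.Linear(4, 4)
mapped = nn.Linear(4, 4)
mapped.hf_device_map = {"": 0}  # what Accelerate records after a device-mapped load

print(model_has_device_map(plain))   # False
print(model_has_device_map(mapped))  # True
```

Components for which this returns True are now skipped when the LoRA loader decides whether to temporarily disable CPU offloading.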
Changes to the core pipeline module (`DiffusionPipeline`):
@@ -389,6 +389,11 @@ def to(self, *args, **kwargs):

         device = device or device_arg

+        def model_has_device_map(model):
+            if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
+                return False
+            return getattr(model, "hf_device_map", None) is not None
+
         # throw warning if pipeline is in "offloaded"-mode but user tries to manually set to GPU.
         def module_is_sequentially_offloaded(module):
             if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
@@ -406,6 +411,16 @@ def module_is_offloaded(module):

             return hasattr(module, "_hf_hook") and isinstance(module._hf_hook, accelerate.hooks.CpuOffload)

+        # device-mapped modules should not go through any device placements.
+        device_mapped_components = [
+            key for key, component in self.components.items() if model_has_device_map(component)
+        ]
+        if device_mapped_components:
+            raise ValueError(
+                "The following pipeline components have been found to use a device map: "
+                f"{device_mapped_components}. This is incompatible with explicitly setting the device using `to()`."
+            )
+
         # .to("cuda") would raise an error if the pipeline is sequentially offloaded, so we raise our own to make it clearer
         pipeline_is_sequentially_offloaded = any(
             module_is_sequentially_offloaded(module) for _, module in self.components.items()
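In user terms, the guard turns an implicit conflict into an explicit error. A sketch of what now happens when a device-mapped pipeline is moved with `to()` (the checkpoint id and the multi-GPU setup are assumptions for illustration):

```python
import torch
from diffusers import DiffusionPipeline

# Shard the pipeline's components across the available GPUs.
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
    device_map="balanced",
)

# Moving a device-mapped pipeline wholesale is now rejected up front
# instead of failing in a harder-to-debug way later.
try:
    pipe.to("cuda")
except ValueError as e:
    print(e)  # lists the device-mapped components and mentions `to()`
```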
@@ -1002,6 +1017,22 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t
                 The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
                 default to "cuda".
         """
+
+        def model_has_device_map(model):
+            if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
+                return False
+            return getattr(model, "hf_device_map", None) is not None
+
+        # device-mapped modules should not go through any device placements.
+        device_mapped_components = [
+            key for key, component in self.components.items() if model_has_device_map(component)
+        ]
+        if device_mapped_components:
+            raise ValueError(
+                "The following pipeline components have been found to use a device map: "
+                f"{device_mapped_components}. This is incompatible with `enable_model_cpu_offload()`."
+            )
+
         is_pipeline_device_mapped = self.hf_device_map is not None and len(self.hf_device_map) > 1
         if is_pipeline_device_mapped:
             raise ValueError(
@@ -1104,6 +1135,22 @@ def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Un
                 The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
                 default to "cuda".
         """
+
+        def model_has_device_map(model):
+            if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
+                return False
+            return getattr(model, "hf_device_map", None) is not None
+
+        # device-mapped modules should not go through any device placements.
+        device_mapped_components = [
+            key for key, component in self.components.items() if model_has_device_map(component)
+        ]
+        if device_mapped_components:
+            raise ValueError(
+                "The following pipeline components have been found to use a device map: "
+                f"{device_mapped_components}. This is incompatible with `enable_sequential_cpu_offload()`."
+            )
+
         if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
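The same pattern guards both offloading entry points. A short continuation of the `to()` sketch above (same assumed checkpoint and multi-GPU setup):

```python
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16, device_map="balanced"
)

# Both offloading modes now refuse to run on a device-mapped pipeline, since CPU
# offload hooks and an existing device map would fight over component placement.
for enable_offload in (pipe.enable_model_cpu_offload, pipe.enable_sequential_cpu_offload):
    try:
        enable_offload()
    except ValueError as e:
        print(e)  # names the device-mapped components and the incompatible method
```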
Changes to the common pipeline tests:
@@ -30,19 +30,24 @@
 )
 from diffusers.image_processor import VaeImageProcessor
 from diffusers.loaders import IPAdapterMixin
+from diffusers.models.adapter import MultiAdapter
 from diffusers.models.attention_processor import AttnProcessor
 from diffusers.models.controlnet_xs import UNetControlNetXSModel
 from diffusers.models.unets.unet_3d_condition import UNet3DConditionModel
 from diffusers.models.unets.unet_i2vgen_xl import I2VGenXLUNet
 from diffusers.models.unets.unet_motion_model import UNetMotionModel
+from diffusers.pipelines.controlnet import MultiControlNetModel
 from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
 from diffusers.schedulers import KarrasDiffusionSchedulers
 from diffusers.utils import logging
 from diffusers.utils.import_utils import is_accelerate_available, is_accelerate_version, is_xformers_available
 from diffusers.utils.testing_utils import (
     CaptureLogger,
+    nightly,
     require_torch,
+    require_torch_multi_gpu,
     skip_mps,
+    slow,
     torch_device,
 )

@@ -59,6 +64,10 @@
 from ..others.test_utils import TOKEN, USER, is_staging_test


+if is_accelerate_available():
+    from accelerate.utils import compute_module_sizes
+
+
 def to_np(tensor):
     if isinstance(tensor, torch.Tensor):
         tensor = tensor.detach().cpu().numpy()
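The new tests below share one trick worth spelling out: they cap `max_memory` per GPU at the size of the pipeline's largest component, so `device_map="balanced"` is forced to actually spread components across two devices rather than packing everything onto one. A condensed sketch of that setup (the `pipe`, `PipelineClass`, and `saved_dir` names are placeholders):

```python
import torch
from accelerate.utils import compute_module_sizes


def largest_component_size(pipe):
    # compute_module_sizes returns a dict of per-submodule sizes in bytes;
    # the "" key is the whole module, which is what gets compared against
    # the per-GPU memory budget.
    return max(
        compute_module_sizes(module)[""]
        for module in pipe.components.values()
        if isinstance(module, torch.nn.Module)
    )


# max_memory = {0: largest_component_size(pipe), 1: largest_component_size(pipe)}
# reloaded = PipelineClass.from_pretrained(saved_dir, device_map="balanced", max_memory=max_memory)
```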
@@ -1907,6 +1916,99 @@ def test_StableDiffusionMixin_component(self):
             )
         )

+    @require_torch_multi_gpu
+    @slow
+    @nightly
+    def test_calling_to_raises_error_device_mapped_components(self):
+        if "Combined" in self.pipeline_class.__name__:
+            return
+
+        # TODO (sayakpaul): skip these for now. revisit later.
+        components = self.get_dummy_components()
+        if any(isinstance(component, (MultiControlNetModel, MultiAdapter)) for component in components):
+            return
+
+        pipe = self.pipeline_class(**components)
+        max_model_size = max(
+            compute_module_sizes(module)[""]
+            for _, module in pipe.components.items()
+            if isinstance(module, torch.nn.Module)
+        )
+        with tempfile.TemporaryDirectory() as tmpdir:
+            pipe.save_pretrained(tmpdir)
+            max_memory = {0: max_model_size, 1: max_model_size}
+            loaded_pipe = self.pipeline_class.from_pretrained(tmpdir, device_map="balanced", max_memory=max_memory)
+
+        with self.assertRaises(ValueError) as err_context:
+            loaded_pipe.to(torch_device)
+
+        self.assertTrue(
+            "The following pipeline components have been found" in str(err_context.exception)
+            and "This is incompatible with explicitly setting the device using `to()`" in str(err_context.exception)
+        )
+
+    @require_torch_multi_gpu
+    @slow
+    @nightly
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        if "Combined" in self.pipeline_class.__name__:
+            return
+
+        # TODO (sayakpaul): skip these for now. revisit later.
+        components = self.get_dummy_components()
+        if any(isinstance(component, (MultiControlNetModel, MultiAdapter)) for component in components):
+            return
+
+        pipe = self.pipeline_class(**components)
+        max_model_size = max(
+            compute_module_sizes(module)[""]
+            for _, module in pipe.components.items()
+            if isinstance(module, torch.nn.Module)
+        )
+        with tempfile.TemporaryDirectory() as tmpdir:
+            pipe.save_pretrained(tmpdir)
+            max_memory = {0: max_model_size, 1: max_model_size}
+            loaded_pipe = self.pipeline_class.from_pretrained(tmpdir, device_map="balanced", max_memory=max_memory)
+
+        with self.assertRaises(ValueError) as err_context:
+            loaded_pipe.enable_model_cpu_offload()
+
+        self.assertTrue(
+            "The following pipeline components have been found" in str(err_context.exception)
+            and "This is incompatible with `enable_model_cpu_offload()`" in str(err_context.exception)
+        )
+
+    @require_torch_multi_gpu
+    @slow
+    @nightly
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        if "Combined" in self.pipeline_class.__name__:
+            return
+
+        # TODO (sayakpaul): skip these for now. revisit later.
+        components = self.get_dummy_components()
+        if any(isinstance(component, (MultiControlNetModel, MultiAdapter)) for component in components):
+            return
+
+        pipe = self.pipeline_class(**components)
+        max_model_size = max(
+            compute_module_sizes(module)[""]
+            for _, module in pipe.components.items()
+            if isinstance(module, torch.nn.Module)
+        )
+        with tempfile.TemporaryDirectory() as tmpdir:
+            pipe.save_pretrained(tmpdir)
+            max_memory = {0: max_model_size, 1: max_model_size}
+            loaded_pipe = self.pipeline_class.from_pretrained(tmpdir, device_map="balanced", max_memory=max_memory)
+
+        with self.assertRaises(ValueError) as err_context:
+            loaded_pipe.enable_sequential_cpu_offload()
+
+        self.assertTrue(
+            "The following pipeline components have been found" in str(err_context.exception)
+            and "This is incompatible with `enable_sequential_cpu_offload()`" in str(err_context.exception)
+        )
+

 @is_staging_test
 class PipelinePushToHubTester(unittest.TestCase):
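For completeness, the user-facing scenario the PR title refers to looks roughly like the following. The checkpoint id, LoRA file path, and prompt are placeholders, and a multi-GPU machine with Accelerate installed is assumed:

```python
import torch
from diffusers import DiffusionPipeline

# Shard the pipeline's components across the available GPUs.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    device_map="balanced",
)

# With this fix, LoRA loading no longer trips over the offload-detection logic
# when the pipeline's components carry a device map.
pipe.load_lora_weights("path/to/lora_weights.safetensors")

image = pipe("a photo of an astronaut riding a horse", num_inference_steps=30).images[0]
image.save("sample.png")
```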