diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py
index d6efcc736487..1c85ee7e9a40 100644
--- a/src/diffusers/models/modeling_utils.py
+++ b/src/diffusers/models/modeling_utils.py
@@ -920,14 +920,12 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
             else:  # else let accelerate handle loading and dispatching.
                 # Load weights and dispatch according to the device_map
                 # by default the device_map is None and the weights are loaded on the CPU
-                force_hook = True
                 device_map = _determine_device_map(
                     model, device_map, max_memory, torch_dtype, keep_in_fp32_modules, hf_quantizer
                 )
                 if device_map is None and is_sharded:
                     # we load the parameters on the cpu
                     device_map = {"": "cpu"}
-                    force_hook = False
                 try:
                     accelerate.load_checkpoint_and_dispatch(
                         model,
@@ -937,7 +935,6 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
                         offload_folder=offload_folder,
                         offload_state_dict=offload_state_dict,
                         dtype=torch_dtype,
-                        force_hooks=force_hook,
                         strict=True,
                     )
                 except AttributeError as e:
@@ -967,7 +964,6 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
                             offload_folder=offload_folder,
                             offload_state_dict=offload_state_dict,
                             dtype=torch_dtype,
-                            force_hooks=force_hook,
                             strict=True,
                         )
                         model._undo_temp_convert_self_to_deprecated_attention_blocks()
diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py
index c505c5a262a3..5bb5db421b61 100644
--- a/src/diffusers/pipelines/pipeline_utils.py
+++ b/src/diffusers/pipelines/pipeline_utils.py
@@ -411,6 +411,13 @@ def module_is_offloaded(module):
         pipeline_is_sequentially_offloaded = any(
             module_is_sequentially_offloaded(module) for _, module in self.components.items()
         )
+
+        is_pipeline_device_mapped = self.hf_device_map is not None and len(self.hf_device_map) > 1
+        if is_pipeline_device_mapped:
+            raise ValueError(
+                "It seems like you have activated a device mapping strategy on the pipeline which doesn't allow explicit device placement using `to()`. You can call `reset_device_map()` to remove the existing device map from the pipeline."
+            )
+
         if device and torch.device(device).type == "cuda":
             if pipeline_is_sequentially_offloaded and not pipeline_has_bnb:
                 raise ValueError(
@@ -422,12 +429,6 @@ def module_is_offloaded(module):
                     "You are trying to call `.to('cuda')` on a pipeline that has models quantized with `bitsandbytes`. Your current `accelerate` installation does not support it. Please upgrade the installation."
                 )
 
-        is_pipeline_device_mapped = self.hf_device_map is not None and len(self.hf_device_map) > 1
-        if is_pipeline_device_mapped:
-            raise ValueError(
-                "It seems like you have activated a device mapping strategy on the pipeline which doesn't allow explicit device placement using `to()`. You can call `reset_device_map()` first and then call `to()`."
-            )
-
         # Display a warning in this case (the operation succeeds but the benefits are lost)
         pipeline_is_offloaded = any(module_is_offloaded(module) for _, module in self.components.items())
        if pipeline_is_offloaded and device and torch.device(device).type == "cuda":
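For context, here is a minimal sketch (not part of the diff) of the user-facing behavior enforced by the relocated guard in `pipeline_utils.py`: calling `.to()` on a pipeline loaded with a multi-component device map now fails fast, before the CUDA-specific offloading and bitsandbytes checks, and `reset_device_map()` removes the map so explicit placement works again. The checkpoint name and the `"balanced"` mapping strategy below are illustrative assumptions, not taken from this diff.

```python
import torch
from diffusers import DiffusionPipeline

# Loading with a pipeline-level device map populates `pipe.hf_device_map`
# (checkpoint and "balanced" strategy are illustrative assumptions; the guard
# only fires when the map spans more than one component).
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    device_map="balanced",
)

try:
    # With the check moved above the CUDA-specific branches, the device-map
    # error is raised first, before the offloading/bitsandbytes checks.
    pipe.to("cuda")
except ValueError as err:
    print(err)  # points the user to `reset_device_map()`

pipe.reset_device_map()  # drops the existing device map
pipe.to("cuda")          # explicit device placement is allowed again
```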