     numpy_to_pil,
 )
 from ..utils.hub_utils import _check_legacy_sharding_variant_format, load_or_create_model_card, populate_model_card
-from ..utils.torch_utils import is_compiled_module
+from ..utils.torch_utils import get_device, is_compiled_module
 
 
 if is_torch_npu_available():
@@ -1084,19 +1084,20 @@ def remove_all_hooks(self):
                 accelerate.hooks.remove_hook_from_module(model, recurse=True)
         self._all_hooks = []
 
-    def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
+    def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = None):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
-        to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
-        method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
-        `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
+        to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the accelerator when its
+        `forward` method is called, and the model remains in accelerator until the next model runs. Memory savings are
+        lower than with `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution
+        of the `unet`.
 
         Arguments:
             gpu_id (`int`, *optional*):
                 The ID of the accelerator that shall be used in inference. If not specified, it will default to 0.
-            device (`torch.Device` or `str`, *optional*, defaults to "cuda"):
+            device (`torch.Device` or `str`, *optional*, defaults to None):
                 The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
-                default to "cuda".
+                automatically detect the available accelerator and use it.
         """
         self._maybe_raise_error_if_group_offload_active(raise_error=True)
 
@@ -1118,6 +1119,11 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t
 
         self.remove_all_hooks()
 
+        if device is None:
+            device = get_device()
+            if device == "cpu":
+                raise RuntimeError("`enable_model_cpu_offload` requires accelerator, but not found")
+
         torch_device = torch.device(device)
         device_index = torch_device.index
 
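With this change a pipeline no longer needs a hard-coded "cuda" device for offloading. A minimal usage sketch, assuming the public diffusers API; the checkpoint id and prompt are only examples, not part of this commit:

import torch

from diffusers import DiffusionPipeline

# Any diffusers checkpoint works here; this id is only an example.
pipe = DiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
)

# No `device` argument: the available accelerator ("cuda", "xpu", "mps", ...) is
# detected automatically; on a CPU-only machine a RuntimeError is raised instead.
pipe.enable_model_cpu_offload()

image = pipe("a photo of an astronaut riding a horse").images[0]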
@@ -1196,20 +1202,20 @@ def maybe_free_model_hooks(self):
         # make sure the model is in the same state as before calling it
         self.enable_model_cpu_offload(device=getattr(self, "_offload_device", "cuda"))
 
-    def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
+    def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = None):
         r"""
         Offloads all models to CPU using 🤗 Accelerate, significantly reducing memory usage. When called, the state
         dicts of all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are saved to CPU
-        and then moved to `torch.device('meta')` and loaded to GPU only when their specific submodule has its `forward`
-        method called. Offloading happens on a submodule basis. Memory savings are higher than with
+        and then moved to `torch.device('meta')` and loaded to accelerator only when their specific submodule has its
+        `forward` method called. Offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
 
         Arguments:
             gpu_id (`int`, *optional*):
                 The ID of the accelerator that shall be used in inference. If not specified, it will default to 0.
-            device (`torch.Device` or `str`, *optional*, defaults to "cuda"):
+            device (`torch.Device` or `str`, *optional*, defaults to None):
                 The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
-                default to "cuda".
+                automatically detect the available accelerator and use it.
         """
         self._maybe_raise_error_if_group_offload_active(raise_error=True)
 
@@ -1225,6 +1231,11 @@ def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Un
12251231 "It seems like you have activated a device mapping strategy on the pipeline so calling `enable_sequential_cpu_offload() isn't allowed. You can call `reset_device_map()` first and then call `enable_sequential_cpu_offload()`."
12261232 )
12271233
1234+ if device is None :
1235+ device = get_device ()
1236+ if device == "cpu" :
1237+ raise RuntimeError ("`enable_sequential_cpu_offload` requires accelerator, but not found" )
1238+
12281239 torch_device = torch .device (device )
12291240 device_index = torch_device .index
12301241
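Both offload paths now rely on `get_device()` from `..utils.torch_utils`, which this diff only imports. A rough sketch of the detection the new guard assumes; the probe order below is an assumption for illustration, not the actual helper:

import torch


def _detect_accelerator() -> str:
    # Hypothetical stand-in for diffusers' get_device(); the real helper is not
    # shown in this diff, so the probe order here is an assumption.
    if torch.cuda.is_available():
        return "cuda"
    if torch.backends.mps.is_available():
        return "mps"
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        return "xpu"
    return "cpu"


device = _detect_accelerator()
if device == "cpu":
    # Mirrors the guard added above: offloading needs an accelerator to move models onto.
    raise RuntimeError("`enable_sequential_cpu_offload` requires accelerator, but not found")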