```diff
     numpy_to_pil,
 )
 from ..utils.hub_utils import _check_legacy_sharding_variant_format, load_or_create_model_card, populate_model_card
-from ..utils.torch_utils import is_compiled_module, get_device
+from ..utils.torch_utils import get_device, is_compiled_module
 
 
 if is_torch_npu_available():
@@ -1087,9 +1087,10 @@ def remove_all_hooks(self):
     def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = None):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
-        to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the accelerator when its `forward`
-        method is called, and the model remains in accelerator until the next model runs. Memory savings are lower than with
-        `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
+        to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the accelerator when its
+        `forward` method is called, and the model remains in accelerator until the next model runs. Memory savings are
+        lower than with `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution
+        of the `unet`.
 
         Arguments:
             gpu_id (`int`, *optional*):
@@ -1205,8 +1206,8 @@ def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Un
         r"""
         Offloads all models to CPU using 🤗 Accelerate, significantly reducing memory usage. When called, the state
         dicts of all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are saved to CPU
-        and then moved to `torch.device('meta')` and loaded to accelerator only when their specific submodule has its `forward`
-        method called. Offloading happens on a submodule basis. Memory savings are higher than with
+        and then moved to `torch.device('meta')` and loaded to accelerator only when their specific submodule has its
+        `forward` method called. Offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
 
         Arguments:
```
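
For reference, a minimal usage sketch of the two offloading modes these docstrings describe. The checkpoint id and prompt below are illustrative assumptions, not taken from this change:

```python
import torch
from diffusers import DiffusionPipeline

# Illustrative checkpoint id (assumption); any pipeline that supports offloading works.
pipe = DiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
)

# Model-level offloading: each whole component (text encoder, UNet, VAE) is moved to the
# accelerator only when its `forward` runs and stays there until the next component needs it.
pipe.enable_model_cpu_offload()

# Alternative with higher memory savings but lower performance: offload on a submodule
# basis instead (do not combine with the call above).
# pipe.enable_sequential_cpu_offload()

image = pipe("a photo of an astronaut riding a horse").images[0]
```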