|
12 | 12 | # See the License for the specific language governing permissions and |
13 | 13 | # limitations under the License. |
14 | 14 |
|
15 | | -from contextlib import nullcontext, contextmanager |
| 15 | +from contextlib import contextmanager, nullcontext |
16 | 16 | from typing import Dict, List, Optional, Set, Tuple |
17 | 17 |
|
18 | 18 | import torch |
@@ -102,9 +102,7 @@ def onload_(self): |
102 | 102 | with self._pinned_memory_tensors() as pinned_memory: |
103 | 103 | for module in self.modules: |
104 | 104 | for param in module.parameters(): |
105 | | - param.data = pinned_memory[param].to( |
106 | | - self.onload_device, non_blocking=self.non_blocking |
107 | | - ) |
| 105 | + param.data = pinned_memory[param].to(self.onload_device, non_blocking=self.non_blocking) |
108 | 106 | else: |
109 | 107 | for group_module in self.modules: |
110 | 108 | for param in group_module.parameters(): |
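
The hunk above is the pinned-memory onload path: each parameter's CPU copy lives in page-locked memory so the host-to-device copy can run with `non_blocking=True`. A minimal, self-contained sketch of that pattern (illustrative names only, not the diffusers implementation) follows:

```python
import torch

def onload_params(params, onload_device: torch.device) -> None:
    # Stage each CPU copy in page-locked (pinned) memory; non_blocking copies
    # only overlap with compute when the source tensor is pinned.
    for param in params:
        pinned = param.data.cpu().pin_memory()
        param.data = pinned.to(onload_device, non_blocking=True)

if torch.cuda.is_available():
    layer = torch.nn.Linear(8, 8)
    onload_params(layer.parameters(), torch.device("cuda"))
```
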
@@ -392,7 +390,9 @@ def apply_group_offloading( |
392 | 390 | module, num_blocks_per_group, offload_device, onload_device, non_blocking, stream, low_cpu_mem_usage |
393 | 391 | ) |
394 | 392 | elif offload_type == "leaf_level": |
395 | | - _apply_group_offloading_leaf_level(module, offload_device, onload_device, non_blocking, stream, low_cpu_mem_usage) |
| 393 | + _apply_group_offloading_leaf_level( |
| 394 | + module, offload_device, onload_device, non_blocking, stream, low_cpu_mem_usage |
| 395 | + ) |
396 | 396 | else: |
397 | 397 | raise ValueError(f"Unsupported offload_type: {offload_type}") |
398 | 398 |
|
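
For context, the dispatch reformatted above sits behind the public `apply_group_offloading` entry point. A hedged usage sketch is below; the keyword names (`use_stream`, `num_blocks_per_group`, and the `low_cpu_mem_usage` flag threaded through in this diff) and the `diffusers.hooks` import path are assumptions based on this file, so check them against the installed release:

```python
import torch
from diffusers.hooks import apply_group_offloading  # import path assumed

class TinyModel(torch.nn.Module):
    # Group offloading walks ModuleList / Sequential blocks, so the toy model uses one.
    def __init__(self):
        super().__init__()
        self.blocks = torch.nn.ModuleList([torch.nn.Linear(16, 16) for _ in range(4)])

    def forward(self, x):
        for block in self.blocks:
            x = block(x)
        return x

model = TinyModel()
apply_group_offloading(
    model,
    onload_device=torch.device("cuda"),
    offload_device=torch.device("cpu"),
    offload_type="block_level",
    num_blocks_per_group=2,
    use_stream=True,          # prefetch on a CUDA stream (assumed keyword)
    low_cpu_mem_usage=True,   # keep CPU copies pageable (assumed keyword)
)
```
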
@@ -425,11 +425,6 @@ def _apply_group_offloading_block_level( |
425 | 425 | for overlapping computation and data transfer. |
426 | 426 | """ |
427 | 427 |
|
428 | | - # Create a pinned CPU parameter dict for async data transfer if streams are to be used |
429 | | - cpu_param_dict = None |
430 | | - if stream is not None: |
431 | | - cpu_param_dict = _get_pinned_cpu_param_dict(module) |
432 | | - |
433 | 428 | # Create module groups for ModuleList and Sequential blocks |
434 | 429 | modules_with_group_offloading = set() |
435 | 430 | unmatched_modules = [] |
@@ -522,11 +517,6 @@ def _apply_group_offloading_leaf_level( |
522 | 517 | for overlapping computation and data transfer. |
523 | 518 | """ |
524 | 519 |
|
525 | | - # Create a pinned CPU parameter dict for async data transfer if streams are to be used |
526 | | - cpu_param_dict = None |
527 | | - if stream is not None: |
528 | | - cpu_param_dict = _get_pinned_cpu_param_dict(module) |
529 | | - |
530 | 520 | # Create module groups for leaf modules and apply group offloading hooks |
531 | 521 | modules_with_group_offloading = set() |
532 | 522 | for name, submodule in module.named_modules(): |
@@ -641,19 +631,15 @@ def _apply_lazy_group_offloading_hook( |
641 | 631 | registry.register_hook(lazy_prefetch_hook, _LAZY_PREFETCH_GROUP_OFFLOADING) |
642 | 632 |
|
643 | 633 |
|
644 | | -def _get_cpu_param_dict(module: torch.nn.Module, low_cpu_mem_usage: bool = False) -> Dict[torch.nn.Parameter, torch.Tensor]: |
| 634 | +def _get_cpu_param_dict( |
| 635 | + module: torch.nn.Module, low_cpu_mem_usage: bool = False |
| 636 | +) -> Dict[torch.nn.Parameter, torch.Tensor]: |
645 | 637 | cpu_param_dict = {} |
646 | 638 | for param in module.parameters(): |
647 | | - if low_cpu_mem_usage: |
648 | | - cpu_param_dict[param] = param.data.cpu() |
649 | | - else: |
650 | | - cpu_param_dict[param] = param.data.cpu().pin_memory() |
| 639 | + cpu_param_dict[param] = param.data.cpu() if low_cpu_mem_usage else param.data.cpu().pin_memory() |
651 | 640 |
|
652 | 641 | for buffer in module.buffers(): |
653 | | - if low_cpu_mem_usage: |
654 | | - cpu_param_dict[buffer] = buffer.data.cpu() |
655 | | - else: |
656 | | - cpu_param_dict[buffer] = buffer.data.cpu().pin_memory() |
| 642 | + cpu_param_dict[buffer] = buffer.data.cpu() if low_cpu_mem_usage else buffer.data.cpu().pin_memory() |
657 | 643 |
|
658 | 644 | return cpu_param_dict |
659 | 645 |
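
The refactor above centralizes the CPU-copy bookkeeping in `_get_cpu_param_dict`: by default each parameter and buffer gets a pinned CPU copy up front, while `low_cpu_mem_usage=True` keeps the copies pageable (the first hunk shows the onload path then pinning lazily through a context manager). A standalone sketch of the same trade-off, with an illustrative helper name, looks like this:

```python
from typing import Dict

import torch

def build_cpu_copies(module: torch.nn.Module, low_cpu_mem_usage: bool = False) -> Dict[torch.Tensor, torch.Tensor]:
    # Illustrative re-implementation of the idea, not the library code.
    # low_cpu_mem_usage=False: pin once, paying extra page-locked host RAM for
    # fast, overlappable transfers. low_cpu_mem_usage=True: keep pageable copies.
    cpu_copies = {}
    for tensor in list(module.parameters()) + list(module.buffers()):
        cpu = tensor.data.cpu()
        cpu_copies[tensor] = cpu if low_cpu_mem_usage else cpu.pin_memory()
    return cpu_copies

if torch.cuda.is_available():  # pin_memory() needs an accelerator backend
    layer = torch.nn.Linear(32, 32)
    assert all(t.is_pinned() for t in build_cpu_copies(layer).values())
    assert not any(t.is_pinned() for t in build_cpu_copies(layer, low_cpu_mem_usage=True).values())
```
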
|
|