
Commit 8968e2f
Commit message: up
1 parent cd13977

File tree: 4 files changed, 56 additions and 55 deletions

src/diffusers/models/model_loading_utils.py

Lines changed: 26 additions & 24 deletions
@@ -14,6 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import functools
 import importlib
 import inspect
 import math
@@ -32,6 +33,7 @@

 from ..quantizers import DiffusersQuantizer
 from ..utils import (
+    DEFAULT_HF_PARALLEL_LOADING_WORKERS,
     GGUF_FILE_EXTENSION,
     SAFE_WEIGHTS_INDEX_NAME,
     SAFETENSORS_FILE_EXTENSION,
@@ -339,7 +341,7 @@ def check_support_param_buffer_assignment(model_to_load, state_dict, start_prefi
     return False


-def load_shard_file(
+def _load_shard_file(
     shard_file,
     model,
     model_state_dict,
@@ -357,25 +359,6 @@ def load_shard_file(
     ignore_mismatched_sizes=False,
     low_cpu_mem_usage=False,
 ):
-
-    (
-        model,
-        model_state_dict,
-        shard_file,
-        device_map,
-        dtype,
-        hf_quantizer,
-        keep_in_fp32_modules,
-        dduf_entries,
-        loaded_keys,
-        unexpected_keys,
-        offload_index,
-        offload_folder,
-        state_dict_index,
-        state_dict_folder,
-        ignore_mismatched_sizes,
-        low_cpu_mem_usage,
-    ) = args
     assign_to_params_buffers = None
     state_dict = load_state_dict(shard_file, dduf_entries=dduf_entries)
     mismatched_keys = _find_mismatched_keys(
@@ -425,19 +408,38 @@ def _load_shard_files_with_threadpool(
     ignore_mismatched_sizes=False,
     low_cpu_mem_usage=False,
 ):
-    num_workers = int(os.environ.get("HF_PARALLEL_LOADING_WORKERS", "8"))
+    num_workers = int(os.environ.get("HF_PARALLEL_LOADING_WORKERS", str(DEFAULT_HF_PARALLEL_LOADING_WORKERS)))

     # Do not spawn anymore workers than you need
-    num_workers = min(len(args_list), num_workers)
+    num_workers = min(len(shard_files), num_workers)

     logger.info(f"Loading model weights in parallel with {num_workers} workers...")

     error_msgs = []
     mismatched_keys = []

+    load_one = functools.partial(
+        _load_shard_file,
+        model=model,
+        model_state_dict=model_state_dict,
+        device_map=device_map,
+        dtype=dtype,
+        hf_quantizer=hf_quantizer,
+        keep_in_fp32_modules=keep_in_fp32_modules,
+        dduf_entries=dduf_entries,
+        loaded_keys=loaded_keys,
+        unexpected_keys=unexpected_keys,
+        offload_index=offload_index,
+        offload_folder=offload_folder,
+        state_dict_index=state_dict_index,
+        state_dict_folder=state_dict_folder,
+        ignore_mismatched_sizes=ignore_mismatched_sizes,
+        low_cpu_mem_usage=low_cpu_mem_usage,
+    )
+
     with ThreadPoolExecutor(max_workers=num_workers) as executor:
-        with logging.tqdm(total=len(args_list), desc="Loading checkpoint shards") as pbar:
-            futures = [executor.submit(load_shard_file, arg) for arg in args_list]
+        with logging.tqdm(total=len(shard_files), desc="Loading checkpoint shards") as pbar:
+            futures = [executor.submit(load_one, shard_file) for shard_file in shard_files]
             for future in as_completed(futures):
                 result = future.result()
                 offload_index, state_dict_index, _mismatched_keys, _error_msgs = result
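
Editor's note: a minimal, self-contained sketch of the pattern this hunk adopts (hypothetical names, not the diffusers API): bind the keyword arguments shared by every shard once with functools.partial, then submit one task per shard file to a ThreadPoolExecutor and consume results as they complete.

import functools
from concurrent.futures import ThreadPoolExecutor, as_completed


def load_shard(shard_file, *, device, dtype):
    # Stand-in for the real per-shard loading work.
    return f"loaded {shard_file} on {device} as {dtype}"


shard_files = ["model-00001.safetensors", "model-00002.safetensors"]
load_one = functools.partial(load_shard, device="cpu", dtype="float32")

# Never spawn more workers than there are shards.
num_workers = min(len(shard_files), 8)

with ThreadPoolExecutor(max_workers=num_workers) as executor:
    futures = [executor.submit(load_one, shard_file) for shard_file in shard_files]
    for future in as_completed(futures):
        print(future.result())

Binding the shared arguments up front keeps the executor.submit call down to the one value that varies per task, which is what lets the old tuple-packing/unpacking code above be deleted.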

src/diffusers/models/modeling_utils.py

Lines changed: 28 additions & 31 deletions
@@ -15,6 +15,7 @@
 # limitations under the License.

 import copy
+import functools
 import inspect
 import itertools
 import json
@@ -70,8 +71,8 @@
7071
_expand_device_map,
7172
_fetch_index_file,
7273
_fetch_index_file_legacy,
73-
load_shard_file,
74-
load_shard_files_with_threadpool,
74+
_load_shard_file,
75+
_load_shard_files_with_threadpool,
7576
load_state_dict,
7677
)
7778

@@ -1547,41 +1548,37 @@ def _load_pretrained_model(
             # if state dict is not None, it means that we don't need to read the files from resolved_model_file also
             resolved_model_file = [state_dict]

-        # prepare the arguments.
-        args_list = [
-            (
-                model,
-                model_state_dict,
-                shard_file,
-                device_map,
-                dtype,
-                hf_quantizer,
-                keep_in_fp32_modules,
-                dduf_entries,
-                loaded_keys,
-                unexpected_keys,
-                offload_index,
-                offload_folder,
-                state_dict_index,
-                state_dict_folder,
-                ignore_mismatched_sizes,
-                low_cpu_mem_usage,
-            )
-            for shard_file in resolved_model_file
-        ]
+        # Prepare the loading function sharing the attributes shared between them.
+        load_fn = functools.partial(
+            _load_shard_files_with_threadpool if is_parallel_loading_enabled else _load_shard_file,
+            model=model,
+            model_state_dict=model_state_dict,
+            device_map=device_map,
+            dtype=dtype,
+            hf_quantizer=hf_quantizer,
+            keep_in_fp32_modules=keep_in_fp32_modules,
+            dduf_entries=dduf_entries,
+            loaded_keys=loaded_keys,
+            unexpected_keys=unexpected_keys,
+            offload_index=offload_index,
+            offload_folder=offload_folder,
+            state_dict_index=state_dict_index,
+            state_dict_folder=state_dict_folder,
+            ignore_mismatched_sizes=ignore_mismatched_sizes,
+            low_cpu_mem_usage=low_cpu_mem_usage,
+        )

         if is_parallel_loading_enabled:
-            offload_index, state_dict_index, _mismatched_keys, _error_msgs = load_shard_files_with_threadpool(
-                args_list
-            )
+            offload_index, state_dict_index, _mismatched_keys, _error_msgs = load_fn(resolved_model_file)
             error_msgs += _error_msgs
             mismatched_keys += _mismatched_keys
         else:
-            if len(args_list) > 1:
-                args_list = logging.tqdm(args_list, desc="Loading checkpoint shards")
+            shard_files = resolved_model_file
+            if len(resolved_model_file) > 1:
+                shard_files = logging.tqdm(resolved_model_file, desc="Loading checkpoint shards")

-            for args in args_list:
-                offload_index, state_dict_index, _mismatched_keys, _error_msgs = load_shard_file(args)
+            for shard_file in shard_files:
+                offload_index, state_dict_index, _mismatched_keys, _error_msgs = load_fn(shard_file)
                 error_msgs += _error_msgs
                 mismatched_keys += _mismatched_keys
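
Editor's note: a small sketch of the dispatch idea used above (hypothetical function names, not the diffusers API): the callable is chosen first, the shared keyword arguments are bound once, and the resulting load_fn is then called either once with the whole list (parallel path) or once per shard (sequential path). This only works because both callables accept the same keyword arguments.

import functools


def load_single(shard_file, *, tag):
    return f"{tag}:{shard_file}"


def load_many(shard_files, *, tag):
    return [load_single(shard_file, tag=tag) for shard_file in shard_files]


def load_all(shard_files, parallel):
    # Pick the callable, then bind the kwargs shared by both paths.
    load_fn = functools.partial(load_many if parallel else load_single, tag="ckpt")
    if parallel:
        return load_fn(shard_files)  # one call over the whole list
    return [load_fn(shard_file) for shard_file in shard_files]  # one call per shard


print(load_all(["a.safetensors", "b.safetensors"], parallel=True))
print(load_all(["a.safetensors", "b.safetensors"], parallel=False))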

src/diffusers/utils/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@
 from .. import __version__
 from .constants import (
     CONFIG_NAME,
+    DEFAULT_HF_PARALLEL_LOADING_WORKERS,
     DEPRECATED_REVISION_ARGS,
     DIFFUSERS_DYNAMIC_MODULE_NAME,
     FLAX_WEIGHTS_NAME,
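
Editor's note: with this re-export in place, the constant should be reachable from the public utils namespace; an assumed usage sketch:

from diffusers.utils import DEFAULT_HF_PARALLEL_LOADING_WORKERS

print(DEFAULT_HF_PARALLEL_LOADING_WORKERS)  # 8, per constants.py below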

src/diffusers/utils/constants.py

Lines changed: 1 addition & 0 deletions
@@ -43,6 +43,7 @@
 DIFFUSERS_REQUEST_TIMEOUT = 60
 DIFFUSERS_ATTN_BACKEND = os.getenv("DIFFUSERS_ATTN_BACKEND", "native")
 DIFFUSERS_ATTN_CHECKS = os.getenv("DIFFUSERS_ATTN_CHECKS", "0") in ENV_VARS_TRUE_VALUES
+DEFAULT_HF_PARALLEL_LOADING_WORKERS = 8

 # Below should be `True` if the current version of `peft` and `transformers` are compatible with
 # PEFT backend. Will automatically fall back to PEFT backend if the correct versions of the libraries are
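
Editor's note: how the new constant interacts with the HF_PARALLEL_LOADING_WORKERS environment variable, following the line added to _load_shard_files_with_threadpool above; the constant supplies the default and the environment variable, if set, overrides it at runtime.

import os

DEFAULT_HF_PARALLEL_LOADING_WORKERS = 8

# Same override pattern as in model_loading_utils.py above.
num_workers = int(
    os.environ.get("HF_PARALLEL_LOADING_WORKERS", str(DEFAULT_HF_PARALLEL_LOADING_WORKERS))
)
print(num_workers)  # 8 unless HF_PARALLEL_LOADING_WORKERS is set in the environment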
