Commit f3a4ddc

sharded checkpoint compat
1 parent da48dcb commit f3a4ddc

3 files changed: 31 additions, 14 deletions
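
For orientation, a usage sketch of what this compatibility fix enables: loading a DDUF archive whose model weights are stored as a sharded checkpoint. This assumes the `dduf_file` argument of `DiffusionPipeline.from_pretrained`; the repo id and file name below are illustrative, not part of this commit.

```python
import torch
from diffusers import DiffusionPipeline

# Illustrative Hub repo and archive name; any DDUF archive containing a
# sharded (multi-file) safetensors checkpoint exercises the code paths
# patched in this commit.
pipe = DiffusionPipeline.from_pretrained(
    "DDUF/FLUX.1-dev-DDUF",
    dduf_file="FLUX.1-dev.dduf",
    torch_dtype=torch.bfloat16,
)
```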

src/diffusers/models/model_loading_utils.py

Lines changed: 15 additions & 7 deletions
@@ -315,7 +315,8 @@ def _fetch_index_file(
             commit_hash=commit_hash,
             dduf_entries=dduf_entries,
         )
-        index_file = Path(index_file)
+        if not dduf_entries:
+            index_file = Path(index_file)
     except (EntryNotFoundError, EnvironmentError):
         index_file = None

@@ -324,7 +325,9 @@ def _fetch_index_file(
 
 # Adapted from
 # https://github.com/bghira/SimpleTuner/blob/cea2457ab063f6dedb9e697830ae68a96be90641/helpers/training/save_hooks.py#L64
-def _merge_sharded_checkpoints(sharded_ckpt_cached_folder, sharded_metadata):
+def _merge_sharded_checkpoints(
+    sharded_ckpt_cached_folder, sharded_metadata, dduf_entries: Optional[Dict[str, DDUFEntry]] = None
+):
     weight_map = sharded_metadata.get("weight_map", None)
     if weight_map is None:
         raise KeyError("'weight_map' key not found in the shard index file.")
@@ -337,14 +340,19 @@ def _merge_sharded_checkpoints(sharded_ckpt_cached_folder, sharded_metadata):
     # Load tensors from each unique file
     for file_name in files_to_load:
         part_file_path = os.path.join(sharded_ckpt_cached_folder, file_name)
-        if not os.path.exists(part_file_path):
+        if not os.path.exists(part_file_path) and (dduf_entries and part_file_path not in dduf_entries):
             raise FileNotFoundError(f"Part file {file_name} not found.")
 
         if is_safetensors:
-            with safetensors.safe_open(part_file_path, framework="pt", device="cpu") as f:
-                for tensor_key in f.keys():
-                    if tensor_key in weight_map:
-                        merged_state_dict[tensor_key] = f.get_tensor(tensor_key)
+            if dduf_entries:
+                with dduf_entries[part_file_path].as_mmap() as mm:
+                    tensors = safetensors.torch.load(mm)
+                    merged_state_dict.update(tensors)
+            else:
+                with safetensors.safe_open(part_file_path, framework="pt", device="cpu") as f:
+                    for tensor_key in f.keys():
+                        if tensor_key in weight_map:
+                            merged_state_dict[tensor_key] = f.get_tensor(tensor_key)
         else:
             merged_state_dict.update(torch.load(part_file_path, weights_only=True, map_location="cpu"))
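
A self-contained sketch of the merge logic added above, assuming `DDUFEntry` from `huggingface_hub` and safetensors-format shards; the `merge_shards` name and simplified signature are ours, not the library's. Inside a DDUF archive the shard is deserialized wholesale from a memory-mapped zip entry, while the on-disk path still streams tensors lazily through `safe_open`.

```python
import os
from typing import Dict, Optional

import safetensors
import safetensors.torch
from huggingface_hub import DDUFEntry


def merge_shards(
    folder: str,
    weight_map: Dict[str, str],  # tensor name -> shard file name, from the index
    dduf_entries: Optional[Dict[str, DDUFEntry]] = None,
) -> dict:
    merged_state_dict = {}
    for file_name in sorted(set(weight_map.values())):
        part_file_path = os.path.join(folder, file_name)
        if dduf_entries:
            # Shard lives inside the DDUF zip: mmap the entry and load the
            # whole shard from bytes in one call.
            with dduf_entries[part_file_path].as_mmap() as mm:
                merged_state_dict.update(safetensors.torch.load(mm))
        else:
            # Shard on disk: open lazily and copy only the mapped tensors.
            with safetensors.safe_open(part_file_path, framework="pt", device="cpu") as f:
                for key in f.keys():
                    if key in weight_map:
                        merged_state_dict[key] = f.get_tensor(key)
    return merged_state_dict
```

Note that the DDUF branch in the diff looks up `dduf_entries` by the joined path, so the entry dict is expected to be keyed by the same archive-relative paths.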

src/diffusers/models/modeling_utils.py

Lines changed: 7 additions & 4 deletions
@@ -782,7 +782,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
         # this becomes applicable when the variant is not None.
         if variant is not None and (index_file is None or not os.path.exists(index_file)):
             index_file = _fetch_index_file_legacy(**index_file_kwargs)
-        if index_file is not None and index_file.is_file():
+        if index_file is not None and (dduf_entries or index_file.is_file()):
             is_sharded = True
 
         if is_sharded and from_flax:
@@ -812,7 +812,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
             model = load_flax_checkpoint_in_pytorch_model(model, model_file)
         else:
             # in the case it is sharded, we have already the index
-            if is_sharded and not dduf_entries:
+            if is_sharded:
                 sharded_ckpt_cached_folder, sharded_metadata = _get_checkpoint_shard_files(
                     pretrained_model_name_or_path,
                     index_file,
@@ -823,9 +823,12 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
                     user_agent=user_agent,
                     revision=revision,
                     subfolder=subfolder or "",
+                    dduf_entries=dduf_entries,
                 )
-                if hf_quantizer is not None and is_bnb_quantization_method:
-                    model_file = _merge_sharded_checkpoints(sharded_ckpt_cached_folder, sharded_metadata)
+                if (hf_quantizer is not None and is_bnb_quantization_method) or dduf_entries:
+                    model_file = _merge_sharded_checkpoints(
+                        sharded_ckpt_cached_folder, sharded_metadata, dduf_entries=dduf_entries
+                    )
                     logger.info("Merged sharded checkpoints as `hf_quantizer` is not None.")
                     is_sharded = False
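
The net effect in `from_pretrained`: a sharded DDUF checkpoint now always takes the eager-merge path that bitsandbytes quantization already used, since shards inside the archive cannot be handed to the per-file loader. A condensed sketch of that gate (a hypothetical helper with simplified names; the real logic is inline above):

```python
def _should_merge_shards(is_sharded, hf_quantizer, is_bnb_quantization_method, dduf_entries):
    # Merge shards eagerly into one state dict when a quantizer needs the
    # full checkpoint (bnb) or when the shards live inside a DDUF archive.
    return is_sharded and (
        (hf_quantizer is not None and is_bnb_quantization_method) or bool(dduf_entries)
    )
```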

src/diffusers/utils/hub_utils.py

Lines changed: 9 additions & 3 deletions
@@ -437,6 +437,7 @@ def _get_checkpoint_shard_files(
     user_agent=None,
     revision=None,
     subfolder="",
+    dduf_entries: Optional[Dict[str, DDUFEntry]] = None,
 ):
     """
     For a given model:
@@ -448,11 +449,14 @@ def _get_checkpoint_shard_files(
     For the description of each arg, see [`PreTrainedModel.from_pretrained`]. `index_filename` is the full path to the
     index (downloaded and cached if `pretrained_model_name_or_path` is a model ID on the Hub).
     """
-    if not os.path.isfile(index_filename):
+    if not os.path.isfile(index_filename) and (dduf_entries and index_filename not in dduf_entries):
         raise ValueError(f"Can't find a checkpoint index ({index_filename}) in {pretrained_model_name_or_path}.")
 
-    with open(index_filename, "r") as f:
-        index = json.loads(f.read())
+    if dduf_entries:
+        index = json.loads(dduf_entries[index_filename].read_text())
+    else:
+        with open(index_filename, "r") as f:
+            index = json.loads(f.read())
 
     original_shard_filenames = sorted(set(index["weight_map"].values()))
     sharded_metadata = index["metadata"]
@@ -466,6 +470,8 @@
             pretrained_model_name_or_path, subfolder=subfolder, original_shard_filenames=original_shard_filenames
         )
         return shards_path, sharded_metadata
+    elif dduf_entries:
+        return shards_path, sharded_metadata
 
     # At this stage pretrained_model_name_or_path is a model identifier on the Hub
     allow_patterns = original_shard_filenames
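
A minimal sketch of the index-resolution change above: when the checkpoint lives inside a DDUF archive, the JSON shard index is read from the matching `DDUFEntry` (via `read_text()`) instead of the local filesystem, and the function then returns early just as it does for a local folder, since there is nothing to download from the Hub. The helper below is illustrative, not the library's API.

```python
import json
from typing import Dict, Optional

from huggingface_hub import DDUFEntry


def read_shard_index(
    index_filename: str,
    dduf_entries: Optional[Dict[str, DDUFEntry]] = None,
) -> dict:
    if dduf_entries:
        # Entry paths are archive-relative; read_text() decodes the JSON.
        return json.loads(dduf_entries[index_filename].read_text())
    with open(index_filename, "r") as f:
        return json.loads(f.read())
```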
