
Commit d3a7dc8

revert logic for single file
1 parent 0df7010 commit d3a7dc8

File tree

2 files changed: +188 -122 lines changed

src/diffusers/loaders/single_file_model.py

Lines changed: 77 additions & 6 deletions
@@ -13,11 +13,15 @@
 # limitations under the License.
 import importlib
 import inspect
+import re
+from contextlib import nullcontext
 from typing import Optional

+import torch
 from huggingface_hub.utils import validate_hf_hub_args

-from ..utils import deprecate, logging
+from ..quantizers import DiffusersAutoQuantizer
+from ..utils import deprecate, is_accelerate_available, logging
 from .single_file_utils import (
     SingleFileComponentError,
     convert_animatediff_checkpoint_to_diffusers,
@@ -45,6 +49,12 @@
 logger = logging.get_logger(__name__)


+if is_accelerate_available():
+    from accelerate import init_empty_weights
+
+    from ..models.modeling_utils import load_model_dict_into_meta
+
+
 SINGLE_FILE_LOADABLE_CLASSES = {
     "StableCascadeUNet": {
         "checkpoint_mapping_fn": convert_stable_cascade_unet_single_file_to_diffusers,
@@ -224,6 +234,9 @@ def from_single_file(cls, pretrained_model_link_or_path_or_dict: Optional[str] =
         subfolder = kwargs.pop("subfolder", None)
         revision = kwargs.pop("revision", None)
         config_revision = kwargs.pop("config_revision", None)
+        torch_dtype = kwargs.pop("torch_dtype", None)
+        quantization_config = kwargs.pop("quantization_config", None)
+        device = kwargs.pop("device", None)
         disable_mmap = kwargs.pop("disable_mmap", False)

         if isinstance(pretrained_model_link_or_path_or_dict, dict):
@@ -239,6 +252,12 @@ def from_single_file(cls, pretrained_model_link_or_path_or_dict: Optional[str] =
                 revision=revision,
                 disable_mmap=disable_mmap,
             )
+        if quantization_config is not None:
+            hf_quantizer = DiffusersAutoQuantizer.from_config(quantization_config)
+            hf_quantizer.validate_environment()
+
+        else:
+            hf_quantizer = None

         mapping_functions = SINGLE_FILE_LOADABLE_CLASSES[mapping_class_name]

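For reference, the branch added above resolves the quantizer eagerly, before any weights are materialized. A minimal standalone sketch of that resolution, assuming bitsandbytes and accelerate are installed (the BitsAndBytesConfig values are illustrative, not from this commit):

# Hypothetical sketch of the quantizer resolution performed above.
from diffusers import BitsAndBytesConfig
from diffusers.quantizers import DiffusersAutoQuantizer

quantization_config = BitsAndBytesConfig(load_in_8bit=True)  # illustrative config
hf_quantizer = DiffusersAutoQuantizer.from_config(quantization_config)
hf_quantizer.validate_environment()  # fails fast if required backends are missing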
@@ -317,9 +336,61 @@ def from_single_file(cls, pretrained_model_link_or_path_or_dict: Optional[str] =
                 f"Failed to load {mapping_class_name}. Weights for this component appear to be missing in the checkpoint."
             )

-        return cls.from_pretrained(
-            pretrained_model_name_or_path=None,
-            state_dict=diffusers_format_checkpoint,
-            config=diffusers_model_config,
-            **kwargs,
+        ctx = init_empty_weights if is_accelerate_available() else nullcontext
+        with ctx():
+            model = cls.from_config(diffusers_model_config)
+
+        # Check if `_keep_in_fp32_modules` is not None
+        use_keep_in_fp32_modules = (cls._keep_in_fp32_modules is not None) and (
+            (torch_dtype == torch.float16) or hasattr(hf_quantizer, "use_keep_in_fp32_modules")
         )
+        if use_keep_in_fp32_modules:
+            keep_in_fp32_modules = cls._keep_in_fp32_modules
+            if not isinstance(keep_in_fp32_modules, list):
+                keep_in_fp32_modules = [keep_in_fp32_modules]
+
+        else:
+            keep_in_fp32_modules = []
+
+        if hf_quantizer is not None:
+            hf_quantizer.preprocess_model(
+                model=model,
+                device_map=None,
+                state_dict=diffusers_format_checkpoint,
+                keep_in_fp32_modules=keep_in_fp32_modules,
+            )
+
+        if is_accelerate_available():
+            param_device = torch.device(device) if device else torch.device("cpu")
+            unexpected_keys = [param_name for param_name in diffusers_format_checkpoint if param_name not in model.state_dict()]
+            load_model_dict_into_meta(
+                model,
+                diffusers_format_checkpoint,
+                dtype=torch_dtype,
+                device_map={"": param_device},
+                hf_quantizer=hf_quantizer,
+                keep_in_fp32_modules=keep_in_fp32_modules,
+                unexpected_keys=unexpected_keys,
+            )
+        else:
+            _, unexpected_keys = model.load_state_dict(diffusers_format_checkpoint, strict=False)
+
+        if model._keys_to_ignore_on_load_unexpected is not None:
+            for pat in model._keys_to_ignore_on_load_unexpected:
+                unexpected_keys = [k for k in unexpected_keys if re.search(pat, k) is None]
+
+        if len(unexpected_keys) > 0:
+            logger.warning(
+                f"Some weights of the model checkpoint were not used when initializing {cls.__name__}: \n {[', '.join(unexpected_keys)]}"
+            )
+
+        if hf_quantizer is not None:
+            hf_quantizer.postprocess_model(model)
+            model.hf_quantizer = hf_quantizer
+
+        if torch_dtype is not None and hf_quantizer is None:
+            model.to(torch_dtype)
+
+        model.eval()
+
+        return model
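Taken together, the new path initializes the model on the meta device and then streams the converted checkpoint in with the requested dtype, device, and quantizer. A hedged usage sketch of the reworked from_single_file (the model class, checkpoint URL, and config values here are illustrative, not part of this commit):

# Hypothetical usage; the URL and quantization settings are placeholders.
import torch

from diffusers import BitsAndBytesConfig, FluxTransformer2DModel

model = FluxTransformer2DModel.from_single_file(
    "https://huggingface.co/some-org/some-model/blob/main/model.safetensors",  # placeholder URL
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),  # routed through DiffusersAutoQuantizer
    torch_dtype=torch.bfloat16,  # applied during load_model_dict_into_meta, not via model.to()
    device="cuda",  # becomes param_device for the meta-device load
)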

src/diffusers/models/modeling_utils.py

Lines changed: 111 additions & 116 deletions
@@ -795,8 +795,6 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
         quantization_config = kwargs.pop("quantization_config", None)
         dduf_entries: Optional[Dict[str, DDUFEntry]] = kwargs.pop("dduf_entries", None)
         disable_mmap = kwargs.pop("disable_mmap", False)
-        state_dict = kwargs.pop("state_dict", None)
-        config = kwargs.pop("config", None)

         allow_pickle = False
         if use_safetensors is None:
@@ -867,39 +865,35 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
             # The max memory utils require PyTorch >= 1.10 to have torch.cuda.mem_get_info.
             raise ValueError("`low_cpu_mem_usage` and `device_map` require PyTorch >= 1.10.")

-        if (not config and state_dict) or (config and not state_dict):
-            raise ValueError("You need to pass both the config and the state dict to initalize the model.")
-
         user_agent = {
             "diffusers": __version__,
             "file_type": "model",
             "framework": "pytorch",
         }
         unused_kwargs = {}

-        if config is None:
-            # Load config if we don't provide a configuration
-            config_path = pretrained_model_name_or_path
+        # Load config if we don't provide a configuration
+        config_path = pretrained_model_name_or_path

-            # TODO: We need to let the user pass a config in from_pretrained
-            # load config
-            config, unused_kwargs, commit_hash = cls.load_config(
-                config_path,
-                cache_dir=cache_dir,
-                return_unused_kwargs=True,
-                return_commit_hash=True,
-                force_download=force_download,
-                proxies=proxies,
-                local_files_only=local_files_only,
-                token=token,
-                revision=revision,
-                subfolder=subfolder,
-                user_agent=user_agent,
-                dduf_entries=dduf_entries,
-                **kwargs,
-            )
-            # no in-place modification of the original config.
-            config = copy.deepcopy(config)
+        # TODO: We need to let the user pass a config in from_pretrained
+        # load config
+        config, unused_kwargs, commit_hash = cls.load_config(
+            config_path,
+            cache_dir=cache_dir,
+            return_unused_kwargs=True,
+            return_commit_hash=True,
+            force_download=force_download,
+            proxies=proxies,
+            local_files_only=local_files_only,
+            token=token,
+            revision=revision,
+            subfolder=subfolder,
+            user_agent=user_agent,
+            dduf_entries=dduf_entries,
+            **kwargs,
+        )
+        # no in-place modification of the original config.
+        config = copy.deepcopy(config)

         # determine initial quantization config.
         #######################################
@@ -951,103 +945,79 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P

         is_sharded = False
         resolved_archive_file = None
-        if state_dict is None:
-            # Determine if we're loading from a directory of sharded checkpoints.
-            sharded_metadata = None
-            index_file = None
-            is_local = os.path.isdir(pretrained_model_name_or_path)
-            index_file_kwargs = {
-                "is_local": is_local,
-                "pretrained_model_name_or_path": pretrained_model_name_or_path,
-                "subfolder": subfolder or "",
-                "use_safetensors": use_safetensors,
-                "cache_dir": cache_dir,
-                "variant": variant,
-                "force_download": force_download,
-                "proxies": proxies,
-                "local_files_only": local_files_only,
-                "token": token,
-                "revision": revision,
-                "user_agent": user_agent,
-                "commit_hash": commit_hash,
-                "dduf_entries": dduf_entries,
-            }
-            index_file = _fetch_index_file(**index_file_kwargs)
-            # In case the index file was not found we still have to consider the legacy format.
-            # this becomes applicable when the variant is not None.
-            if variant is not None and (index_file is None or not os.path.exists(index_file)):
-                index_file = _fetch_index_file_legacy(**index_file_kwargs)
-            if index_file is not None and (dduf_entries or index_file.is_file()):
-                is_sharded = True
-
-            if is_sharded and from_flax:
-                raise ValueError("Loading of sharded checkpoints is not supported when `from_flax=True`.")
-
-            # load model
-            if from_flax:
-                resolved_archive_file = _get_model_file(
+
+        # Determine if we're loading from a directory of sharded checkpoints.
+        sharded_metadata = None
+        index_file = None
+        is_local = os.path.isdir(pretrained_model_name_or_path)
+        index_file_kwargs = {
+            "is_local": is_local,
+            "pretrained_model_name_or_path": pretrained_model_name_or_path,
+            "subfolder": subfolder or "",
+            "use_safetensors": use_safetensors,
+            "cache_dir": cache_dir,
+            "variant": variant,
+            "force_download": force_download,
+            "proxies": proxies,
+            "local_files_only": local_files_only,
+            "token": token,
+            "revision": revision,
+            "user_agent": user_agent,
+            "commit_hash": commit_hash,
+            "dduf_entries": dduf_entries,
+        }
+        index_file = _fetch_index_file(**index_file_kwargs)
+        # In case the index file was not found we still have to consider the legacy format.
+        # this becomes applicable when the variant is not None.
+        if variant is not None and (index_file is None or not os.path.exists(index_file)):
+            index_file = _fetch_index_file_legacy(**index_file_kwargs)
+        if index_file is not None and (dduf_entries or index_file.is_file()):
+            is_sharded = True
+
+        if is_sharded and from_flax:
+            raise ValueError("Loading of sharded checkpoints is not supported when `from_flax=True`.")
+
+        # load model
+        if from_flax:
+            resolved_archive_file = _get_model_file(
+                pretrained_model_name_or_path,
+                weights_name=FLAX_WEIGHTS_NAME,
+                cache_dir=cache_dir,
+                force_download=force_download,
+                proxies=proxies,
+                local_files_only=local_files_only,
+                token=token,
+                revision=revision,
+                subfolder=subfolder,
+                user_agent=user_agent,
+                commit_hash=commit_hash,
+            )
+            model = cls.from_config(config, **unused_kwargs)
+
+            # Convert the weights
+            from .modeling_pytorch_flax_utils import load_flax_checkpoint_in_pytorch_model
+
+            model = load_flax_checkpoint_in_pytorch_model(model, resolved_archive_file)
+        else:
+            # in the case it is sharded, we have already the index
+            if is_sharded:
+                resolved_archive_file, sharded_metadata = _get_checkpoint_shard_files(
                     pretrained_model_name_or_path,
-                    weights_name=FLAX_WEIGHTS_NAME,
+                    index_file,
                     cache_dir=cache_dir,
-                    force_download=force_download,
                     proxies=proxies,
                     local_files_only=local_files_only,
                     token=token,
-                    revision=revision,
-                    subfolder=subfolder,
                     user_agent=user_agent,
-                    commit_hash=commit_hash,
+                    revision=revision,
+                    subfolder=subfolder or "",
+                    dduf_entries=dduf_entries,
                 )
-                model = cls.from_config(config, **unused_kwargs)
-
-                # Convert the weights
-                from .modeling_pytorch_flax_utils import load_flax_checkpoint_in_pytorch_model
-
-                model = load_flax_checkpoint_in_pytorch_model(model, resolved_archive_file)
-            else:
-                # in the case it is sharded, we have already the index
-                if is_sharded:
-                    resolved_archive_file, sharded_metadata = _get_checkpoint_shard_files(
-                        pretrained_model_name_or_path,
-                        index_file,
-                        cache_dir=cache_dir,
-                        proxies=proxies,
-                        local_files_only=local_files_only,
-                        token=token,
-                        user_agent=user_agent,
-                        revision=revision,
-                        subfolder=subfolder or "",
-                        dduf_entries=dduf_entries,
-                    )
-                elif use_safetensors:
-                    try:
-                        resolved_archive_file = _get_model_file(
-                            pretrained_model_name_or_path,
-                            weights_name=_add_variant(SAFETENSORS_WEIGHTS_NAME, variant),
-                            cache_dir=cache_dir,
-                            force_download=force_download,
-                            proxies=proxies,
-                            local_files_only=local_files_only,
-                            token=token,
-                            revision=revision,
-                            subfolder=subfolder,
-                            user_agent=user_agent,
-                            commit_hash=commit_hash,
-                            dduf_entries=dduf_entries,
-                        )
-
-                    except IOError as e:
-                        logger.error(f"An error occurred while trying to fetch {pretrained_model_name_or_path}: {e}")
-                        if not allow_pickle:
-                            raise
-                        logger.warning(
-                            "Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead."
-                        )
-
-                if resolved_archive_file is None and not is_sharded:
+            elif use_safetensors:
+                try:
                     resolved_archive_file = _get_model_file(
                         pretrained_model_name_or_path,
-                        weights_name=_add_variant(WEIGHTS_NAME, variant),
+                        weights_name=_add_variant(SAFETENSORS_WEIGHTS_NAME, variant),
                         cache_dir=cache_dir,
                         force_download=force_download,
                         proxies=proxies,
@@ -1060,6 +1030,30 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
                         dduf_entries=dduf_entries,
                     )

+                except IOError as e:
+                    logger.error(f"An error occurred while trying to fetch {pretrained_model_name_or_path}: {e}")
+                    if not allow_pickle:
+                        raise
+                    logger.warning(
+                        "Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead."
+                    )
+
+            if resolved_archive_file is None and not is_sharded:
+                resolved_archive_file = _get_model_file(
+                    pretrained_model_name_or_path,
+                    weights_name=_add_variant(WEIGHTS_NAME, variant),
+                    cache_dir=cache_dir,
+                    force_download=force_download,
+                    proxies=proxies,
+                    local_files_only=local_files_only,
+                    token=token,
+                    revision=revision,
+                    subfolder=subfolder,
+                    user_agent=user_agent,
+                    commit_hash=commit_hash,
+                    dduf_entries=dduf_entries,
+                )
+
         if not isinstance(resolved_archive_file, list):
             resolved_archive_file = [resolved_archive_file]

@@ -1084,7 +1078,8 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
         if dtype_orig is not None:
             torch.set_default_dtype(dtype_orig)

-        if not is_sharded and state_dict is None:
+        state_dict = None
+        if not is_sharded:
             # Time to load the checkpoint
             state_dict = load_state_dict(
                 resolved_archive_file[0], disable_mmap=disable_mmap, dduf_entries=dduf_entries

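With the revert, from_pretrained no longer accepts state_dict/config, so the init-on-meta-then-materialize step lives entirely in from_single_file. A minimal standalone sketch of that pattern with a toy module and state dict, assuming accelerate is installed and PyTorch >= 2.1 (for assign=True):

# Toy illustration of the pattern used by the single-file loader above.
import torch
import torch.nn as nn
from accelerate import init_empty_weights

with init_empty_weights():
    model = nn.Linear(4, 4)  # parameters are created on the meta device, no memory allocated

state_dict = {"weight": torch.randn(4, 4), "bias": torch.zeros(4)}

# assign=True swaps the meta parameters for the checkpoint tensors instead of copying
# into them; this stands in for load_model_dict_into_meta, minus dtype/quantizer handling.
model.load_state_dict(state_dict, assign=True)
model.eval()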