@@ -1,15 +1,9 @@
 import argparse
-from contextlib import nullcontext
 
 import safetensors.torch
 import torch
-from accelerate import init_empty_weights
 from huggingface_hub import hf_hub_download
 
-from diffusers.utils.import_utils import is_accelerate_available
-
-
-CTX = init_empty_weights if is_accelerate_available else nullcontext
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--original_state_dict_repo_id", default=None, type=str)
@@ -22,27 +16,13 @@
 dtype = torch.bfloat16 if args.dtype == "bf16" else torch.float32
 
 
-# Adapted from from the original BFL codebase.
-def optionally_expand_state_dict(name: str, param: torch.Tensor, state_dict: dict) -> dict:
-    if name in state_dict:
-        print(f"Expanding '{name}' with shape {state_dict[name].shape} to model parameter with shape {param.shape}.")
-        # expand with zeros:
-        expanded_state_dict_weight = torch.zeros_like(param, device=state_dict[name].device)
-        # popular with pre-trained param for the first half. Remaining half stays with zeros.
-        slices = tuple(slice(0, dim) for dim in state_dict[name].shape)
-        expanded_state_dict_weight[slices] = state_dict[name]
-        state_dict[name] = expanded_state_dict_weight
-
-    return state_dict
-
-
 def load_original_checkpoint(args):
     if args.original_state_dict_repo_id is not None:
         ckpt_path = hf_hub_download(repo_id=args.original_state_dict_repo_id, filename=args.filename)
     elif args.checkpoint_path is not None:
         ckpt_path = args.checkpoint_path
     else:
-        raise ValueError(" please provide either `original_state_dict_repo_id` or a local `checkpoint_path`")
+        raise ValueError("Please provide either `original_state_dict_repo_id` or a local `checkpoint_path`")
 
     original_state_dict = safetensors.torch.load_file(ckpt_path)
     return original_state_dict
@@ -60,7 +40,7 @@ def convert_flux_control_lora_checkpoint_to_diffusers(
     original_state_dict, num_layers, num_single_layers, inner_dim, mlp_ratio=4.0
 ):
     converted_state_dict = {}
-    original_state_dict_keys = original_state_dict.keys()
+    original_state_dict_keys = list(original_state_dict.keys())
 
     for lora_key in ["lora_A", "lora_B"]:
         ## time_text_embed.timestep_embedder <- time_in
@@ -346,7 +326,8 @@ def convert_flux_control_lora_checkpoint_to_diffusers(
             original_state_dict.pop(f"final_layer.adaLN_modulation.1.{lora_key}.bias")
         )
 
-    print("Remaining:", original_state_dict.keys())
+    if len(original_state_dict) > 0:
+        raise ValueError(f"`original_state_dict` should be empty at this point but has {original_state_dict.keys()=}.")
 
     for key in list(converted_state_dict.keys()):
         converted_state_dict[f"transformer.{key}"] = converted_state_dict.pop(key)
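For context, a minimal sketch (not part of this diff) of how the two functions touched above might be wired together; it only uses names visible in this diff, and the num_layers/num_single_layers/inner_dim values are assumed Flux-dev defaults rather than values stated here:

# Hypothetical driver, assuming the argparse flags shown above.
args = parser.parse_args()
original_ckpt = load_original_checkpoint(args)

# 19 double blocks, 38 single blocks, and inner_dim=3072 are assumptions for illustration.
converted = convert_flux_control_lora_checkpoint_to_diffusers(
    original_ckpt, num_layers=19, num_single_layers=38, inner_dim=3072
)
# Every converted key is expected to carry the "transformer." prefix added in the last loop above.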