
Commit cca81bf
Commit message: update
1 parent 31c94a0

5 files changed: +62 -490 lines

docs/source/en/using-diffusers/consisid.md
2 additions, 1 deletion

@@ -20,8 +20,8 @@ specific language governing permissions and limitations under the License.
 
 This guide will walk you through using ConsisID for use cases.
 
 ## Load Model Checkpoints
-Model weights may be stored in separate subfolders on the Hub or locally, in which case, you should use the [`~DiffusionPipeline.from_pretrained`] method.
 
+Model weights may be stored in separate subfolders on the Hub or locally, in which case, you should use the [`~DiffusionPipeline.from_pretrained`] method.
 
 ```python
 # !pip install consisid_eva_clip insightface facexlib
@@ -42,6 +42,7 @@ pipe.to("cuda")
 ```
 
 ## Identity-Preserving Text-to-Video
+
 For identity-preserving text-to-video, pass a text prompt and an image contain clear face (e.g., preferably half-body or full-body). By default, ConsisID generates a 720x480 video for the best results.
 
 ```python
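Put together, the loading flow this doc page describes looks roughly like the sketch below. Only the pip dependencies, the `from_pretrained` pattern, and `pipe.to("cuda")` appear in the diff; the repo id and dtype are assumptions added for illustration.

```python
# !pip install consisid_eva_clip insightface facexlib
import torch
from diffusers import ConsisIDPipeline

# The repo id and dtype below are illustrative assumptions, not part of
# this commit; the diff only shows that checkpoints load via
# `from_pretrained` and that the pipeline is moved to CUDA.
pipe = ConsisIDPipeline.from_pretrained(
    "BestWishYsh/ConsisID-preview",  # hypothetical checkpoint location
    torch_dtype=torch.bfloat16,
)
pipe.to("cuda")
```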

src/diffusers/loaders/__init__.py
0 additions, 2 deletions

@@ -70,7 +70,6 @@ def text_encoder_attn_modules(text_encoder):
         "LoraLoaderMixin",
         "FluxLoraLoaderMixin",
         "CogVideoXLoraLoaderMixin",
-        "ConsisIDLoraLoaderMixin",
         "Mochi1LoraLoaderMixin",
         "HunyuanVideoLoraLoaderMixin",
         "SanaLoraLoaderMixin",
@@ -102,7 +101,6 @@ def text_encoder_attn_modules(text_encoder):
         from .lora_pipeline import (
             AmusedLoraLoaderMixin,
             CogVideoXLoraLoaderMixin,
-            ConsisIDLoraLoaderMixin,
             FluxLoraLoaderMixin,
             HunyuanVideoLoraLoaderMixin,
             LoraLoaderMixin,
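Because both the `_import_structure` entry and the type-checking import are removed, downstream code that imported the mixin by name will now fail. A sketch of the expected effect and a defensive fallback (this illustrates the consequence of dropping the export, not any diffusers API; the exact exception may depend on the lazy-import machinery in a given diffusers version):

```python
# After this commit the name is no longer exported from diffusers.loaders,
# so importing it should fail with ImportError (possibly AttributeError
# from the lazy-module machinery, depending on the diffusers version).
try:
    from diffusers.loaders import ConsisIDLoraLoaderMixin
except (ImportError, AttributeError):
    ConsisIDLoraLoaderMixin = None  # feature removed; degrade gracefully
```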

src/diffusers/loaders/lora_pipeline.py
0 additions, 307 deletions

@@ -2590,313 +2590,6 @@ def unfuse_lora(self, components: List[str] = ["transformer"], **kwargs):
         super().unfuse_lora(components=components)
 
 
-class ConsisIDLoraLoaderMixin(LoraBaseMixin):
-    r"""
-    Load LoRA layers into [`ConsisIDTransformer3DModel`]. Specific to [`ConsisIDPipeline`].
-    """
-
-    _lora_loadable_modules = ["transformer"]
-    transformer_name = TRANSFORMER_NAME
-
-    @classmethod
-    @validate_hf_hub_args
-    # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.lora_state_dict
-    def lora_state_dict(
-        cls,
-        pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
-        **kwargs,
-    ):
-        r"""
-        Return state dict for lora weights and the network alphas.
-
-        <Tip warning={true}>
-
-        We support loading A1111 formatted LoRA checkpoints in a limited capacity.
-
-        This function is experimental and might change in the future.
-
-        </Tip>
-
-        Parameters:
-            pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`):
-                Can be either:
-
-                    - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on
-                      the Hub.
-                    - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved
-                      with [`ModelMixin.save_pretrained`].
-                    - A [torch state
-                      dict](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict).
-
-            cache_dir (`Union[str, os.PathLike]`, *optional*):
-                Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
-                is not used.
-            force_download (`bool`, *optional*, defaults to `False`):
-                Whether or not to force the (re-)download of the model weights and configuration files, overriding the
-                cached versions if they exist.
-
-            proxies (`Dict[str, str]`, *optional*):
-                A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
-                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
-            local_files_only (`bool`, *optional*, defaults to `False`):
-                Whether to only load local model weights and configuration files or not. If set to `True`, the model
-                won't be downloaded from the Hub.
-            token (`str` or *bool*, *optional*):
-                The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
-                `diffusers-cli login` (stored in `~/.huggingface`) is used.
-            revision (`str`, *optional*, defaults to `"main"`):
-                The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
-                allowed by Git.
-            subfolder (`str`, *optional*, defaults to `""`):
-                The subfolder location of a model file within a larger model repository on the Hub or locally.
-
-        """
-        # Load the main state dict first which has the LoRA layers for either of
-        # transformer and text encoder or both.
-        cache_dir = kwargs.pop("cache_dir", None)
-        force_download = kwargs.pop("force_download", False)
-        proxies = kwargs.pop("proxies", None)
-        local_files_only = kwargs.pop("local_files_only", None)
-        token = kwargs.pop("token", None)
-        revision = kwargs.pop("revision", None)
-        subfolder = kwargs.pop("subfolder", None)
-        weight_name = kwargs.pop("weight_name", None)
-        use_safetensors = kwargs.pop("use_safetensors", None)
-
-        allow_pickle = False
-        if use_safetensors is None:
-            use_safetensors = True
-            allow_pickle = True
-
-        user_agent = {
-            "file_type": "attn_procs_weights",
-            "framework": "pytorch",
-        }
-
-        state_dict = _fetch_state_dict(
-            pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict,
-            weight_name=weight_name,
-            use_safetensors=use_safetensors,
-            local_files_only=local_files_only,
-            cache_dir=cache_dir,
-            force_download=force_download,
-            proxies=proxies,
-            token=token,
-            revision=revision,
-            subfolder=subfolder,
-            user_agent=user_agent,
-            allow_pickle=allow_pickle,
-        )
-
-        is_dora_scale_present = any("dora_scale" in k for k in state_dict)
-        if is_dora_scale_present:
-            warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new."
-            logger.warning(warn_msg)
-            state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k}
-
-        return state_dict
-
-    def load_lora_weights(
-        self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], adapter_name=None, **kwargs
-    ):
-        """
-        Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.transformer` and
-        `self.text_encoder`. All kwargs are forwarded to `self.lora_state_dict`. See
-        [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details on how the state dict is loaded.
-        See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_transformer`] for more details on how the state
-        dict is loaded into `self.transformer`.
-
-        Parameters:
-            pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`):
-                See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
-            adapter_name (`str`, *optional*):
-                Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
-                `default_{i}` where i is the total number of adapters being loaded.
-            low_cpu_mem_usage (`bool`, *optional*):
-                Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
-                weights.
-            kwargs (`dict`, *optional*):
-                See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
-        """
-        if not USE_PEFT_BACKEND:
-            raise ValueError("PEFT backend is required for this method.")
-
-        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA)
-        if low_cpu_mem_usage and is_peft_version("<", "0.13.0"):
-            raise ValueError(
-                "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`."
-            )
-
-        # if a dict is passed, copy it instead of modifying it inplace
-        if isinstance(pretrained_model_name_or_path_or_dict, dict):
-            pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy()
-
-        # First, ensure that the checkpoint is a compatible one and can be successfully loaded.
-        state_dict = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs)
-
-        is_correct_format = all("lora" in key for key in state_dict.keys())
-        if not is_correct_format:
-            raise ValueError("Invalid LoRA checkpoint.")
-
-        self.load_lora_into_transformer(
-            state_dict,
-            transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer,
-            adapter_name=adapter_name,
-            _pipeline=self,
-            low_cpu_mem_usage=low_cpu_mem_usage,
-        )
-
-    @classmethod
-    # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->ConsisIDTransformer3DModel
-    def load_lora_into_transformer(
-        cls, state_dict, transformer, adapter_name=None, _pipeline=None, low_cpu_mem_usage=False
-    ):
-        """
-        This will load the LoRA layers specified in `state_dict` into `transformer`.
-
-        Parameters:
-            state_dict (`dict`):
-                A standard state dict containing the lora layer parameters. The keys can either be indexed directly
-                into the unet or prefixed with an additional `unet` which can be used to distinguish between text
-                encoder lora layers.
-            transformer (`ConsisIDTransformer3DModel`):
-                The Transformer model to load the LoRA layers into.
-            adapter_name (`str`, *optional*):
-                Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
-                `default_{i}` where i is the total number of adapters being loaded.
-            low_cpu_mem_usage (`bool`, *optional*):
-                Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
-                weights.
-        """
-        if low_cpu_mem_usage and is_peft_version("<", "0.13.0"):
-            raise ValueError(
-                "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`."
-            )
-
-        # Load the layers corresponding to transformer.
-        logger.info(f"Loading {cls.transformer_name}.")
-        transformer.load_lora_adapter(
-            state_dict,
-            network_alphas=None,
-            adapter_name=adapter_name,
-            _pipeline=_pipeline,
-            low_cpu_mem_usage=low_cpu_mem_usage,
-        )
-
-    @classmethod
-    # Adapted from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin.save_lora_weights without support for text encoder
-    def save_lora_weights(
-        cls,
-        save_directory: Union[str, os.PathLike],
-        transformer_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None,
-        is_main_process: bool = True,
-        weight_name: str = None,
-        save_function: Callable = None,
-        safe_serialization: bool = True,
-    ):
-        r"""
-        Save the LoRA parameters corresponding to the UNet and text encoder.
-
-        Arguments:
-            save_directory (`str` or `os.PathLike`):
-                Directory to save LoRA parameters to. Will be created if it doesn't exist.
-            transformer_lora_layers (`Dict[str, torch.nn.Module]` or `Dict[str, torch.Tensor]`):
-                State dict of the LoRA layers corresponding to the `transformer`.
-            is_main_process (`bool`, *optional*, defaults to `True`):
-                Whether the process calling this is the main process or not. Useful during distributed training and you
-                need to call this function on all processes. In this case, set `is_main_process=True` only on the main
-                process to avoid race conditions.
-            save_function (`Callable`):
-                The function to use to save the state dictionary. Useful during distributed training when you need to
-                replace `torch.save` with another method. Can be configured with the environment variable
-                `DIFFUSERS_SAVE_MODE`.
-            safe_serialization (`bool`, *optional*, defaults to `True`):
-                Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
-        """
-        state_dict = {}
-
-        if not transformer_lora_layers:
-            raise ValueError("You must pass `transformer_lora_layers`.")
-
-        if transformer_lora_layers:
-            state_dict.update(cls.pack_weights(transformer_lora_layers, cls.transformer_name))
-
-        # Save the model
-        cls.write_lora_layers(
-            state_dict=state_dict,
-            save_directory=save_directory,
-            is_main_process=is_main_process,
-            weight_name=weight_name,
-            save_function=save_function,
-            safe_serialization=safe_serialization,
-        )
-
-    # Copied from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin.fuse_lora with unet->transformer
-    def fuse_lora(
-        self,
-        components: List[str] = ["transformer", "text_encoder"],
-        lora_scale: float = 1.0,
-        safe_fusing: bool = False,
-        adapter_names: Optional[List[str]] = None,
-        **kwargs,
-    ):
-        r"""
-        Fuses the LoRA parameters into the original parameters of the corresponding blocks.
-
-        <Tip warning={true}>
-
-        This is an experimental API.
-
-        </Tip>
-
-        Args:
-            components: (`List[str]`): List of LoRA-injectable components to fuse the LoRAs into.
-            lora_scale (`float`, defaults to 1.0):
-                Controls how much to influence the outputs with the LoRA parameters.
-            safe_fusing (`bool`, defaults to `False`):
-                Whether to check fused weights for NaN values before fusing and if values are NaN not fusing them.
-            adapter_names (`List[str]`, *optional*):
-                Adapter names to be used for fusing. If nothing is passed, all active adapters will be fused.
-
-        Example:
-
-        ```py
-        from diffusers import DiffusionPipeline
-        import torch
-
-        pipeline = DiffusionPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
-        ).to("cuda")
-        pipeline.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")
-        pipeline.fuse_lora(lora_scale=0.7)
-        ```
-        """
-        super().fuse_lora(
-            components=components, lora_scale=lora_scale, safe_fusing=safe_fusing, adapter_names=adapter_names
-        )
-
-    # Copied from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin.unfuse_lora with unet->transformer
-    def unfuse_lora(self, components: List[str] = ["transformer", "text_encoder"], **kwargs):
-        r"""
-        Reverses the effect of
-        [`pipe.fuse_lora()`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.LoraBaseMixin.fuse_lora).
-
-        <Tip warning={true}>
-
-        This is an experimental API.
-
-        </Tip>
-
-        Args:
-            components (`List[str]`): List of LoRA-injectable components to unfuse LoRA from.
-            unfuse_transformer (`bool`, defaults to `True`): Whether to unfuse the UNet LoRA parameters.
-            unfuse_text_encoder (`bool`, defaults to `True`):
-                Whether to unfuse the text encoder LoRA parameters. If the text encoder wasn't monkey-patched with the
-                LoRA parameters then it won't have any effect.
-        """
-        super().unfuse_lora(components=components)
-
-
 class Mochi1LoraLoaderMixin(LoraBaseMixin):
     r"""
     Load LoRA layers into [`MochiTransformer3DModel`]. Specific to [`MochiPipeline`].
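For reference, the deleted mixin exposed the standard diffusers LoRA workflow on `ConsisIDPipeline`: fetch and sanitize a state dict, inject it into the transformer, optionally fuse for inference, and save adapters back out. A sketch of that workflow as the deleted docstrings describe it; the repo, weight, and adapter names are hypothetical placeholders, and none of this works for ConsisID after this commit:

```python
# Sketch of the API surface the removed ConsisIDLoraLoaderMixin provided.
# All paths and names below are hypothetical, not real checkpoints.
pipe.load_lora_weights(
    "some-user/consisid-lora",       # hypothetical Hub repo or local dir
    weight_name="lora.safetensors",  # hypothetical file name
    adapter_name="identity",         # optional; defaults to `default_{i}`
)
pipe.fuse_lora(lora_scale=0.7)  # bake LoRA into base weights (experimental API)
# ... run inference ...
pipe.unfuse_lora()              # restore the original, unfused parameters

# Saving supported only transformer layers (no text encoder support):
# ConsisIDLoraLoaderMixin.save_lora_weights(
#     save_directory="./consisid-lora",            # hypothetical
#     transformer_lora_layers=transformer_layers,  # required, else ValueError
# )
```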
