Skip to content

Commit 4f31393

Browse files
authored
Merge branch 'main' into tru-cfg-hunyuanvideo
2 parents ba98835 + b785ddb commit 4f31393

File tree

79 files changed

+1134
-79
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

79 files changed

+1134
-79
lines changed

.github/workflows/push_tests.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ jobs:
8383
python utils/print_env.py
8484
- name: PyTorch CUDA checkpoint tests on Ubuntu
8585
env:
86-
HF_TOKEN: ${{ secrets.HF_TOKEN }}
86+
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
8787
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
8888
CUBLAS_WORKSPACE_CONFIG: :16:8
8989
run: |
@@ -137,7 +137,7 @@ jobs:
137137
138138
- name: Run PyTorch CUDA tests
139139
env:
140-
HF_TOKEN: ${{ secrets.HF_TOKEN }}
140+
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
141141
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
142142
CUBLAS_WORKSPACE_CONFIG: :16:8
143143
run: |

docs/source/en/using-diffusers/other-formats.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,46 @@ Benefits of using a single-file layout include:
240240
1. Easy compatibility with diffusion interfaces such as [ComfyUI](https://github.com/comfyanonymous/ComfyUI) or [Automatic1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui) which commonly use a single-file layout.
241241
2. Easier to manage (download and share) a single file.
242242

243+
### DDUF
244+
245+
> [!WARNING]
246+
> DDUF is an experimental file format and APIs related to it can change in the future.
247+
248+
DDUF (**D**DUF **D**iffusion **U**nified **F**ormat) is a file format designed to make storing, distributing, and using diffusion models much easier. Built on the ZIP file format, DDUF offers a standardized, efficient, and flexible way to package all parts of a diffusion model into a single, easy-to-manage file. It provides a balance between the Diffusers multi-folder format and the widely popular single-file format.
249+
250+
Learn more details about DDUF on the Hugging Face Hub [documentation](https://huggingface.co/docs/hub/dduf).
251+
252+
Pass a checkpoint to the `dduf_file` parameter to load it in [`DiffusionPipeline`].
253+
254+
```py
255+
from diffusers import DiffusionPipeline
256+
import torch
257+
258+
pipe = DiffusionPipeline.from_pretrained(
259+
"DDUF/FLUX.1-dev-DDUF", dduf_file="FLUX.1-dev.dduf", torch_dtype=torch.bfloat16
260+
).to("cuda")
261+
image = pipe(
262+
"photo a cat holding a sign that says Diffusers", num_inference_steps=50, guidance_scale=3.5
263+
).images[0]
264+
image.save("cat.png")
265+
```
266+
267+
To save a pipeline as a `.dduf` checkpoint, use the [`~huggingface_hub.export_folder_as_dduf`] utility, which takes care of all the necessary file-level validations.
268+
269+
```py
270+
from huggingface_hub import export_folder_as_dduf
271+
from diffusers import DiffusionPipeline
272+
import torch
273+
274+
pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)
275+
276+
save_folder = "flux-dev"
277+
pipe.save_pretrained("flux-dev")
278+
export_folder_as_dduf("flux-dev.dduf", folder_path=save_folder)
279+
```
280+
> [!TIP]
281+
> Packaging and loading quantized checkpoints in the DDUF format is supported as long as they respect the multi-folder structure.
282+
243283
## Convert layout and files
244284

245285
Diffusers provides many scripts and methods to convert storage layouts and file formats to enable broader support across the diffusion ecosystem.

examples/dreambooth/train_dreambooth_lora_sana.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,9 @@ def log_validation(
158158
f"Running validation... \n Generating {args.num_validation_images} images with prompt:"
159159
f" {args.validation_prompt}."
160160
)
161+
if args.enable_vae_tiling:
162+
pipeline.vae.enable_tiling(tile_sample_min_height=1024, tile_sample_stride_width=1024)
163+
161164
pipeline.text_encoder = pipeline.text_encoder.to(torch.bfloat16)
162165
pipeline = pipeline.to(accelerator.device)
163166
pipeline.set_progress_bar_config(disable=True)
@@ -597,6 +600,7 @@ def parse_args(input_args=None):
597600
help="Whether to offload the VAE and the text encoder to CPU when they are not used.",
598601
)
599602
parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank")
603+
parser.add_argument("--enable_vae_tiling", action="store_true", help="Enabla vae tiling in log validation")
600604

601605
if input_args is not None:
602606
args = parser.parse_args(input_args)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@
101101
"filelock",
102102
"flax>=0.4.1",
103103
"hf-doc-builder>=0.3.0",
104-
"huggingface-hub>=0.23.2",
104+
"huggingface-hub>=0.27.0",
105105
"requests-mock==1.10.0",
106106
"importlib_metadata",
107107
"invisible-watermark>=0.2.0",

src/diffusers/configuration_utils.py

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@
2424
import re
2525
from collections import OrderedDict
2626
from pathlib import Path
27-
from typing import Any, Dict, Tuple, Union
27+
from typing import Any, Dict, Optional, Tuple, Union
2828

2929
import numpy as np
30-
from huggingface_hub import create_repo, hf_hub_download
30+
from huggingface_hub import DDUFEntry, create_repo, hf_hub_download
3131
from huggingface_hub.utils import (
3232
EntryNotFoundError,
3333
RepositoryNotFoundError,
@@ -347,6 +347,7 @@ def load_config(
347347
_ = kwargs.pop("mirror", None)
348348
subfolder = kwargs.pop("subfolder", None)
349349
user_agent = kwargs.pop("user_agent", {})
350+
dduf_entries: Optional[Dict[str, DDUFEntry]] = kwargs.pop("dduf_entries", None)
350351

351352
user_agent = {**user_agent, "file_type": "config"}
352353
user_agent = http_user_agent(user_agent)
@@ -358,8 +359,15 @@ def load_config(
358359
"`self.config_name` is not defined. Note that one should not load a config from "
359360
"`ConfigMixin`. Please make sure to define `config_name` in a class inheriting from `ConfigMixin`"
360361
)
361-
362-
if os.path.isfile(pretrained_model_name_or_path):
362+
# Custom path for now
363+
if dduf_entries:
364+
if subfolder is not None:
365+
raise ValueError(
366+
"DDUF file only allow for 1 level of directory (e.g transformer/model1/model.safetentors is not allowed). "
367+
"Please check the DDUF structure"
368+
)
369+
config_file = cls._get_config_file_from_dduf(pretrained_model_name_or_path, dduf_entries)
370+
elif os.path.isfile(pretrained_model_name_or_path):
363371
config_file = pretrained_model_name_or_path
364372
elif os.path.isdir(pretrained_model_name_or_path):
365373
if subfolder is not None and os.path.isfile(
@@ -426,10 +434,8 @@ def load_config(
426434
f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
427435
f"containing a {cls.config_name} file"
428436
)
429-
430437
try:
431-
# Load config dict
432-
config_dict = cls._dict_from_json_file(config_file)
438+
config_dict = cls._dict_from_json_file(config_file, dduf_entries=dduf_entries)
433439

434440
commit_hash = extract_commit_hash(config_file)
435441
except (json.JSONDecodeError, UnicodeDecodeError):
@@ -552,9 +558,14 @@ def extract_init_dict(cls, config_dict, **kwargs):
552558
return init_dict, unused_kwargs, hidden_config_dict
553559

554560
@classmethod
555-
def _dict_from_json_file(cls, json_file: Union[str, os.PathLike]):
556-
with open(json_file, "r", encoding="utf-8") as reader:
557-
text = reader.read()
561+
def _dict_from_json_file(
562+
cls, json_file: Union[str, os.PathLike], dduf_entries: Optional[Dict[str, DDUFEntry]] = None
563+
):
564+
if dduf_entries:
565+
text = dduf_entries[json_file].read_text()
566+
else:
567+
with open(json_file, "r", encoding="utf-8") as reader:
568+
text = reader.read()
558569
return json.loads(text)
559570

560571
def __repr__(self):
@@ -616,6 +627,20 @@ def to_json_file(self, json_file_path: Union[str, os.PathLike]):
616627
with open(json_file_path, "w", encoding="utf-8") as writer:
617628
writer.write(self.to_json_string())
618629

630+
@classmethod
631+
def _get_config_file_from_dduf(cls, pretrained_model_name_or_path: str, dduf_entries: Dict[str, DDUFEntry]):
632+
# paths inside a DDUF file must always use "/" as the separator
633+
config_file = (
634+
cls.config_name
635+
if pretrained_model_name_or_path == ""
636+
else "/".join([pretrained_model_name_or_path, cls.config_name])
637+
)
638+
if config_file not in dduf_entries:
639+
raise ValueError(
640+
f"We did not manage to find the file {config_file} in the dduf file. We only have the following files {dduf_entries.keys()}"
641+
)
642+
return config_file
643+
619644

620645
def register_to_config(init):
621646
r"""

src/diffusers/dependency_versions_table.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"filelock": "filelock",
1010
"flax": "flax>=0.4.1",
1111
"hf-doc-builder": "hf-doc-builder>=0.3.0",
12-
"huggingface-hub": "huggingface-hub>=0.23.2",
12+
"huggingface-hub": "huggingface-hub>=0.27.0",
1313
"requests-mock": "requests-mock==1.10.0",
1414
"importlib_metadata": "importlib_metadata",
1515
"invisible-watermark": "invisible-watermark>=0.2.0",

src/diffusers/loaders/lora_pipeline.py

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from ..utils import (
2222
USE_PEFT_BACKEND,
2323
deprecate,
24+
get_submodule_by_name,
2425
is_peft_available,
2526
is_peft_version,
2627
is_torch_version,
@@ -1981,10 +1982,17 @@ def _maybe_expand_transformer_param_shape_or_error_(
19811982
in_features = state_dict[lora_A_weight_name].shape[1]
19821983
out_features = state_dict[lora_B_weight_name].shape[0]
19831984

1985+
# The model may be loaded with different quantization schemes, which may flatten the params.
1986+
# `bitsandbytes`, for example, flattens the weights when using 4bit. 8bit bnb models
1987+
# preserve weight shape.
1988+
module_weight_shape = cls._calculate_module_shape(model=transformer, base_module=module)
1989+
19841990
# This means there's no need for an expansion in the params, so we simply skip.
1985-
if tuple(module_weight.shape) == (out_features, in_features):
1991+
if tuple(module_weight_shape) == (out_features, in_features):
19861992
continue
19871993

1994+
# TODO (sayakpaul): We still need to consider if the module we're expanding is
1995+
# quantized and handle it accordingly if that is the case.
19881996
module_out_features, module_in_features = module_weight.shape
19891997
debug_message = ""
19901998
if in_features > module_in_features:
@@ -2080,13 +2088,16 @@ def _maybe_expand_lora_state_dict(cls, transformer, lora_state_dict):
20802088
base_weight_param = transformer_state_dict[base_param_name]
20812089
lora_A_param = lora_state_dict[f"{prefix}{k}.lora_A.weight"]
20822090

2083-
if base_weight_param.shape[1] > lora_A_param.shape[1]:
2091+
# TODO (sayakpaul): Handle the cases when we actually need to expand when using quantization.
2092+
base_module_shape = cls._calculate_module_shape(model=transformer, base_weight_param_name=base_param_name)
2093+
2094+
if base_module_shape[1] > lora_A_param.shape[1]:
20842095
shape = (lora_A_param.shape[0], base_weight_param.shape[1])
20852096
expanded_state_dict_weight = torch.zeros(shape, device=base_weight_param.device)
20862097
expanded_state_dict_weight[:, : lora_A_param.shape[1]].copy_(lora_A_param)
20872098
lora_state_dict[f"{prefix}{k}.lora_A.weight"] = expanded_state_dict_weight
20882099
expanded_module_names.add(k)
2089-
elif base_weight_param.shape[1] < lora_A_param.shape[1]:
2100+
elif base_module_shape[1] < lora_A_param.shape[1]:
20902101
raise NotImplementedError(
20912102
f"This LoRA param ({k}.lora_A.weight) has an incompatible shape {lora_A_param.shape}. Please open an issue to file for a feature request - https://github.com/huggingface/diffusers/issues/new."
20922103
)
@@ -2098,6 +2109,28 @@ def _maybe_expand_lora_state_dict(cls, transformer, lora_state_dict):
20982109

20992110
return lora_state_dict
21002111

2112+
@staticmethod
2113+
def _calculate_module_shape(
2114+
model: "torch.nn.Module",
2115+
base_module: "torch.nn.Linear" = None,
2116+
base_weight_param_name: str = None,
2117+
) -> "torch.Size":
2118+
def _get_weight_shape(weight: torch.Tensor):
2119+
return weight.quant_state.shape if weight.__class__.__name__ == "Params4bit" else weight.shape
2120+
2121+
if base_module is not None:
2122+
return _get_weight_shape(base_module.weight)
2123+
elif base_weight_param_name is not None:
2124+
if not base_weight_param_name.endswith(".weight"):
2125+
raise ValueError(
2126+
f"Invalid `base_weight_param_name` passed as it does not end with '.weight' {base_weight_param_name=}."
2127+
)
2128+
module_path = base_weight_param_name.rsplit(".weight", 1)[0]
2129+
submodule = get_submodule_by_name(model, module_path)
2130+
return _get_weight_shape(submodule.weight)
2131+
2132+
raise ValueError("Either `base_module` or `base_weight_param_name` must be provided.")
2133+
21012134

21022135
# The reason why we subclass from `StableDiffusionLoraLoaderMixin` here is because Amused initially
21032136
# relied on `StableDiffusionLoraLoaderMixin` for its LoRA support.

src/diffusers/loaders/textual_inversion.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs)
4040
force_download = kwargs.pop("force_download", False)
4141
proxies = kwargs.pop("proxies", None)
4242
local_files_only = kwargs.pop("local_files_only", None)
43-
token = kwargs.pop("token", None)
43+
hf_token = kwargs.pop("hf_token", None)
4444
revision = kwargs.pop("revision", None)
4545
subfolder = kwargs.pop("subfolder", None)
4646
weight_name = kwargs.pop("weight_name", None)
@@ -73,7 +73,7 @@ def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs)
7373
force_download=force_download,
7474
proxies=proxies,
7575
local_files_only=local_files_only,
76-
token=token,
76+
token=hf_token,
7777
revision=revision,
7878
subfolder=subfolder,
7979
user_agent=user_agent,
@@ -93,7 +93,7 @@ def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs)
9393
force_download=force_download,
9494
proxies=proxies,
9595
local_files_only=local_files_only,
96-
token=token,
96+
token=hf_token,
9797
revision=revision,
9898
subfolder=subfolder,
9999
user_agent=user_agent,
@@ -312,7 +312,7 @@ def load_textual_inversion(
312312
local_files_only (`bool`, *optional*, defaults to `False`):
313313
Whether to only load local model weights and configuration files or not. If set to `True`, the model
314314
won't be downloaded from the Hub.
315-
token (`str` or *bool*, *optional*):
315+
hf_token (`str` or *bool*, *optional*):
316316
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
317317
`diffusers-cli login` (stored in `~/.huggingface`) is used.
318318
revision (`str`, *optional*, defaults to `"main"`):

src/diffusers/models/attention_processor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -899,7 +899,7 @@ def apply_quadratic_attention(self, query: torch.Tensor, key: torch.Tensor, valu
899899
scores = torch.matmul(key.transpose(-1, -2), query)
900900
scores = scores.to(dtype=torch.float32)
901901
scores = scores / (torch.sum(scores, dim=2, keepdim=True) + self.eps)
902-
hidden_states = torch.matmul(value, scores)
902+
hidden_states = torch.matmul(value, scores.to(value.dtype))
903903
return hidden_states
904904

905905
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:

0 commit comments

Comments
 (0)