Skip to content
Merged
Show file tree
Hide file tree
Changes from 84 commits
Commits
Show all changes
86 commits
Select commit Hold shift + click to select a range
d53f848
add transformer pipeline first version
leffff Oct 4, 2025
7db6093
updates
leffff Oct 6, 2025
a0cf07f
fix 5sec generation
leffff Oct 9, 2025
0bd738f
Merge branch 'huggingface:main' into main
leffff Oct 9, 2025
c8f3a36
rewrite Kandinsky5T2VPipeline to diffusers style
leffff Oct 10, 2025
86b6c2b
Merge branch 'huggingface:main' into main
leffff Oct 10, 2025
723d149
add multiprompt support
leffff Oct 10, 2025
22e14bd
remove prints in pipeline
leffff Oct 10, 2025
70fa62b
add nabla attention
leffff Oct 12, 2025
07e11b2
Merge branch 'huggingface:main' into main
leffff Oct 12, 2025
45240a7
Wrap Transformer in Diffusers style
leffff Oct 13, 2025
43bd1e8
fix license
leffff Oct 13, 2025
f35c279
Merge branch 'huggingface:main' into main
leffff Oct 13, 2025
149fd53
fix prompt type
leffff Oct 13, 2025
e3a3e9d
Merge branch 'main' of https://github.com/leffff/diffusers
leffff Oct 13, 2025
7af80e9
add gradient checkpointing and peft support
leffff Oct 14, 2025
04efb19
add usage example
leffff Oct 14, 2025
4aa22f3
Merge branch 'main' into main
leffff Oct 14, 2025
235f0d5
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 14, 2025
88a8eea
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 14, 2025
f52f3b4
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 14, 2025
0190e55
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 14, 2025
d62dffc
Update src/diffusers/models/transformers/transformer_kandinsky.py
leffff Oct 14, 2025
7084106
remove unused imports
leffff Oct 14, 2025
d5dcd94
Merge branch 'huggingface:main' into main
leffff Oct 15, 2025
b615d5c
add 10 second models support
leffff Oct 15, 2025
6a0233e
Merge branch 'main' of https://github.com/leffff/diffusers
leffff Oct 15, 2025
588c12a
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 16, 2025
327ab84
remove no_grad and simplified prompt paddings
leffff Oct 16, 2025
9b06afb
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 16, 2025
8fd22c0
merge
leffff Oct 16, 2025
28458d0
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 16, 2025
e7b91ed
merge suggestions
leffff Oct 16, 2025
cd3cc61
moved template to __init__
leffff Oct 16, 2025
4450265
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 16, 2025
b9a3be2
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 16, 2025
78a23b9
Update src/diffusers/models/transformers/transformer_kandinsky.py
leffff Oct 16, 2025
56b90b1
moved sdps inside processor
leffff Oct 16, 2025
600e9d6
Merge branch 'main' of https://github.com/leffff/diffusers
leffff Oct 16, 2025
31a1474
remove oneline function
leffff Oct 16, 2025
894aa98
remove reset_dtype methods
leffff Oct 16, 2025
c8be081
Transformer: move all methods to forward
leffff Oct 16, 2025
3ffdf7f
separated prompt encoding
leffff Oct 16, 2025
b0e1b86
Merge branch 'main' into main
leffff Oct 16, 2025
9f52335
Update src/diffusers/models/transformers/transformer_kandinsky.py
leffff Oct 16, 2025
cc46e2d
refactoring
leffff Oct 16, 2025
573b966
Merge branch 'main' of https://github.com/leffff/diffusers
leffff Oct 16, 2025
9672c6b
Update src/diffusers/models/transformers/transformer_kandinsky.py
leffff Oct 16, 2025
1e597cb
Merge branch 'main' of https://github.com/leffff/diffusers
leffff Oct 16, 2025
900feba
refactoring according to https://github.com/huggingface/diffusers/comm…
leffff Oct 17, 2025
3839f5e
Merge branch 'main' into main
yiyixuxu Oct 17, 2025
226bbf8
Update src/diffusers/models/transformers/transformer_kandinsky.py
leffff Oct 17, 2025
9504fb0
Update src/diffusers/models/transformers/transformer_kandinsky.py
leffff Oct 17, 2025
f0eca08
Update src/diffusers/models/transformers/transformer_kandinsky.py
leffff Oct 17, 2025
cc74c1e
Update src/diffusers/models/transformers/transformer_kandinsky.py
leffff Oct 17, 2025
cb915d7
Update src/diffusers/models/transformers/transformer_kandinsky.py
leffff Oct 17, 2025
9aa3c2e
Update src/diffusers/models/transformers/transformer_kandinsky.py
leffff Oct 17, 2025
feac8f0
Update src/diffusers/models/transformers/transformer_kandinsky.py
leffff Oct 17, 2025
d3b9597
Update src/diffusers/models/transformers/transformer_kandinsky.py
leffff Oct 17, 2025
693b9aa
Update src/diffusers/models/transformers/transformer_kandinsky.py
leffff Oct 17, 2025
e2ed6ec
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 17, 2025
2925447
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 17, 2025
b02ad82
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 17, 2025
dc67c2b
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 17, 2025
d0fc426
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 17, 2025
222ba4c
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 17, 2025
3a49505
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 17, 2025
1e12017
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 17, 2025
5a30079
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 17, 2025
0d96ecf
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 17, 2025
aadafc1
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 17, 2025
54cf03c
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 17, 2025
22c503f
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 17, 2025
211d3dd
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 17, 2025
70cfb9e
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 17, 2025
6e83133
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 17, 2025
7ad87f3
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 17, 2025
bf229af
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 17, 2025
06afd9b
Update src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py
leffff Oct 17, 2025
e1a635e
fixed
leffff Oct 17, 2025
e4856e5
Merge branch 'main' into main
leffff Oct 17, 2025
1bf19f0
style +copies
yiyixuxu Oct 18, 2025
1746f6d
Update src/diffusers/models/transformers/transformer_kandinsky.py
yiyixuxu Oct 18, 2025
5bb1657
more
yiyixuxu Oct 18, 2025
a26300f
Apply suggestions from code review
yiyixuxu Oct 18, 2025
ecbe522
add lora loader doc
yiyixuxu Oct 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/diffusers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@
"HunyuanVideoTransformer3DModel",
"I2VGenXLUNet",
"Kandinsky3UNet",
"Kandinsky5Transformer3DModel",
"LatteTransformer3DModel",
"LTXVideoTransformer3DModel",
"Lumina2Transformer2DModel",
Expand Down Expand Up @@ -474,6 +475,7 @@
"ImageTextPipelineOutput",
"Kandinsky3Img2ImgPipeline",
"Kandinsky3Pipeline",
"Kandinsky5T2VPipeline",
"KandinskyCombinedPipeline",
"KandinskyImg2ImgCombinedPipeline",
"KandinskyImg2ImgPipeline",
Expand Down Expand Up @@ -912,6 +914,7 @@
HunyuanVideoTransformer3DModel,
I2VGenXLUNet,
Kandinsky3UNet,
Kandinsky5Transformer3DModel,
LatteTransformer3DModel,
LTXVideoTransformer3DModel,
Lumina2Transformer2DModel,
Expand Down Expand Up @@ -1136,6 +1139,7 @@
ImageTextPipelineOutput,
Kandinsky3Img2ImgPipeline,
Kandinsky3Pipeline,
Kandinsky5T2VPipeline,
KandinskyCombinedPipeline,
KandinskyImg2ImgCombinedPipeline,
KandinskyImg2ImgPipeline,
Expand Down
2 changes: 2 additions & 0 deletions src/diffusers/loaders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def text_encoder_attn_modules(text_encoder):
"SanaLoraLoaderMixin",
"Lumina2LoraLoaderMixin",
"WanLoraLoaderMixin",
"KandinskyLoraLoaderMixin",
"HiDreamImageLoraLoaderMixin",
"SkyReelsV2LoraLoaderMixin",
"QwenImageLoraLoaderMixin",
Expand Down Expand Up @@ -115,6 +116,7 @@ def text_encoder_attn_modules(text_encoder):
FluxLoraLoaderMixin,
HiDreamImageLoraLoaderMixin,
HunyuanVideoLoraLoaderMixin,
KandinskyLoraLoaderMixin,
LoraLoaderMixin,
LTXVideoLoraLoaderMixin,
Lumina2LoraLoaderMixin,
Expand Down
285 changes: 285 additions & 0 deletions src/diffusers/loaders/lora_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -3639,6 +3639,291 @@ def unfuse_lora(self, components: List[str] = ["transformer"], **kwargs):
super().unfuse_lora(components=components, **kwargs)


class KandinskyLoraLoaderMixin(LoraBaseMixin):
    r"""
    Load LoRA layers into [`Kandinsky5Transformer3DModel`].
    """

    _lora_loadable_modules = ["transformer"]
    transformer_name = TRANSFORMER_NAME

    @classmethod
    @validate_hf_hub_args
    def lora_state_dict(
        cls,
        pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
        **kwargs,
    ):
        r"""
        Return state dict for lora weights and the network alphas.

        Parameters:
            pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`):
                Can be either:
                    - A string, the *model id* of a pretrained model hosted on the Hub.
                    - A path to a *directory* containing the model weights.
                    - A [torch state
                      dict](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict).

            cache_dir (`Union[str, os.PathLike]`, *optional*):
                Path to a directory where a downloaded pretrained model configuration is cached.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download of the model weights.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint.
            local_files_only (`bool`, *optional*, defaults to `False`):
                Whether to only load local model weights and configuration files.
            token (`str` or *bool*, *optional*):
                The token to use as HTTP bearer authorization for remote files.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use.
            subfolder (`str`, *optional*, defaults to `""`):
                The subfolder location of a model file within a larger model repository.
            weight_name (`str`, *optional*, defaults to None):
                Name of the serialized state dict file.
            use_safetensors (`bool`, *optional*):
                Whether to use safetensors for loading.
            return_lora_metadata (`bool`, *optional*, defaults to False):
                When enabled, additionally return the LoRA adapter metadata.
        """
        # Load the main state dict first which has the LoRA layers
        cache_dir = kwargs.pop("cache_dir", None)
        force_download = kwargs.pop("force_download", False)
        proxies = kwargs.pop("proxies", None)
        local_files_only = kwargs.pop("local_files_only", None)
        token = kwargs.pop("token", None)
        revision = kwargs.pop("revision", None)
        subfolder = kwargs.pop("subfolder", None)
        weight_name = kwargs.pop("weight_name", None)
        use_safetensors = kwargs.pop("use_safetensors", None)
        return_lora_metadata = kwargs.pop("return_lora_metadata", False)

        # Default to safetensors, but allow a pickle fallback when the caller
        # did not explicitly request safetensors.
        allow_pickle = False
        if use_safetensors is None:
            use_safetensors = True
            allow_pickle = True

        user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"}

        state_dict, metadata = _fetch_state_dict(
            pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict,
            weight_name=weight_name,
            use_safetensors=use_safetensors,
            local_files_only=local_files_only,
            cache_dir=cache_dir,
            force_download=force_download,
            proxies=proxies,
            token=token,
            revision=revision,
            subfolder=subfolder,
            user_agent=user_agent,
            allow_pickle=allow_pickle,
        )

        # DoRA checkpoints are not supported yet; strip their extra scale keys
        # so the remaining LoRA weights can still be loaded.
        is_dora_scale_present = any("dora_scale" in k for k in state_dict)
        if is_dora_scale_present:
            warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new."
            logger.warning(warn_msg)
            state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k}

        out = (state_dict, metadata) if return_lora_metadata else state_dict
        return out

    def load_lora_weights(
        self,
        pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
        adapter_name: Optional[str] = None,
        hotswap: bool = False,
        **kwargs,
    ):
        """
        Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.transformer`

        Parameters:
            pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`):
                See [`~loaders.KandinskyLoraLoaderMixin.lora_state_dict`].
            adapter_name (`str`, *optional*):
                Adapter name to be used for referencing the loaded adapter model.
            hotswap (`bool`, *optional*):
                Whether to substitute an existing (LoRA) adapter with the newly loaded adapter in-place.
            low_cpu_mem_usage (`bool`, *optional*):
                Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
                weights.
            kwargs (`dict`, *optional*):
                See [`~loaders.KandinskyLoraLoaderMixin.lora_state_dict`].
        """
        if not USE_PEFT_BACKEND:
            raise ValueError("PEFT backend is required for this method.")

        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA)
        if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"):
            raise ValueError(
                "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`."
            )

        # if a dict is passed, copy it instead of modifying it inplace
        if isinstance(pretrained_model_name_or_path_or_dict, dict):
            pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy()

        # First, ensure that the checkpoint is a compatible one and can be successfully loaded.
        kwargs["return_lora_metadata"] = True
        state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs)

        is_correct_format = all("lora" in key for key in state_dict)
        if not is_correct_format:
            raise ValueError("Invalid LoRA checkpoint.")

        # Load LoRA into transformer
        self.load_lora_into_transformer(
            state_dict,
            transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer,
            adapter_name=adapter_name,
            metadata=metadata,
            _pipeline=self,
            low_cpu_mem_usage=low_cpu_mem_usage,
            hotswap=hotswap,
        )

    @classmethod
    def load_lora_into_transformer(
        cls,
        state_dict,
        transformer,
        adapter_name=None,
        _pipeline=None,
        low_cpu_mem_usage=False,
        hotswap: bool = False,
        metadata=None,
    ):
        """
        Load the LoRA layers specified in `state_dict` into `transformer`.

        Parameters:
            state_dict (`dict`):
                A standard state dict containing the lora layer parameters.
            transformer (`Kandinsky5Transformer3DModel`):
                The transformer model to load the LoRA layers into.
            adapter_name (`str`, *optional*):
                Adapter name to be used for referencing the loaded adapter model.
            low_cpu_mem_usage (`bool`, *optional*):
                Speed up model loading by only loading the pretrained LoRA weights.
            hotswap (`bool`, *optional*):
                See [`~loaders.KandinskyLoraLoaderMixin.load_lora_weights`].
            metadata (`dict`):
                Optional LoRA adapter metadata.
        """
        if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"):
            raise ValueError(
                "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`."
            )

        # Load the layers corresponding to transformer.
        logger.info(f"Loading {cls.transformer_name}.")
        transformer.load_lora_adapter(
            state_dict,
            network_alphas=None,
            adapter_name=adapter_name,
            metadata=metadata,
            _pipeline=_pipeline,
            low_cpu_mem_usage=low_cpu_mem_usage,
            hotswap=hotswap,
        )

    @classmethod
    def save_lora_weights(
        cls,
        save_directory: Union[str, os.PathLike],
        transformer_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None,
        is_main_process: bool = True,
        weight_name: str = None,
        save_function: Callable = None,
        safe_serialization: bool = True,
        transformer_lora_adapter_metadata=None,
    ):
        r"""
        Save the LoRA parameters corresponding to the transformer.

        Arguments:
            save_directory (`str` or `os.PathLike`):
                Directory to save LoRA parameters to.
            transformer_lora_layers (`Dict[str, torch.nn.Module]` or `Dict[str, torch.Tensor]`):
                State dict of the LoRA layers corresponding to the `transformer`.
            is_main_process (`bool`, *optional*, defaults to `True`):
                Whether the process calling this is the main process.
            weight_name (`str`, *optional*):
                Name of the serialized state dict file.
            save_function (`Callable`):
                The function to use to save the state dictionary.
            safe_serialization (`bool`, *optional*, defaults to `True`):
                Whether to save the model using `safetensors` or the traditional PyTorch way.
            transformer_lora_adapter_metadata:
                LoRA adapter metadata associated with the transformer.
        """
        lora_layers = {}
        lora_metadata = {}

        if transformer_lora_layers:
            lora_layers[cls.transformer_name] = transformer_lora_layers
            lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata

        if not lora_layers:
            raise ValueError("You must pass `transformer_lora_layers`.")

        cls._save_lora_weights(
            save_directory=save_directory,
            lora_layers=lora_layers,
            lora_metadata=lora_metadata,
            is_main_process=is_main_process,
            weight_name=weight_name,
            save_function=save_function,
            safe_serialization=safe_serialization,
        )

    def fuse_lora(
        self,
        components: List[str] = ["transformer"],
        lora_scale: float = 1.0,
        safe_fusing: bool = False,
        adapter_names: Optional[List[str]] = None,
        **kwargs,
    ):
        r"""
        Fuses the LoRA parameters into the original parameters of the corresponding blocks.

        Args:
            components: (`List[str]`): List of LoRA-injectable components to fuse the LoRAs into.
            lora_scale (`float`, defaults to 1.0):
                Controls how much to influence the outputs with the LoRA parameters.
            safe_fusing (`bool`, defaults to `False`):
                Whether to check fused weights for NaN values before fusing.
            adapter_names (`List[str]`, *optional*):
                Adapter names to be used for fusing.

        Example:
        ```py
        from diffusers import Kandinsky5T2VPipeline

        pipeline = Kandinsky5T2VPipeline.from_pretrained("ai-forever/Kandinsky-5.0-T2V")
        pipeline.load_lora_weights("path/to/lora.safetensors")
        pipeline.fuse_lora(lora_scale=0.7)
        ```
        """
        super().fuse_lora(
            components=components,
            lora_scale=lora_scale,
            safe_fusing=safe_fusing,
            adapter_names=adapter_names,
            **kwargs,
        )

    def unfuse_lora(self, components: List[str] = ["transformer"], **kwargs):
        r"""
        Reverses the effect of [`pipe.fuse_lora()`].

        Args:
            components (`List[str]`): List of LoRA-injectable components to unfuse LoRA from.
        """
        super().unfuse_lora(components=components, **kwargs)


class WanLoraLoaderMixin(LoraBaseMixin):
r"""
Load LoRA layers into [`WanTransformer3DModel`]. Specific to [`WanPipeline`] and `[WanImageToVideoPipeline`].
Expand Down
2 changes: 2 additions & 0 deletions src/diffusers/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@
_import_structure["transformers.transformer_hidream_image"] = ["HiDreamImageTransformer2DModel"]
_import_structure["transformers.transformer_hunyuan_video"] = ["HunyuanVideoTransformer3DModel"]
_import_structure["transformers.transformer_hunyuan_video_framepack"] = ["HunyuanVideoFramepackTransformer3DModel"]
_import_structure["transformers.transformer_kandinsky"] = ["Kandinsky5Transformer3DModel"]
_import_structure["transformers.transformer_ltx"] = ["LTXVideoTransformer3DModel"]
_import_structure["transformers.transformer_lumina2"] = ["Lumina2Transformer2DModel"]
_import_structure["transformers.transformer_mochi"] = ["MochiTransformer3DModel"]
Expand Down Expand Up @@ -182,6 +183,7 @@
HunyuanDiT2DModel,
HunyuanVideoFramepackTransformer3DModel,
HunyuanVideoTransformer3DModel,
Kandinsky5Transformer3DModel,
LatteTransformer3DModel,
LTXVideoTransformer3DModel,
Lumina2Transformer2DModel,
Expand Down
1 change: 1 addition & 0 deletions src/diffusers/models/transformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from .transformer_hidream_image import HiDreamImageTransformer2DModel
from .transformer_hunyuan_video import HunyuanVideoTransformer3DModel
from .transformer_hunyuan_video_framepack import HunyuanVideoFramepackTransformer3DModel
from .transformer_kandinsky import Kandinsky5Transformer3DModel
from .transformer_ltx import LTXVideoTransformer3DModel
from .transformer_lumina2 import Lumina2Transformer2DModel
from .transformer_mochi import MochiTransformer3DModel
Expand Down
Loading