Skip to content

Commit d083f86

Browse files
authored
Merge branch 'main' into wan22-lightx2v
2 parents dcce164 + d45199a commit d083f86

32 files changed

+785
-126
lines changed

.github/workflows/nightly_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ jobs:
333333
additional_deps: ["peft"]
334334
- backend: "gguf"
335335
test_location: "gguf"
336-
additional_deps: ["peft"]
336+
additional_deps: ["peft", "kernels"]
337337
- backend: "torchao"
338338
test_location: "torchao"
339339
additional_deps: []

docs/source/en/quantization/gguf.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,16 @@ image = pipe(prompt, generator=torch.manual_seed(0)).images[0]
5353
image.save("flux-gguf.png")
5454
```
5555

56+
## Using Optimized CUDA Kernels with GGUF
57+
58+
Optimized CUDA kernels can accelerate GGUF quantized model inference by approximately 10%. This functionality requires a compatible GPU (one whose CUDA compute capability, as reported by `torch.cuda.get_device_capability`, is greater than 7) and the `kernels` library:
59+
60+
```shell
61+
pip install -U kernels
62+
```
63+
64+
Once installed, set the environment variable `DIFFUSERS_GGUF_CUDA_KERNELS=true` to use the optimized kernels when they are available. Note that the CUDA kernels may introduce minor numerical differences compared to the original GGUF implementation, potentially causing subtle visual variations in generated images. To disable CUDA kernel usage, set `DIFFUSERS_GGUF_CUDA_KERNELS=false`.
65+
5666
## Supported Quantization Types
5767

5868
- BF16

src/diffusers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@
139139
"AutoGuidance",
140140
"ClassifierFreeGuidance",
141141
"ClassifierFreeZeroStarGuidance",
142+
"FrequencyDecoupledGuidance",
142143
"PerturbedAttentionGuidance",
143144
"SkipLayerGuidance",
144145
"SmoothedEnergyGuidance",
@@ -804,6 +805,7 @@
804805
AutoGuidance,
805806
ClassifierFreeGuidance,
806807
ClassifierFreeZeroStarGuidance,
808+
FrequencyDecoupledGuidance,
807809
PerturbedAttentionGuidance,
808810
SkipLayerGuidance,
809811
SmoothedEnergyGuidance,

src/diffusers/guiders/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from .auto_guidance import AutoGuidance
2323
from .classifier_free_guidance import ClassifierFreeGuidance
2424
from .classifier_free_zero_star_guidance import ClassifierFreeZeroStarGuidance
25+
from .frequency_decoupled_guidance import FrequencyDecoupledGuidance
2526
from .perturbed_attention_guidance import PerturbedAttentionGuidance
2627
from .skip_layer_guidance import SkipLayerGuidance
2728
from .smoothed_energy_guidance import SmoothedEnergyGuidance
@@ -32,6 +33,7 @@
3233
AutoGuidance,
3334
ClassifierFreeGuidance,
3435
ClassifierFreeZeroStarGuidance,
36+
FrequencyDecoupledGuidance,
3537
PerturbedAttentionGuidance,
3638
SkipLayerGuidance,
3739
SmoothedEnergyGuidance,

src/diffusers/guiders/frequency_decoupled_guidance.py

Lines changed: 327 additions & 0 deletions
Large diffs are not rendered by default.

src/diffusers/hooks/_helpers.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ def _register_attention_processors_metadata():
133133
skip_processor_output_fn=_skip_proc_output_fn_Attention_WanAttnProcessor2_0,
134134
),
135135
)
136+
136137
# FluxAttnProcessor
137138
AttentionProcessorRegistry.register(
138139
model_class=FluxAttnProcessor,

src/diffusers/hooks/group_offloading.py

Lines changed: 89 additions & 104 deletions
Large diffs are not rendered by default.

src/diffusers/hooks/utils.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Copyright 2025 The HuggingFace Team. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import torch
16+
17+
from ._common import _ALL_TRANSFORMER_BLOCK_IDENTIFIERS, _ATTENTION_CLASSES, _FEEDFORWARD_CLASSES
18+
19+
20+
def _get_identifiable_transformer_blocks_in_module(module: torch.nn.Module):
    """Find the `torch.nn.ModuleList` submodules whose name marks them as transformer blocks.

    Walks `module.named_modules()` and keeps every entry that is a
    `torch.nn.ModuleList` and whose qualified name ends with one of the
    suffixes in `_ALL_TRANSFORMER_BLOCK_IDENTIFIERS`.

    Args:
        module: The root module to search.

    Returns:
        A list of `(name, submodule)` tuples, in `named_modules()` order.
    """
    # `str.endswith` accepts a tuple of suffixes, so a single call covers every identifier.
    block_suffixes = tuple(_ALL_TRANSFORMER_BLOCK_IDENTIFIERS)
    return [
        (qualified_name, child)
        for qualified_name, child in module.named_modules()
        if qualified_name.endswith(block_suffixes) and isinstance(child, torch.nn.ModuleList)
    ]
28+
29+
30+
def _get_identifiable_attention_layers_in_module(module: torch.nn.Module):
    """Find every submodule that is an instance of one of `_ATTENTION_CLASSES`.

    Args:
        module: The root module to search.

    Returns:
        A list of `(name, submodule)` tuples, in `named_modules()` order.
    """
    return [
        (qualified_name, child)
        for qualified_name, child in module.named_modules()
        if isinstance(child, _ATTENTION_CLASSES)
    ]
36+
37+
38+
def _get_identifiable_feedforward_layers_in_module(module: torch.nn.Module):
    """Find every submodule that is an instance of one of `_FEEDFORWARD_CLASSES`.

    Args:
        module: The root module to search.

    Returns:
        A list of `(name, submodule)` tuples, in `named_modules()` order.
    """
    return [
        (qualified_name, child)
        for qualified_name, child in module.named_modules()
        if isinstance(child, _FEEDFORWARD_CLASSES)
    ]

src/diffusers/pipelines/flux/pipeline_flux.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ def _get_clip_prompt_embeds(
310310
def encode_prompt(
311311
self,
312312
prompt: Union[str, List[str]],
313-
prompt_2: Union[str, List[str]],
313+
prompt_2: Optional[Union[str, List[str]]] = None,
314314
device: Optional[torch.device] = None,
315315
num_images_per_prompt: int = 1,
316316
prompt_embeds: Optional[torch.FloatTensor] = None,

src/diffusers/pipelines/flux/pipeline_flux_control.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,7 @@ def _get_clip_prompt_embeds(
324324
def encode_prompt(
325325
self,
326326
prompt: Union[str, List[str]],
327-
prompt_2: Union[str, List[str]],
327+
prompt_2: Optional[Union[str, List[str]]] = None,
328328
device: Optional[torch.device] = None,
329329
num_images_per_prompt: int = 1,
330330
prompt_embeds: Optional[torch.FloatTensor] = None,

0 commit comments

Comments
 (0)