Commit 35e859b

Merge branch 'main' into parallel-shards-loading
2 parents d34f426 + 5780776

24 files changed (+413 / -122 lines)

.github/workflows/nightly_tests.yml

Lines changed: 1 addition & 1 deletion
@@ -333,7 +333,7 @@ jobs:
             additional_deps: ["peft"]
           - backend: "gguf"
             test_location: "gguf"
-            additional_deps: ["peft"]
+            additional_deps: ["peft", "kernels"]
           - backend: "torchao"
             test_location: "torchao"
             additional_deps: []

docs/source/en/quantization/gguf.md

Lines changed: 10 additions & 0 deletions
@@ -53,6 +53,16 @@ image = pipe(prompt, generator=torch.manual_seed(0)).images[0]
 image.save("flux-gguf.png")
 ```
 
+## Using Optimized CUDA Kernels with GGUF
+
+Optimized CUDA kernels can accelerate GGUF quantized model inference by approximately 10%. This functionality requires a compatible GPU with `torch.cuda.get_device_capability` greater than 7 and the kernels library:
+
+```shell
+pip install -U kernels
+```
+
+Once installed, set `DIFFUSERS_GGUF_CUDA_KERNELS=true` to use optimized kernels when available. Note that CUDA kernels may introduce minor numerical differences compared to the original GGUF implementation, potentially causing subtle visual variations in generated images. To disable CUDA kernel usage, set the environment variable `DIFFUSERS_GGUF_CUDA_KERNELS=false`.
+
 ## Supported Quantization Types
 
 - BF16
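
For reference, a minimal sketch of how this flag combines with the GGUF loading example earlier in the guide; the checkpoint URL, prompt, and CPU-offload call below are illustrative and not part of this commit:

```python
# Sketch: enable the optimized CUDA kernels, then load and run a GGUF-quantized Flux transformer.
# The environment variable is set before loading so it is in effect regardless of when it is read.
import os

os.environ["DIFFUSERS_GGUF_CUDA_KERNELS"] = "true"

import torch
from diffusers import FluxPipeline, FluxTransformer2DModel, GGUFQuantizationConfig

# Illustrative GGUF checkpoint; any Flux GGUF file should work the same way.
ckpt_path = "https://huggingface.co/city96/FLUX.1-dev-gguf/blob/main/flux1-dev-Q2_K.gguf"
transformer = FluxTransformer2DModel.from_single_file(
    ckpt_path,
    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
    torch_dtype=torch.bfloat16,
)
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    transformer=transformer,
    torch_dtype=torch.bfloat16,
)
pipe.enable_model_cpu_offload()

image = pipe("A cat holding a sign that says hello world", generator=torch.manual_seed(0)).images[0]
image.save("flux-gguf.png")
```

Per the doc text above, the kernels are only used "when available", so GPUs below the required capability or environments without the `kernels` package should fall back to the standard GGUF dequantization path.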

src/diffusers/hooks/_helpers.py

Lines changed: 1 addition & 0 deletions
@@ -133,6 +133,7 @@ def _register_attention_processors_metadata():
             skip_processor_output_fn=_skip_proc_output_fn_Attention_WanAttnProcessor2_0,
         ),
     )
+
     # FluxAttnProcessor
     AttentionProcessorRegistry.register(
         model_class=FluxAttnProcessor,

src/diffusers/hooks/group_offloading.py

Lines changed: 89 additions & 104 deletions
Large diffs are not rendered by default.

src/diffusers/hooks/utils.py

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+
+from ._common import _ALL_TRANSFORMER_BLOCK_IDENTIFIERS, _ATTENTION_CLASSES, _FEEDFORWARD_CLASSES
+
+
+def _get_identifiable_transformer_blocks_in_module(module: torch.nn.Module):
+    module_list_with_transformer_blocks = []
+    for name, submodule in module.named_modules():
+        name_endswith_identifier = any(name.endswith(identifier) for identifier in _ALL_TRANSFORMER_BLOCK_IDENTIFIERS)
+        is_modulelist = isinstance(submodule, torch.nn.ModuleList)
+        if name_endswith_identifier and is_modulelist:
+            module_list_with_transformer_blocks.append((name, submodule))
+    return module_list_with_transformer_blocks
+
+
+def _get_identifiable_attention_layers_in_module(module: torch.nn.Module):
+    attention_layers = []
+    for name, submodule in module.named_modules():
+        if isinstance(submodule, _ATTENTION_CLASSES):
+            attention_layers.append((name, submodule))
+    return attention_layers
+
+
+def _get_identifiable_feedforward_layers_in_module(module: torch.nn.Module):
+    feedforward_layers = []
+    for name, submodule in module.named_modules():
+        if isinstance(submodule, _FEEDFORWARD_CLASSES):
+            feedforward_layers.append((name, submodule))
+    return feedforward_layers
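
As a brief usage note, each of these helpers walks `named_modules()` and returns `(name, submodule)` pairs. A minimal sketch, assuming a Flux transformer checkpoint is available (the checkpoint name is illustrative, and these are private utilities, not a public API):

```python
# Sketch: enumerate transformer-block ModuleLists and attention layers with the new helpers.
# Private utilities; names and behavior may change between releases.
from diffusers import FluxTransformer2DModel
from diffusers.hooks.utils import (
    _get_identifiable_attention_layers_in_module,
    _get_identifiable_transformer_blocks_in_module,
)

model = FluxTransformer2DModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev", subfolder="transformer"
)

blocks = _get_identifiable_transformer_blocks_in_module(model)          # [(name, nn.ModuleList), ...]
attention_layers = _get_identifiable_attention_layers_in_module(model)  # [(name, attention module), ...]
print(len(blocks), "block lists,", len(attention_layers), "attention layers")
```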

src/diffusers/pipelines/flux/pipeline_flux.py

Lines changed: 1 addition & 1 deletion
@@ -310,7 +310,7 @@ def _get_clip_prompt_embeds(
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
-        prompt_2: Union[str, List[str]],
+        prompt_2: Optional[Union[str, List[str]]] = None,
         device: Optional[torch.device] = None,
         num_images_per_prompt: int = 1,
         prompt_embeds: Optional[torch.FloatTensor] = None,

src/diffusers/pipelines/flux/pipeline_flux_control.py

Lines changed: 1 addition & 1 deletion
@@ -324,7 +324,7 @@ def _get_clip_prompt_embeds(
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
-        prompt_2: Union[str, List[str]],
+        prompt_2: Optional[Union[str, List[str]]] = None,
         device: Optional[torch.device] = None,
         num_images_per_prompt: int = 1,
         prompt_embeds: Optional[torch.FloatTensor] = None,

src/diffusers/pipelines/flux/pipeline_flux_control_img2img.py

Lines changed: 1 addition & 1 deletion
@@ -335,7 +335,7 @@ def _get_clip_prompt_embeds(
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
-        prompt_2: Union[str, List[str]],
+        prompt_2: Optional[Union[str, List[str]]] = None,
         device: Optional[torch.device] = None,
         num_images_per_prompt: int = 1,
         prompt_embeds: Optional[torch.FloatTensor] = None,

src/diffusers/pipelines/flux/pipeline_flux_control_inpaint.py

Lines changed: 1 addition & 1 deletion
@@ -374,7 +374,7 @@ def _get_clip_prompt_embeds(
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
-        prompt_2: Union[str, List[str]],
+        prompt_2: Optional[Union[str, List[str]]] = None,
         device: Optional[torch.device] = None,
         num_images_per_prompt: int = 1,
         prompt_embeds: Optional[torch.FloatTensor] = None,

src/diffusers/pipelines/flux/pipeline_flux_controlnet.py

Lines changed: 1 addition & 1 deletion
@@ -341,7 +341,7 @@ def _get_clip_prompt_embeds(
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
-        prompt_2: Union[str, List[str]],
+        prompt_2: Optional[Union[str, List[str]]] = None,
         device: Optional[torch.device] = None,
         num_images_per_prompt: int = 1,
         prompt_embeds: Optional[torch.FloatTensor] = None,
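
Across these five Flux pipelines the change is identical: `prompt_2` now defaults to `None` instead of being a required positional argument. A minimal sketch of calling `encode_prompt` without it (the checkpoint name is illustrative):

```python
# Sketch: encode_prompt no longer requires an explicit prompt_2 argument.
import torch
from diffusers import FluxPipeline

pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)

# Per the pipeline docstring, prompt_2 falls back to prompt when it is not provided.
prompt_embeds, pooled_prompt_embeds, text_ids = pipe.encode_prompt(
    prompt="A cat holding a sign that says hello world",
)
```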
