Skip to content

Commit d5b403f

Browse files
authored
Merge branch 'modular-diffusers' into modular-guider-config-mixin
2 parents eb19888 + 13c51bb commit d5b403f

File tree

3 files changed

+118
-0
lines changed

3 files changed

+118
-0
lines changed

src/diffusers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@
138138
"AutoGuidance",
139139
"ClassifierFreeGuidance",
140140
"ClassifierFreeZeroStarGuidance",
141+
"PerturbedAttentionGuidance",
141142
"SkipLayerGuidance",
142143
"SmoothedEnergyGuidance",
143144
"TangentialClassifierFreeGuidance",
@@ -785,6 +786,7 @@
785786
AutoGuidance,
786787
ClassifierFreeGuidance,
787788
ClassifierFreeZeroStarGuidance,
789+
PerturbedAttentionGuidance,
788790
SkipLayerGuidance,
789791
SmoothedEnergyGuidance,
790792
TangentialClassifierFreeGuidance,

src/diffusers/guiders/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from .auto_guidance import AutoGuidance
2323
from .classifier_free_guidance import ClassifierFreeGuidance
2424
from .classifier_free_zero_star_guidance import ClassifierFreeZeroStarGuidance
25+
from .perturbed_attention_guidance import PerturbedAttentionGuidance
2526
from .skip_layer_guidance import SkipLayerGuidance
2627
from .smoothed_energy_guidance import SmoothedEnergyGuidance
2728
from .tangential_classifier_free_guidance import TangentialClassifierFreeGuidance
@@ -31,6 +32,7 @@
3132
AutoGuidance,
3233
ClassifierFreeGuidance,
3334
ClassifierFreeZeroStarGuidance,
35+
PerturbedAttentionGuidance,
3436
SkipLayerGuidance,
3537
SmoothedEnergyGuidance,
3638
TangentialClassifierFreeGuidance,
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
# Copyright 2025 The HuggingFace Team. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from typing import List, Optional, Union
16+
17+
from ..hooks import LayerSkipConfig
18+
from .skip_layer_guidance import SkipLayerGuidance
19+
20+
21+
class PerturbedAttentionGuidance(SkipLayerGuidance):
    """
    Perturbed Attention Guidance (PAG): https://huggingface.co/papers/2403.17377

    The intuition behind PAG can be thought of as moving the CFG predicted distribution estimates further away from
    worse versions of the conditional distribution estimates. PAG was one of the first techniques to introduce the idea
    of using a worse version of the trained model for better guiding itself in the denoising process. It perturbs the
    attention scores of the latent stream by replacing the score matrix with an identity matrix for selectively chosen
    layers.

    Additional reading:
    - [Guiding a Diffusion Model with a Bad Version of Itself](https://huggingface.co/papers/2406.02507)

    PAG is implemented as a specialization of the SkipLayerGuidance due to similarities in the configuration parameters
    and implementation details.

    Args:
        guidance_scale (`float`, defaults to `7.5`):
            The scale parameter for classifier-free guidance. Higher values result in stronger conditioning on the text
            prompt, while lower values allow for more freedom in generation. Higher values may lead to saturation and
            deterioration of image quality.
        perturbed_guidance_scale (`float`, defaults to `2.8`):
            The scale parameter for perturbed attention guidance.
        perturbed_guidance_start (`float`, defaults to `0.01`):
            The fraction of the total number of denoising steps after which perturbed attention guidance starts.
        perturbed_guidance_stop (`float`, defaults to `0.2`):
            The fraction of the total number of denoising steps after which perturbed attention guidance stops.
        perturbed_guidance_layers (`int` or `List[int]`, *optional*):
            The layer indices to apply perturbed attention guidance to. Can be a single integer or a list of integers.
            If not provided, `skip_layer_config` must be provided.
        skip_layer_config (`LayerSkipConfig` or `List[LayerSkipConfig]`, *optional*):
            The configuration for the perturbed attention guidance. Can be a single `LayerSkipConfig` or a list of
            `LayerSkipConfig`. If not provided, `perturbed_guidance_layers` must be provided.
        guidance_rescale (`float`, defaults to `0.0`):
            The rescale factor applied to the noise predictions. This is used to improve image quality and fix
            overexposure. Based on Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
            Flawed](https://huggingface.co/papers/2305.08891).
        use_original_formulation (`bool`, defaults to `False`):
            Whether to use the original formulation of classifier-free guidance as proposed in the paper. By default,
            we use the diffusers-native implementation that has been in the codebase for a long time. See
            [~guiders.classifier_free_guidance.ClassifierFreeGuidance] for more details.
        start (`float`, defaults to `0.0`):
            The fraction of the total number of denoising steps after which guidance starts.
        stop (`float`, defaults to `1.0`):
            The fraction of the total number of denoising steps after which guidance stops.

    Raises:
        ValueError:
            If neither `perturbed_guidance_layers` nor `skip_layer_config` is provided, or if both are provided.
    """

    # NOTE: The current implementation does not account for joint latent conditioning (text + image/video tokens in
    # the same latent stream). It assumes the entire latent is a single stream of visual tokens. It would be very
    # complex to support joint latent conditioning in a model-agnostic manner without specializing the implementation
    # for each model architecture.

    def __init__(
        self,
        guidance_scale: float = 7.5,
        perturbed_guidance_scale: float = 2.8,
        perturbed_guidance_start: float = 0.01,
        perturbed_guidance_stop: float = 0.2,
        perturbed_guidance_layers: Optional[Union[int, List[int]]] = None,
        skip_layer_config: Optional[Union[LayerSkipConfig, List[LayerSkipConfig]]] = None,
        guidance_rescale: float = 0.0,
        use_original_formulation: bool = False,
        start: float = 0.0,
        stop: float = 1.0,
    ):
        if skip_layer_config is None:
            if perturbed_guidance_layers is None:
                raise ValueError(
                    "`perturbed_guidance_layers` must be provided if `skip_layer_config` is not specified."
                )
            # A single index is a documented input; wrap it so the config always receives a list of indices.
            if isinstance(perturbed_guidance_layers, int):
                perturbed_guidance_layers = [perturbed_guidance_layers]
            # Request only the attention-score perturbation for the chosen layers; the attention-block and
            # feed-forward skip flags are explicitly turned off.
            skip_layer_config = LayerSkipConfig(
                indices=perturbed_guidance_layers,
                skip_attention=False,
                skip_attention_scores=True,
                skip_ff=False,
            )
        elif perturbed_guidance_layers is not None:
            raise ValueError(
                "`perturbed_guidance_layers` should not be provided if `skip_layer_config` is specified."
            )

        super().__init__(
            guidance_scale=guidance_scale,
            skip_layer_guidance_scale=perturbed_guidance_scale,
            skip_layer_guidance_start=perturbed_guidance_start,
            skip_layer_guidance_stop=perturbed_guidance_stop,
            # The layer indices are already encoded in `skip_layer_config`. Forwarding them as well would hand the
            # parent two competing specifications (layers and config are treated as mutually exclusive), so only
            # the config is passed on.
            skip_layer_guidance_layers=None,
            skip_layer_config=skip_layer_config,
            guidance_rescale=guidance_rescale,
            use_original_formulation=use_original_formulation,
            start=start,
            stop=stop,
        )

0 commit comments

Comments
 (0)