|  | 
|  | 1 | +# Copyright 2024 The HuggingFace Team. All rights reserved. | 
|  | 2 | +# | 
|  | 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 4 | +# you may not use this file except in compliance with the License. | 
|  | 5 | +# You may obtain a copy of the License at | 
|  | 6 | +# | 
|  | 7 | +#     http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 8 | +# | 
|  | 9 | +# Unless required by applicable law or agreed to in writing, software | 
|  | 10 | +# distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 12 | +# See the License for the specific language governing permissions and | 
|  | 13 | +# limitations under the License. | 
|  | 14 | + | 
|  | 15 | +import math | 
|  | 16 | +from typing import Optional, Union, Tuple, List | 
|  | 17 | + | 
|  | 18 | +import torch | 
|  | 19 | + | 
|  | 20 | +from .guider_utils import BaseGuidance, rescale_noise_cfg, _default_prepare_inputs | 
|  | 21 | + | 
|  | 22 | + | 
class CFGPlusPlusGuidance(BaseGuidance):
    """
    CFG++: https://huggingface.co/papers/2406.08070

    Args:
        guidance_scale (`float`, defaults to `0.7`):
            The scale parameter for classifier-free guidance. Higher values result in stronger conditioning on the text
            prompt, while lower values allow for more freedom in generation. Higher values may lead to saturation and
            deterioration of image quality.
        guidance_rescale (`float`, defaults to `0.0`):
            The rescale factor applied to the noise predictions. This is used to improve image quality and fix
            overexposure. Based on Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
            Flawed](https://huggingface.co/papers/2305.08891).
        use_original_formulation (`bool`, defaults to `False`):
            Whether to use the original formulation of classifier-free guidance as proposed in the paper. By default,
            we use the diffusers-native implementation that has been in the codebase for a long time. See
            [~guiders.classifier_free_guidance.ClassifierFreeGuidance] for more details.
        start (`float`, defaults to `0.0`):
            The fraction of the total number of denoising steps after which guidance starts.
        stop (`float`, defaults to `1.0`):
            The fraction of the total number of denoising steps after which guidance stops.
    """

    # Order matters: prepare_outputs stores the first prepared prediction under
    # "pred_cond" and the second under "pred_uncond".
    _input_predictions = ["pred_cond", "pred_uncond"]

    def __init__(
        self,
        guidance_scale: float = 0.7,
        guidance_rescale: float = 0.0,
        use_original_formulation: bool = False,
        start: float = 0.0,
        stop: float = 1.0,
    ):
        super().__init__(start, stop)

        self.guidance_scale = guidance_scale
        self.guidance_rescale = guidance_rescale
        self.use_original_formulation = use_original_formulation

    def prepare_inputs(self, denoiser: torch.nn.Module, *args: Union[Tuple[torch.Tensor], List[torch.Tensor]]) -> Tuple[List[torch.Tensor], ...]:
        """Split/duplicate the model inputs into one set per active condition (1 or 2)."""
        return _default_prepare_inputs(denoiser, self.num_conditions, *args)

    def prepare_outputs(self, denoiser: torch.nn.Module, pred: torch.Tensor) -> None:
        """Record one denoiser output per call, in `_input_predictions` order.

        Raises:
            ValueError: If called more times than `num_conditions` for the current step.
        """
        self._num_outputs_prepared += 1
        if self._num_outputs_prepared > self.num_conditions:
            raise ValueError(f"Expected {self.num_conditions} outputs, but prepare_outputs called more times.")
        # Map the i-th prepared output to its named slot ("pred_cond", then "pred_uncond").
        key = self._input_predictions[self._num_outputs_prepared - 1]
        self._preds[key] = pred

    def forward(self, pred_cond: torch.Tensor, pred_uncond: Optional[torch.Tensor] = None) -> torch.Tensor:
        """Combine conditional/unconditional predictions into the guided prediction.

        When guidance is inactive (disabled or outside the [start, stop) step range),
        the conditional prediction is returned unchanged.

        Raises:
            ValueError: If CFG++ is active but `pred_uncond` was not provided.
        """
        if not self._is_cfgpp_enabled():
            pred = pred_cond
        else:
            if pred_uncond is None:
                # Without this check, the subtraction below fails with an opaque
                # TypeError (Tensor - NoneType); surface the real problem instead.
                raise ValueError(
                    "CFG++ guidance is enabled, so `pred_uncond` must be provided alongside `pred_cond`."
                )
            shift = pred_cond - pred_uncond
            # Original paper formulation: pred_cond + s * (pred_cond - pred_uncond);
            # diffusers-native formulation: pred_uncond + s * (pred_cond - pred_uncond).
            pred = pred_cond if self.use_original_formulation else pred_uncond
            pred = pred + self.guidance_scale * shift

        if self.guidance_rescale > 0.0:
            pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale)

        return pred

    def post_scheduler_step(self, pred: torch.Tensor) -> torch.Tensor:
        """Apply the CFG++ correction to the scheduler-stepped latents.

        NOTE(review): relies on `self._sigma_next` being set by the pipeline/base
        class before this is called — confirm against the caller.
        """
        if self._is_cfgpp_enabled():
            # TODO(aryan): this probably only makes sense for EulerDiscreteScheduler. Look into the others later!
            pred_cond = self._preds["pred_cond"]
            pred_uncond = self._preds["pred_uncond"]
            diff = pred_uncond - pred_cond
            pred = pred + diff * self.guidance_scale * self._sigma_next
        return pred

    @property
    def is_conditional(self) -> bool:
        # True only before any output has been prepared for the current step,
        # i.e. the first denoiser pass is the conditional one.
        return self._num_outputs_prepared == 0

    @property
    def num_conditions(self) -> int:
        # 1 (conditional only) when guidance is inactive, 2 (cond + uncond) when active.
        num_conditions = 1
        if self._is_cfgpp_enabled():
            num_conditions += 1
        return num_conditions

    def _is_cfgpp_enabled(self) -> bool:
        """Return whether CFG++ should be applied at the current denoising step."""
        if not self._enabled:
            return False

        # Before `_num_inference_steps` is known, assume guidance is active.
        is_within_range = True
        if self._num_inference_steps is not None:
            skip_start_step = int(self._start * self._num_inference_steps)
            skip_stop_step = int(self._stop * self._num_inference_steps)
            is_within_range = skip_start_step <= self._step < skip_stop_step

        return is_within_range
0 commit comments