Skip to content

Commit 6b05803

Browse files
committed
apply changes to other pipelines
1 parent b7959dc commit 6b05803

File tree

5 files changed

+143
-59
lines changed

5 files changed

+143
-59
lines changed

src/diffusers/pipelines/qwenimage/pipeline_qwenimage.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -486,9 +486,8 @@ def __call__(
486486
scale is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate images
487487
that are closely linked to the text `prompt`, usually at the expense of lower image quality. This
488488
parameter in the pipeline is there to support future guidance-distilled models when they come up. It is
489-
ignored when not using guidance distilled models. To enable classifier-free guidance for a non
490-
guidance-distilled model, please pass `true_cfg_scale > 1.0` and `negative_prompt` (even an empty
491-
negative prompt like " ") should enable classifier-free guidance computations.
489+
ignored when not using guidance distilled models. To enable traditional classifier-free guidance, please pass `true_cfg_scale > 1.0`
490+
and `negative_prompt` (even an empty negative prompt like " " should enable classifier-free guidance computations).
492491
num_images_per_prompt (`int`, *optional*, defaults to 1):
493492
The number of images to generate per prompt.
494493
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):

src/diffusers/pipelines/qwenimage/pipeline_qwenimage_controlnet.py

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,7 @@ def __call__(
535535
width: Optional[int] = None,
536536
num_inference_steps: int = 50,
537537
sigmas: Optional[List[float]] = None,
538-
guidance_scale: float = 1.0,
538+
guidance_scale: Optional[float] = None,
539539
control_guidance_start: Union[float, List[float]] = 0.0,
540540
control_guidance_end: Union[float, List[float]] = 1.0,
541541
control_image: PipelineImageInput = None,
@@ -566,7 +566,12 @@ def __call__(
566566
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `true_cfg_scale` is
567567
not greater than `1`).
568568
true_cfg_scale (`float`, *optional*, defaults to 1.0):
569-
When > 1.0 and a provided `negative_prompt`, enables true classifier-free guidance.
569+
Guidance scale as defined in [Classifier-Free Diffusion
570+
Guidance](https://huggingface.co/papers/2207.12598). `true_cfg_scale` is defined as `w` of equation 2
571+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Classifier-free guidance is enabled by
572+
setting `true_cfg_scale > 1` and providing a `negative_prompt`. Higher guidance scale encourages to
573+
generate images that are closely linked to the text `prompt`, usually at the expense of lower image
574+
quality.
570575
height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
571576
The height in pixels of the generated image. This is set to 1024 by default for the best results.
572577
width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
@@ -578,12 +583,15 @@ def __call__(
578583
Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
579584
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
580585
will be used.
581-
guidance_scale (`float`, *optional*, defaults to 3.5):
582-
Guidance scale as defined in [Classifier-Free Diffusion
583-
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
584-
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
585-
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
586-
the text `prompt`, usually at the expense of lower image quality.
586+
guidance_scale (`float`, *optional*, defaults to None):
587+
A guidance scale value for guidance distilled models. Unlike the traditional classifier-free guidance
588+
where the guidance scale is applied during inference through noise prediction rescaling, guidance
589+
distilled models take the guidance scale directly as an input parameter during forward pass. Guidance
590+
scale is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate images
591+
that are closely linked to the text `prompt`, usually at the expense of lower image quality. This
592+
parameter in the pipeline is there to support future guidance-distilled models when they come up. It is
593+
ignored when not using guidance distilled models. To enable traditional classifier-free guidance, please pass `true_cfg_scale > 1.0`
594+
and `negative_prompt` (even an empty negative prompt like " " should enable classifier-free guidance computations).
587595
num_images_per_prompt (`int`, *optional*, defaults to 1):
588596
The number of images to generate per prompt.
589597
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -674,6 +682,16 @@ def __call__(
674682
has_neg_prompt = negative_prompt is not None or (
675683
negative_prompt_embeds is not None and negative_prompt_embeds_mask is not None
676684
)
685+
686+
if true_cfg_scale > 1 and not has_neg_prompt:
687+
logger.warning(
688+
f"true_cfg_scale is passed as {true_cfg_scale}, but classifier-free guidance is not enabled since no negative_prompt is provided."
689+
)
690+
elif true_cfg_scale <= 1 and has_neg_prompt:
691+
logger.warning(
692+
" negative_prompt is passed but classifier-free guidance is not enabled since true_cfg_scale <= 1"
693+
)
694+
677695
do_true_cfg = true_cfg_scale > 1 and has_neg_prompt
678696
prompt_embeds, prompt_embeds_mask = self.encode_prompt(
679697
prompt=prompt,
@@ -822,10 +840,17 @@ def __call__(
822840
controlnet_keep.append(keeps[0] if isinstance(self.controlnet, QwenImageControlNetModel) else keeps)
823841

824842
# handle guidance
825-
if self.transformer.config.guidance_embeds:
843+
if self.transformer.config.guidance_embeds and guidance_scale is None:
844+
raise ValueError("guidance_scale is required for guidance-distilled model.")
845+
elif self.transformer.config.guidance_embeds:
826846
guidance = torch.full([1], guidance_scale, device=device, dtype=torch.float32)
827847
guidance = guidance.expand(latents.shape[0])
828-
else:
848+
elif not self.transformer.config.guidance_embeds and guidance_scale is not None:
849+
logger.warning(
850+
f"guidance_scale is passed as {guidance_scale}, but ignored since the model is not guidance-distilled."
851+
)
852+
guidance = None
853+
elif not self.transformer.config.guidance_embeds and guidance_scale is None:
829854
guidance = None
830855

831856
if self.attention_kwargs is None:

src/diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py

Lines changed: 36 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,7 @@ def __call__(
532532
width: Optional[int] = None,
533533
num_inference_steps: int = 50,
534534
sigmas: Optional[List[float]] = None,
535-
guidance_scale: float = 1.0,
535+
guidance_scale: Optional[float] = None,
536536
num_images_per_prompt: int = 1,
537537
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
538538
latents: Optional[torch.Tensor] = None,
@@ -558,8 +558,13 @@ def __call__(
558558
The prompt or prompts not to guide the image generation. If not defined, one has to pass
559559
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `true_cfg_scale` is
560560
not greater than `1`).
561-
true_cfg_scale (`float`, *optional*, defaults to 1.0):
562-
When > 1.0 and a provided `negative_prompt`, enables true classifier-free guidance.
561+
true_cfg_scale (`float`, *optional*, defaults to 1.0):
562+
Guidance scale as defined in [Classifier-Free Diffusion
563+
Guidance](https://huggingface.co/papers/2207.12598). `true_cfg_scale` is defined as `w` of equation 2
564+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Classifier-free guidance is enabled by
565+
setting `true_cfg_scale > 1` and providing a `negative_prompt`. Higher guidance scale encourages to
566+
generate images that are closely linked to the text `prompt`, usually at the expense of lower image
567+
quality.
563568
height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
564569
The height in pixels of the generated image. This is set to 1024 by default for the best results.
565570
width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
@@ -571,17 +576,15 @@ def __call__(
571576
Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
572577
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
573578
will be used.
574-
guidance_scale (`float`, *optional*, defaults to 3.5):
575-
Guidance scale as defined in [Classifier-Free Diffusion
576-
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
577-
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
578-
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
579-
the text `prompt`, usually at the expense of lower image quality.
580-
581-
This parameter in the pipeline is there to support future guidance-distilled models when they come up.
582-
Note that passing `guidance_scale` to the pipeline is ineffective. To enable classifier-free guidance,
583-
please pass `true_cfg_scale` and `negative_prompt` (even an empty negative prompt like " ") should
584-
enable classifier-free guidance computations.
579+
guidance_scale (`float`, *optional*, defaults to None):
580+
A guidance scale value for guidance distilled models. Unlike the traditional classifier-free guidance
581+
where the guidance scale is applied during inference through noise prediction rescaling, guidance
582+
distilled models take the guidance scale directly as an input parameter during forward pass. Guidance
583+
scale is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate images
584+
that are closely linked to the text `prompt`, usually at the expense of lower image quality. This
585+
parameter in the pipeline is there to support future guidance-distilled models when they come up. It is
586+
ignored when not using guidance distilled models. To enable traditional classifier-free guidance, please pass `true_cfg_scale > 1.0`
587+
and `negative_prompt` (even an empty negative prompt like " " should enable classifier-free guidance computations).
585588
num_images_per_prompt (`int`, *optional*, defaults to 1):
586589
The number of images to generate per prompt.
587590
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -672,6 +675,16 @@ def __call__(
672675
has_neg_prompt = negative_prompt is not None or (
673676
negative_prompt_embeds is not None and negative_prompt_embeds_mask is not None
674677
)
678+
679+
if true_cfg_scale > 1 and not has_neg_prompt:
680+
logger.warning(
681+
f"true_cfg_scale is passed as {true_cfg_scale}, but classifier-free guidance is not enabled since no negative_prompt is provided."
682+
)
683+
elif true_cfg_scale <= 1 and has_neg_prompt:
684+
logger.warning(
685+
" negative_prompt is passed but classifier-free guidance is not enabled since true_cfg_scale <= 1"
686+
)
687+
675688
do_true_cfg = true_cfg_scale > 1 and has_neg_prompt
676689
prompt_embeds, prompt_embeds_mask = self.encode_prompt(
677690
image=prompt_image,
@@ -734,10 +747,17 @@ def __call__(
734747
self._num_timesteps = len(timesteps)
735748

736749
# handle guidance
737-
if self.transformer.config.guidance_embeds:
750+
if self.transformer.config.guidance_embeds and guidance_scale is None:
751+
raise ValueError("guidance_scale is required for guidance-distilled model.")
752+
elif self.transformer.config.guidance_embeds:
738753
guidance = torch.full([1], guidance_scale, device=device, dtype=torch.float32)
739754
guidance = guidance.expand(latents.shape[0])
740-
else:
755+
elif not self.transformer.config.guidance_embeds and guidance_scale is not None:
756+
logger.warning(
757+
f"guidance_scale is passed as {guidance_scale}, but ignored since the model is not guidance-distilled."
758+
)
759+
guidance = None
760+
elif not self.transformer.config.guidance_embeds and guidance_scale is None:
741761
guidance = None
742762

743763
if self.attention_kwargs is None:

src/diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,7 @@ def __call__(
511511
strength: float = 0.6,
512512
num_inference_steps: int = 50,
513513
sigmas: Optional[List[float]] = None,
514-
guidance_scale: float = 1.0,
514+
guidance_scale: Optional[float] = None,
515515
num_images_per_prompt: int = 1,
516516
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
517517
latents: Optional[torch.Tensor] = None,
@@ -544,7 +544,12 @@ def __call__(
544544
list of arrays, the expected shape should be `(B, H, W, C)` or `(H, W, C)` It can also accept image
545545
latents as `image`, but if passing latents directly it is not encoded again.
546546
true_cfg_scale (`float`, *optional*, defaults to 1.0):
547-
When > 1.0 and a provided `negative_prompt`, enables true classifier-free guidance.
547+
Guidance scale as defined in [Classifier-Free Diffusion
548+
Guidance](https://huggingface.co/papers/2207.12598). `true_cfg_scale` is defined as `w` of equation 2
549+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Classifier-free guidance is enabled by
550+
setting `true_cfg_scale > 1` and providing a `negative_prompt`. Higher guidance scale encourages to
551+
generate images that are closely linked to the text `prompt`, usually at the expense of lower image
552+
quality.
548553
height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
549554
The height in pixels of the generated image. This is set to 1024 by default for the best results.
550555
width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
@@ -562,17 +567,15 @@ def __call__(
562567
Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
563568
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
564569
will be used.
565-
guidance_scale (`float`, *optional*, defaults to 3.5):
566-
Guidance scale as defined in [Classifier-Free Diffusion
567-
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
568-
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
569-
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
570-
the text `prompt`, usually at the expense of lower image quality.
571-
572-
This parameter in the pipeline is there to support future guidance-distilled models when they come up.
573-
Note that passing `guidance_scale` to the pipeline is ineffective. To enable classifier-free guidance,
574-
please pass `true_cfg_scale` and `negative_prompt` (even an empty negative prompt like " ") should
575-
enable classifier-free guidance computations.
570+
guidance_scale (`float`, *optional*, defaults to None):
571+
A guidance scale value for guidance distilled models. Unlike the traditional classifier-free guidance
572+
where the guidance scale is applied during inference through noise prediction rescaling, guidance
573+
distilled models take the guidance scale directly as an input parameter during forward pass. Guidance
574+
scale is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate images
575+
that are closely linked to the text `prompt`, usually at the expense of lower image quality. This
576+
parameter in the pipeline is there to support future guidance-distilled models when they come up. It is
577+
ignored when not using guidance distilled models. To enable traditional classifier-free guidance, please pass `true_cfg_scale > 1.0`
578+
and `negative_prompt` (even an empty negative prompt like " " should enable classifier-free guidance computations).
576579
num_images_per_prompt (`int`, *optional*, defaults to 1):
577580
The number of images to generate per prompt.
578581
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -657,6 +660,16 @@ def __call__(
657660
has_neg_prompt = negative_prompt is not None or (
658661
negative_prompt_embeds is not None and negative_prompt_embeds_mask is not None
659662
)
663+
664+
if true_cfg_scale > 1 and not has_neg_prompt:
665+
logger.warning(
666+
f"true_cfg_scale is passed as {true_cfg_scale}, but classifier-free guidance is not enabled since no negative_prompt is provided."
667+
)
668+
elif true_cfg_scale <= 1 and has_neg_prompt:
669+
logger.warning(
670+
" negative_prompt is passed but classifier-free guidance is not enabled since true_cfg_scale <= 1"
671+
)
672+
660673
do_true_cfg = true_cfg_scale > 1 and has_neg_prompt
661674
prompt_embeds, prompt_embeds_mask = self.encode_prompt(
662675
prompt=prompt,
@@ -721,10 +734,17 @@ def __call__(
721734
self._num_timesteps = len(timesteps)
722735

723736
# handle guidance
724-
if self.transformer.config.guidance_embeds:
737+
if self.transformer.config.guidance_embeds and guidance_scale is None:
738+
raise ValueError("guidance_scale is required for guidance-distilled model.")
739+
elif self.transformer.config.guidance_embeds:
725740
guidance = torch.full([1], guidance_scale, device=device, dtype=torch.float32)
726741
guidance = guidance.expand(latents.shape[0])
727-
else:
742+
elif not self.transformer.config.guidance_embeds and guidance_scale is not None:
743+
logger.warning(
744+
f"guidance_scale is passed as {guidance_scale}, but ignored since the model is not guidance-distilled."
745+
)
746+
guidance = None
747+
elif not self.transformer.config.guidance_embeds and guidance_scale is None:
728748
guidance = None
729749

730750
if self.attention_kwargs is None:

0 commit comments

Comments
 (0)