Skip to content

Commit 342c1f9

Browse files
committed
up
1 parent 9918d13 commit 342c1f9

File tree

1 file changed

+33
-10
lines changed

1 file changed

+33
-10
lines changed

src/diffusers/pipelines/qwenimage/pipeline_qwenimage.py

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,7 @@ def __call__(
435435
width: Optional[int] = None,
436436
num_inference_steps: int = 50,
437437
sigmas: Optional[List[float]] = None,
438-
guidance_scale: float = 1.0,
438+
guidance_scale: Optional[float] = None,
439439
num_images_per_prompt: int = 1,
440440
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
441441
latents: Optional[torch.Tensor] = None,
@@ -462,7 +462,12 @@ def __call__(
462462
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `true_cfg_scale` is
463463
not greater than `1`).
464464
true_cfg_scale (`float`, *optional*, defaults to 1.0):
465-
When > 1.0 and a provided `negative_prompt`, enables true classifier-free guidance.
465+
Guidance scale as defined in [Classifier-Free Diffusion
466+
Guidance](https://huggingface.co/papers/2207.12598). `true_cfg_scale` is defined as `w` of equation 2
467+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Classifier-free guidance is enabled by
468+
setting `true_cfg_scale > 1` and providing a `negative_prompt`. A higher guidance scale encourages the model to
469+
generate images that are closely linked to the text `prompt`, usually at the expense of lower image
470+
quality.
466471
height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
467472
The height in pixels of the generated image. This is set to 1024 by default for the best results.
468473
width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
@@ -474,12 +479,13 @@ def __call__(
474479
Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
475480
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
476481
will be used.
477-
guidance_scale (`float`, *optional*, defaults to 3.5):
478-
Guidance scale as defined in [Classifier-Free Diffusion
479-
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
480-
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
481-
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
482-
the text `prompt`, usually at the expense of lower image quality.
482+
guidance_scale (`float`, *optional*, defaults to None):
483+
A guidance scale value for guidance distilled models. Unlike the traditional classifier-free guidance
484+
where the guidance scale is applied during inference through noise prediction rescaling, guidance
485+
distilled models take the guidance scale directly as an input parameter during forward pass. Guidance
486+
scale is enabled by setting `guidance_scale > 1`. A higher guidance scale encourages the model to generate images
487+
that are closely linked to the text `prompt`, usually at the expense of lower image quality. Ignored
488+
when not using guidance distilled models.
483489
num_images_per_prompt (`int`, *optional*, defaults to 1):
484490
The number of images to generate per prompt.
485491
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -559,6 +565,16 @@ def __call__(
559565
has_neg_prompt = negative_prompt is not None or (
560566
negative_prompt_embeds is not None and negative_prompt_embeds_mask is not None
561567
)
568+
569+
if true_cfg_scale > 1 and not has_neg_prompt:
570+
logger.warning(
571+
f"true_cfg_scale is passed as {true_cfg_scale}, but classifier-free guidance is not enabled since no negative_prompt is provided."
572+
)
573+
elif true_cfg_scale <= 1 and has_neg_prompt:
574+
logger.warning(
575+
" negative_prompt is passed but classifier-free guidance is not enabled since true_cfg_scale <= 1"
576+
)
577+
562578
do_true_cfg = true_cfg_scale > 1 and has_neg_prompt
563579
prompt_embeds, prompt_embeds_mask = self.encode_prompt(
564580
prompt=prompt,
@@ -613,10 +629,17 @@ def __call__(
613629
self._num_timesteps = len(timesteps)
614630

615631
# handle guidance
616-
if self.transformer.config.guidance_embeds:
632+
if self.transformer.config.guidance_embeds and guidance_scale is None:
633+
raise ValueError("guidance_scale is required for guidance-distilled model.")
634+
elif self.transformer.config.guidance_embeds:
617635
guidance = torch.full([1], guidance_scale, device=device, dtype=torch.float32)
618636
guidance = guidance.expand(latents.shape[0])
619-
else:
637+
elif not self.transformer.config.guidance_embeds and guidance_scale is not None:
638+
logger.warning(
639+
f"guidance_scale is passed as {guidance_scale}, but ignored since the model is not guidance-distilled."
640+
)
641+
guidance = None
642+
elif not self.transformer.config.guidance_embeds and guidance_scale is None:
620643
guidance = None
621644

622645
if self.attention_kwargs is None:

0 commit comments

Comments
 (0)