From d146f12437cb7135ec65ba6ce0fcc5ea657f1880 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 15 Jul 2025 20:57:05 +0100 Subject: [PATCH 01/11] update guidance_scale docstring for guidance_distilled models. --- src/diffusers/pipelines/flux/pipeline_flux.py | 6 +++++- .../pipelines/flux/pipeline_flux_control.py | 4 +++- .../pipelines/flux/pipeline_flux_kontext.py | 4 +++- .../pipelines/flux/pipeline_flux_kontext_inpaint.py | 4 +++- src/diffusers/pipelines/sana/pipeline_sana_sprint.py | 12 +++++++----- 5 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/diffusers/pipelines/flux/pipeline_flux.py b/src/diffusers/pipelines/flux/pipeline_flux.py index 073d94750a02..b4553d7aa86c 100644 --- a/src/diffusers/pipelines/flux/pipeline_flux.py +++ b/src/diffusers/pipelines/flux/pipeline_flux.py @@ -691,7 +691,11 @@ def __call__( Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to - the text `prompt`, usually at the expense of lower image quality. + the text `prompt`, usually at the expense of lower image quality. In case of Flux, which is a guidance- + distilled model, `guidance_scale > 1` doesn't implement true classifier-free guidance. Specifying + `guidance_scale > 1` just mimics it. In case of Flux, which is a guidance- distilled model, + `guidance_scale > 1` doesn't implement true classifier-free guidance. Specifying `guidance_scale > 1` + just mimics it. num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt. 
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): diff --git a/src/diffusers/pipelines/flux/pipeline_flux_control.py b/src/diffusers/pipelines/flux/pipeline_flux_control.py index ea49821adc65..b28564b0d99f 100644 --- a/src/diffusers/pipelines/flux/pipeline_flux_control.py +++ b/src/diffusers/pipelines/flux/pipeline_flux_control.py @@ -665,7 +665,9 @@ def __call__( Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to - the text `prompt`, usually at the expense of lower image quality. + the text `prompt`, usually at the expense of lower image quality. In case of Flux, which is a guidance- + distilled model, `guidance_scale > 1` doesn't implement true classifier-free guidance. Specifying + `guidance_scale > 1` just mimics it. num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt. generator (`torch.Generator` or `List[torch.Generator]`, *optional*): diff --git a/src/diffusers/pipelines/flux/pipeline_flux_kontext.py b/src/diffusers/pipelines/flux/pipeline_flux_kontext.py index 94901ee0b635..23252e6def60 100644 --- a/src/diffusers/pipelines/flux/pipeline_flux_kontext.py +++ b/src/diffusers/pipelines/flux/pipeline_flux_kontext.py @@ -799,7 +799,9 @@ def __call__( Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to - the text `prompt`, usually at the expense of lower image quality. + the text `prompt`, usually at the expense of lower image quality. 
In case of Flux, which is a guidance- + distilled model, `guidance_scale > 1` doesn't implement true classifier-free guidance. Specifying + `guidance_scale > 1` just mimics it. num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt. generator (`torch.Generator` or `List[torch.Generator]`, *optional*): diff --git a/src/diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py b/src/diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py index 2b4abe8b2437..1e5fbf34165d 100644 --- a/src/diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +++ b/src/diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py @@ -1019,7 +1019,9 @@ def __call__( Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to - the text `prompt`, usually at the expense of lower image quality. + the text `prompt`, usually at the expense of lower image quality. In case of Flux, which is a guidance- + distilled model, `guidance_scale > 1` doesn't implement true classifier-free guidance. Specifying + `guidance_scale > 1` just mimics it. num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt. generator (`torch.Generator` or `List[torch.Generator]`, *optional*): diff --git a/src/diffusers/pipelines/sana/pipeline_sana_sprint.py b/src/diffusers/pipelines/sana/pipeline_sana_sprint.py index fcf854a54cad..a53066c1df66 100644 --- a/src/diffusers/pipelines/sana/pipeline_sana_sprint.py +++ b/src/diffusers/pipelines/sana/pipeline_sana_sprint.py @@ -643,11 +643,13 @@ def __call__( in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed will be used. Must be in descending order. 
guidance_scale (`float`, *optional*, defaults to 4.5): - Guidance scale as defined in [Classifier-Free Diffusion - Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2. - of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting - `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to - the text `prompt`, usually at the expense of lower image quality. + Guidance scale as defined in [Classifier-Free Diffusion Guidance scale as defined in [Classifier-Free + Diffusion Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of + equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by + setting `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely + linked to the text `prompt`, usually at the expense of lower image quality. In case of Flux, which is a + guidance- distilled model, `guidance_scale > 1` doesn't implement true classifier-free guidance. + Specifying `guidance_scale > 1` just mimics it. num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt. 
height (`int`, *optional*, defaults to self.unet.config.sample_size): From f6e92caf6b4e0bb484da6526f980603ddd910a50 Mon Sep 17 00:00:00 2001 From: Steven Liu <59462357+stevhliu@users.noreply.github.com> Date: Mon, 21 Jul 2025 15:59:23 -0700 Subject: [PATCH 02/11] Update pipeline_flux.py --- src/diffusers/pipelines/flux/pipeline_flux.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/diffusers/pipelines/flux/pipeline_flux.py b/src/diffusers/pipelines/flux/pipeline_flux.py index c0d9473a5bb2..8bc3012d1eb4 100644 --- a/src/diffusers/pipelines/flux/pipeline_flux.py +++ b/src/diffusers/pipelines/flux/pipeline_flux.py @@ -674,7 +674,7 @@ def __call__( The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders. true_cfg_scale (`float`, *optional*, defaults to 1.0): - When > 1.0 and a provided `negative_prompt`, enables true classifier-free guidance. + True classifier-free guidance (guidance scale) is enabled when `true_cfg_scale` > 1 and `negative_prompt` is provided. height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): The height in pixels of the generated image. This is set to 1024 by default for the best results. width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): @@ -687,15 +687,11 @@ def __call__( their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed will be used. guidance_scale (`float`, *optional*, defaults to 3.5): - Guidance scale as defined in [Classifier-Free Diffusion - Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2. - of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting - `guidance_scale > 1`. 
Higher guidance scale encourages to generate images that are closely linked to - the text `prompt`, usually at the expense of lower image quality. In case of Flux, which is a guidance- - distilled model, `guidance_scale > 1` doesn't implement true classifier-free guidance. Specifying - `guidance_scale > 1` just mimics it. In case of Flux, which is a guidance- distilled model, - `guidance_scale > 1` doesn't implement true classifier-free guidance. Specifying `guidance_scale > 1` - just mimics it. + Embedded guiddance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages a model to + generate images more aligned with `prompt` at the expense of lower image quality. + + Guidance-distilled models approximates true classifer-free guidance for `guidance_scale` > 1. Refer to the + [paper](https://huggingface.co/papers/2210.03142) to learn more. num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt. generator (`torch.Generator` or `List[torch.Generator]`, *optional*): From f515e171cb8148cc850d16a2c07724a4896f0d85 Mon Sep 17 00:00:00 2001 From: Steven Liu <59462357+stevhliu@users.noreply.github.com> Date: Mon, 21 Jul 2025 16:01:02 -0700 Subject: [PATCH 03/11] Update pipeline_flux_control.py --- .../pipelines/flux/pipeline_flux_control.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/diffusers/pipelines/flux/pipeline_flux_control.py b/src/diffusers/pipelines/flux/pipeline_flux_control.py index b28564b0d99f..29b70db53d8f 100644 --- a/src/diffusers/pipelines/flux/pipeline_flux_control.py +++ b/src/diffusers/pipelines/flux/pipeline_flux_control.py @@ -661,13 +661,11 @@ def __call__( their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed will be used. guidance_scale (`float`, *optional*, defaults to 3.5): - Guidance scale as defined in [Classifier-Free Diffusion - Guidance](https://huggingface.co/papers/2207.12598). 
`guidance_scale` is defined as `w` of equation 2. - of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting - `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to - the text `prompt`, usually at the expense of lower image quality. In case of Flux, which is a guidance- - distilled model, `guidance_scale > 1` doesn't implement true classifier-free guidance. Specifying - `guidance_scale > 1` just mimics it. + Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages a model to generate images + more aligned with prompt at the expense of lower image quality. + + Guidance-distilled models approximates true classifier-free guidance for `guidance_scale` > 1. Refer to the + [paper](https://huggingface.co/papers/2210.03142) to learn more. num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt. generator (`torch.Generator` or `List[torch.Generator]`, *optional*): From 7c017d2e8ce3381e44eb8ae042512e8688db0fdb Mon Sep 17 00:00:00 2001 From: Steven Liu <59462357+stevhliu@users.noreply.github.com> Date: Mon, 21 Jul 2025 16:02:16 -0700 Subject: [PATCH 04/11] Update pipeline_flux_kontext.py --- .../pipelines/flux/pipeline_flux_kontext.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/diffusers/pipelines/flux/pipeline_flux_kontext.py b/src/diffusers/pipelines/flux/pipeline_flux_kontext.py index 23252e6def60..8f095194c805 100644 --- a/src/diffusers/pipelines/flux/pipeline_flux_kontext.py +++ b/src/diffusers/pipelines/flux/pipeline_flux_kontext.py @@ -795,13 +795,11 @@ def __call__( their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed will be used. guidance_scale (`float`, *optional*, defaults to 3.5): - Guidance scale as defined in [Classifier-Free Diffusion - Guidance](https://huggingface.co/papers/2207.12598). 
`guidance_scale` is defined as `w` of equation 2. - of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting - `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to - the text `prompt`, usually at the expense of lower image quality. In case of Flux, which is a guidance- - distilled model, `guidance_scale > 1` doesn't implement true classifier-free guidance. Specifying - `guidance_scale > 1` just mimics it. + Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages a model to + generate images more aligned with prompt at the expense of lower image quality. + + Guidance-distilled models approximates true classifier-free guidance for `guidance_scale` > 1. Refer to the + [paper](https://huggingface.co/papers/2210.03142) to learn more. num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt. generator (`torch.Generator` or `List[torch.Generator]`, *optional*): From 70ba6ab21e066a197e5f26c72c118338c1f1c14f Mon Sep 17 00:00:00 2001 From: Steven Liu <59462357+stevhliu@users.noreply.github.com> Date: Mon, 21 Jul 2025 16:03:52 -0700 Subject: [PATCH 05/11] Update pipeline_flux_kontext_inpaint.py --- .../flux/pipeline_flux_kontext_inpaint.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py b/src/diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py index 1e5fbf34165d..3e7e869e8be9 100644 --- a/src/diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +++ b/src/diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py @@ -989,7 +989,7 @@ def __call__( The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders. 
true_cfg_scale (`float`, *optional*, defaults to 1.0): - When > 1.0 and a provided `negative_prompt`, enables true classifier-free guidance. + True classifier-free guidance (guidance scale) is enabled when `true_cfg_scale` > 1 and `negative_prompt` is provided. height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): The height in pixels of the generated image. This is set to 1024 by default for the best results. width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): @@ -1015,13 +1015,11 @@ def __call__( their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed will be used. guidance_scale (`float`, *optional*, defaults to 3.5): - Guidance scale as defined in [Classifier-Free Diffusion - Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2. - of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting - `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to - the text `prompt`, usually at the expense of lower image quality. In case of Flux, which is a guidance- - distilled model, `guidance_scale > 1` doesn't implement true classifier-free guidance. Specifying - `guidance_scale > 1` just mimics it. + Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages a model to + generate images more aligned with `prompt` at the expense of lower image quality. + + Guidance-distilled models approximates true classifier-free guidance for `guidance_scale` > 1. Refer to the + [paper](https://huggingface.co/papers/2210.03142) to learn more. num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt. 
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): From 774cb7101b8e5f76f6e5e91800a950e818fe82f3 Mon Sep 17 00:00:00 2001 From: Steven Liu <59462357+stevhliu@users.noreply.github.com> Date: Mon, 21 Jul 2025 16:05:10 -0700 Subject: [PATCH 06/11] Update pipeline_sana_sprint.py --- src/diffusers/pipelines/sana/pipeline_sana_sprint.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/diffusers/pipelines/sana/pipeline_sana_sprint.py b/src/diffusers/pipelines/sana/pipeline_sana_sprint.py index a53066c1df66..a5dcf4b5c5e8 100644 --- a/src/diffusers/pipelines/sana/pipeline_sana_sprint.py +++ b/src/diffusers/pipelines/sana/pipeline_sana_sprint.py @@ -643,13 +643,11 @@ def __call__( in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed will be used. Must be in descending order. guidance_scale (`float`, *optional*, defaults to 4.5): - Guidance scale as defined in [Classifier-Free Diffusion Guidance scale as defined in [Classifier-Free - Diffusion Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of - equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by - setting `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely - linked to the text `prompt`, usually at the expense of lower image quality. In case of Flux, which is a - guidance- distilled model, `guidance_scale > 1` doesn't implement true classifier-free guidance. - Specifying `guidance_scale > 1` just mimics it. + Embedded guiddance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages a model to + generate images more aligned with `prompt` at the expense of lower image quality. + + Guidance-distilled models approximates true classifer-free guidance for `guidance_scale` > 1. Refer to the + [paper](https://huggingface.co/papers/2210.03142) to learn more. 
num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt. height (`int`, *optional*, defaults to self.unet.config.sample_size): From b4d45f04010b7f51380781beea9e65e5b9e62d95 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 22 Jul 2025 07:24:18 +0100 Subject: [PATCH 07/11] style --- src/diffusers/pipelines/flux/pipeline_flux.py | 11 ++++++----- .../pipelines/flux/pipeline_flux_control.py | 10 +++++----- .../pipelines/flux/pipeline_flux_kontext.py | 10 +++++----- .../pipelines/flux/pipeline_flux_kontext_inpaint.py | 13 +++++++------ .../pipelines/sana/pipeline_sana_sprint.py | 8 ++++---- 5 files changed, 27 insertions(+), 25 deletions(-) diff --git a/src/diffusers/pipelines/flux/pipeline_flux.py b/src/diffusers/pipelines/flux/pipeline_flux.py index 8bc3012d1eb4..7211fb5693fe 100644 --- a/src/diffusers/pipelines/flux/pipeline_flux.py +++ b/src/diffusers/pipelines/flux/pipeline_flux.py @@ -674,7 +674,8 @@ def __call__( The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders. true_cfg_scale (`float`, *optional*, defaults to 1.0): - True classifier-free guidance (guidance scale) is enabled when `true_cfg_scale` > 1 and `negative_prompt` is provided. + True classifier-free guidance (guidance scale) is enabled when `true_cfg_scale` > 1 and + `negative_prompt` is provided. height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): The height in pixels of the generated image. This is set to 1024 by default for the best results. width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): @@ -687,11 +688,11 @@ def __call__( their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed will be used. 
guidance_scale (`float`, *optional*, defaults to 3.5): - Embedded guiddance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages a model to - generate images more aligned with `prompt` at the expense of lower image quality. + Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages + a model to generate images more aligned with `prompt` at the expense of lower image quality. - Guidance-distilled models approximates true classifer-free guidance for `guidance_scale` > 1. Refer to the - [paper](https://huggingface.co/papers/2210.03142) to learn more. + Guidance-distilled models approximate true classifier-free guidance for `guidance_scale` > 1. Refer to + the [paper](https://huggingface.co/papers/2210.03142) to learn more. num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt. generator (`torch.Generator` or `List[torch.Generator]`, *optional*): diff --git a/src/diffusers/pipelines/flux/pipeline_flux_control.py b/src/diffusers/pipelines/flux/pipeline_flux_control.py index 29b70db53d8f..5a057f94cfaa 100644 --- a/src/diffusers/pipelines/flux/pipeline_flux_control.py +++ b/src/diffusers/pipelines/flux/pipeline_flux_control.py @@ -661,11 +661,11 @@ def __call__( their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed will be used. guidance_scale (`float`, *optional*, defaults to 3.5): - Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages a model to generate images - more aligned with prompt at the expense of lower image quality. - - Guidance-distilled models approximates true classifier-free guidance for `guidance_scale` > 1. Refer to the - [paper](https://huggingface.co/papers/2210.03142) to learn more. + Embedded guidance scale is enabled by setting `guidance_scale` > 1.
Higher `guidance_scale` encourages + a model to generate images more aligned with `prompt` at the expense of lower image quality. + + Guidance-distilled models approximate true classifier-free guidance for `guidance_scale` > 1. Refer to + the [paper](https://huggingface.co/papers/2210.03142) to learn more. num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt. generator (`torch.Generator` or `List[torch.Generator]`, *optional*): diff --git a/src/diffusers/pipelines/flux/pipeline_flux_kontext.py b/src/diffusers/pipelines/flux/pipeline_flux_kontext.py index 8f095194c805..3c78aeaf36e8 100644 --- a/src/diffusers/pipelines/flux/pipeline_flux_kontext.py +++ b/src/diffusers/pipelines/flux/pipeline_flux_kontext.py @@ -795,11 +795,11 @@ def __call__( their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed will be used. guidance_scale (`float`, *optional*, defaults to 3.5): - Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages a model to - generate images more aligned with prompt at the expense of lower image quality. - - Guidance-distilled models approximates true classifier-free guidance for `guidance_scale` > 1. Refer to the - [paper](https://huggingface.co/papers/2210.03142) to learn more. + Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages + a model to generate images more aligned with `prompt` at the expense of lower image quality. + + Guidance-distilled models approximate true classifier-free guidance for `guidance_scale` > 1. Refer to + the [paper](https://huggingface.co/papers/2210.03142) to learn more. num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt.
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): diff --git a/src/diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py b/src/diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py index 3e7e869e8be9..6dc621901c8c 100644 --- a/src/diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +++ b/src/diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py @@ -989,7 +989,8 @@ def __call__( The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders. true_cfg_scale (`float`, *optional*, defaults to 1.0): - True classifier-free guidance (guidance scale) is enabled when `true_cfg_scale` > 1 and `negative_prompt` is provided. + True classifier-free guidance (guidance scale) is enabled when `true_cfg_scale` > 1 and + `negative_prompt` is provided. height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): The height in pixels of the generated image. This is set to 1024 by default for the best results. width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): @@ -1015,11 +1016,11 @@ def __call__( their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed will be used. guidance_scale (`float`, *optional*, defaults to 3.5): - Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages a model to - generate images more aligned with `prompt` at the expense of lower image quality. - - Guidance-distilled models approximates true classifier-free guidance for `guidance_scale` > 1. Refer to the - [paper](https://huggingface.co/papers/2210.03142) to learn more. + Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages + a model to generate images more aligned with `prompt` at the expense of lower image quality. 
+ + Guidance-distilled models approximate true classifier-free guidance for `guidance_scale` > 1. Refer to + the [paper](https://huggingface.co/papers/2210.03142) to learn more. num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt. generator (`torch.Generator` or `List[torch.Generator]`, *optional*): diff --git a/src/diffusers/pipelines/sana/pipeline_sana_sprint.py b/src/diffusers/pipelines/sana/pipeline_sana_sprint.py index a5dcf4b5c5e8..e8f9d8368f2a 100644 --- a/src/diffusers/pipelines/sana/pipeline_sana_sprint.py +++ b/src/diffusers/pipelines/sana/pipeline_sana_sprint.py @@ -643,11 +643,11 @@ def __call__( in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed will be used. Must be in descending order. guidance_scale (`float`, *optional*, defaults to 4.5): - Embedded guiddance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages a model to - generate images more aligned with `prompt` at the expense of lower image quality. + Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages + a model to generate images more aligned with `prompt` at the expense of lower image quality. - Guidance-distilled models approximates true classifer-free guidance for `guidance_scale` > 1. Refer to the - [paper](https://huggingface.co/papers/2210.03142) to learn more. + Guidance-distilled models approximate true classifier-free guidance for `guidance_scale` > 1. Refer to + the [paper](https://huggingface.co/papers/2210.03142) to learn more. num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt.
height (`int`, *optional*, defaults to self.unet.config.sample_size): From cb76afda1bd086f328b1a5d710003ab93b43a635 Mon Sep 17 00:00:00 2001 From: Steven Liu <59462357+stevhliu@users.noreply.github.com> Date: Tue, 22 Jul 2025 08:53:43 -0700 Subject: [PATCH 08/11] Update pipeline_hidream_image.py --- .../pipelines/hidream_image/pipeline_hidream_image.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/diffusers/pipelines/hidream_image/pipeline_hidream_image.py b/src/diffusers/pipelines/hidream_image/pipeline_hidream_image.py index 341cdaf1e6ef..695f54f3d9db 100644 --- a/src/diffusers/pipelines/hidream_image/pipeline_hidream_image.py +++ b/src/diffusers/pipelines/hidream_image/pipeline_hidream_image.py @@ -763,11 +763,11 @@ def __call__( their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed will be used. guidance_scale (`float`, *optional*, defaults to 3.5): - Guidance scale as defined in [Classifier-Free Diffusion - Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2. - of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting - `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to - the text `prompt`, usually at the expense of lower image quality. + Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages + a model to generate images more aligned with `prompt` at the expense of lower image quality. + + Guidance-distilled models approximate true classifier-free guidance for `guidance_scale` > 1. Refer to + the [paper](https://huggingface.co/papers/2210.03142) to learn more. negative_prompt (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation. If not defined, one has to pass `negative_prompt_embeds` instead.
Ignored when not using guidance (i.e., ignored if `true_cfg_scale` is From 962196a2808780cc94abff035fa114ee3b3c427c Mon Sep 17 00:00:00 2001 From: Steven Liu <59462357+stevhliu@users.noreply.github.com> Date: Tue, 22 Jul 2025 08:54:23 -0700 Subject: [PATCH 09/11] Update pipeline_chroma.py --- src/diffusers/pipelines/chroma/pipeline_chroma.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/diffusers/pipelines/chroma/pipeline_chroma.py b/src/diffusers/pipelines/chroma/pipeline_chroma.py index c74834ee8252..3a34ec2a4218 100644 --- a/src/diffusers/pipelines/chroma/pipeline_chroma.py +++ b/src/diffusers/pipelines/chroma/pipeline_chroma.py @@ -663,11 +663,11 @@ def __call__( their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed will be used. guidance_scale (`float`, *optional*, defaults to 3.5): - Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). - `guidance_scale` is defined as `w` of equation 2. of [Imagen - Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > - 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, - usually at the expense of lower image quality. + Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages + a model to generate images more aligned with `prompt` at the expense of lower image quality. + + Guidance-distilled models approximate true classifier-free guidance for `guidance_scale` > 1. Refer to + the [paper](https://huggingface.co/papers/2210.03142) to learn more. num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt.
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): From 5ee14080aef3c46c0c64b7759f4a7ffffbe6ad9d Mon Sep 17 00:00:00 2001 From: Steven Liu <59462357+stevhliu@users.noreply.github.com> Date: Tue, 22 Jul 2025 08:54:52 -0700 Subject: [PATCH 10/11] Update pipeline_chroma_img2img.py --- .../pipelines/chroma/pipeline_chroma_img2img.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/diffusers/pipelines/chroma/pipeline_chroma_img2img.py b/src/diffusers/pipelines/chroma/pipeline_chroma_img2img.py index 9936608aaf21..e169db4a4d3e 100644 --- a/src/diffusers/pipelines/chroma/pipeline_chroma_img2img.py +++ b/src/diffusers/pipelines/chroma/pipeline_chroma_img2img.py @@ -725,11 +725,11 @@ def __call__( their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed will be used. guidance_scale (`float`, *optional*, defaults to 5.0): - Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). - `guidance_scale` is defined as `w` of equation 2. of [Imagen - Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > - 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, - usually at the expense of lower image quality. + Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages + a model to generate images more aligned with `prompt` at the expense of lower image quality. + + Guidance-distilled models approximate true classifier-free guidance for `guidance_scale` > 1. Refer to + the [paper](https://huggingface.co/papers/2210.03142) to learn more. strength (`float, *optional*, defaults to 0.9): Conceptually, indicates how much to transform the reference image. Must be between 0 and 1. image will be used as a starting point, adding more noise to it the larger the strength.
The number of denoising From 73e6b3e308976fc6a49e19e053617e3677a3f09c Mon Sep 17 00:00:00 2001 From: Steven Liu <59462357+stevhliu@users.noreply.github.com> Date: Tue, 22 Jul 2025 08:55:23 -0700 Subject: [PATCH 11/11] Update pipeline_hunyuan_video.py --- .../hunyuan_video/pipeline_hunyuan_video.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py b/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py index 2cbb4af2b4cc..76b288ed0bd8 100644 --- a/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +++ b/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py @@ -529,15 +529,14 @@ def __call__( their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed will be used. true_cfg_scale (`float`, *optional*, defaults to 1.0): - When > 1.0 and a provided `negative_prompt`, enables true classifier-free guidance. + True classifier-free guidance (guidance scale) is enabled when `true_cfg_scale` > 1 and + `negative_prompt` is provided. guidance_scale (`float`, defaults to `6.0`): - Guidance scale as defined in [Classifier-Free Diffusion - Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2. - of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting - `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to - the text `prompt`, usually at the expense of lower image quality. Note that the only available - HunyuanVideo model is CFG-distilled, which means that traditional guidance between unconditional and - conditional latent is not applied. + Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages + a model to generate images more aligned with `prompt` at the expense of lower image quality.
+ + Guidance-distilled models approximate true classifier-free guidance for `guidance_scale` > 1. Refer to + the [paper](https://huggingface.co/papers/2210.03142) to learn more. num_videos_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt. generator (`torch.Generator` or `List[torch.Generator]`, *optional*):