@@ -532,7 +532,7 @@ def __call__(
532532        width : Optional [int ] =  None ,
533533        num_inference_steps : int  =  50 ,
534534        sigmas : Optional [List [float ]] =  None ,
535-         guidance_scale : float  =  1.0 ,
535+         guidance_scale : Optional [ float ]  =  None ,
536536        num_images_per_prompt : int  =  1 ,
537537        generator : Optional [Union [torch .Generator , List [torch .Generator ]]] =  None ,
538538        latents : Optional [torch .Tensor ] =  None ,
@@ -558,8 +558,13 @@ def __call__(
558558                The prompt or prompts not to guide the image generation. If not defined, one has to pass 
559559                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `true_cfg_scale` is 
560560                not greater than `1`). 
561-             true_cfg_scale (`float`, *optional*, defaults to 1.0): 
562-                 When > 1.0 and a provided `negative_prompt`, enables true classifier-free guidance. 
561+             true_cfg_scale (`float`, *optional*, defaults to 1.0):
562+                 Guidance scale as defined in [Classifier-Free Diffusion
563+                 Guidance](https://huggingface.co/papers/2207.12598). `true_cfg_scale` is defined as `w` of equation 2
564+                 of the [Imagen Paper](https://huggingface.co/papers/2205.11487). Classifier-free guidance is enabled by
565+                 setting `true_cfg_scale > 1` and providing a `negative_prompt`. A higher guidance scale encourages the
566+                 model to generate images that are closely linked to the text `prompt`, usually at the expense of lower
567+                 image quality.
563568            height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): 
564569                The height in pixels of the generated image. This is set to 1024 by default for the best results. 
565570            width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): 
@@ -571,17 +576,15 @@ def __call__(
571576                Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in 
572577                their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed 
573578                will be used. 
574-             guidance_scale (`float`, *optional*, defaults to 3.5): 
575-                 Guidance scale as defined in [Classifier-Free Diffusion 
576-                 Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2. 
577-                 of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting 
578-                 `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to 
579-                 the text `prompt`, usually at the expense of lower image quality. 
580- 
581-                 This parameter in the pipeline is there to support future guidance-distilled models when they come up. 
582-                 Note that passing `guidance_scale` to the pipeline is ineffective. To enable classifier-free guidance, 
583-                 please pass `true_cfg_scale` and `negative_prompt` (even an empty negative prompt like " ") should 
584-                 enable classifier-free guidance computations. 
579+             guidance_scale (`float`, *optional*, defaults to None):
580+                 A guidance scale value for guidance-distilled models. Unlike traditional classifier-free guidance,
581+                 where the guidance scale is applied during inference through noise prediction rescaling,
582+                 guidance-distilled models take the guidance scale directly as an input parameter during the forward pass. Guidance
583+                 scale is enabled by setting `guidance_scale > 1`. A higher guidance scale encourages the model to generate images
584+                 that are closely linked to the text `prompt`, usually at the expense of lower image quality. This
585+                 parameter in the pipeline is there to support future guidance-distilled models when they come up. It is
586+                 ignored when not using guidance-distilled models. To enable traditional classifier-free guidance, please pass `true_cfg_scale > 1.0`
587+                 and a `negative_prompt` (even an empty negative prompt like " " should enable classifier-free guidance computations).
585588            num_images_per_prompt (`int`, *optional*, defaults to 1): 
586589                The number of images to generate per prompt. 
587590            generator (`torch.Generator` or `List[torch.Generator]`, *optional*): 
@@ -672,6 +675,16 @@ def __call__(
672675        has_neg_prompt  =  negative_prompt  is  not None  or  (
673676            negative_prompt_embeds  is  not None  and  negative_prompt_embeds_mask  is  not None 
674677        )
678+ 
679+         if  true_cfg_scale  >  1  and  not  has_neg_prompt :
680+             logger .warning (
681+                 f"true_cfg_scale is passed as { true_cfg_scale }  
682+             )
683+         elif  true_cfg_scale  <=  1  and  has_neg_prompt :
684+             logger .warning (
685+                 " negative_prompt is passed but classifier-free guidance is not enabled since true_cfg_scale <= 1" 
686+             )
687+ 
675688        do_true_cfg  =  true_cfg_scale  >  1  and  has_neg_prompt 
676689        prompt_embeds , prompt_embeds_mask  =  self .encode_prompt (
677690            image = prompt_image ,
@@ -734,10 +747,17 @@ def __call__(
734747        self ._num_timesteps  =  len (timesteps )
735748
736749        # handle guidance 
737-         if  self .transformer .config .guidance_embeds :
750+         if  self .transformer .config .guidance_embeds  and  guidance_scale  is  None :
751+             raise  ValueError ("guidance_scale is required for guidance-distilled model." )
752+         elif  self .transformer .config .guidance_embeds :
738753            guidance  =  torch .full ([1 ], guidance_scale , device = device , dtype = torch .float32 )
739754            guidance  =  guidance .expand (latents .shape [0 ])
740-         else :
755+         elif  not  self .transformer .config .guidance_embeds  and  guidance_scale  is  not None :
756+             logger .warning (
757+                 f"guidance_scale is passed as { guidance_scale }  
758+             )
759+             guidance  =  None 
760+         elif  not  self .transformer .config .guidance_embeds  and  guidance_scale  is  None :
741761            guidance  =  None 
742762
743763        if  self .attention_kwargs  is  None :
0 commit comments