@@ -435,7 +435,7 @@ def __call__(
435435        width : Optional [int ] =  None ,
436436        num_inference_steps : int  =  50 ,
437437        sigmas : Optional [List [float ]] =  None ,
438-         guidance_scale : float  =  1.0 ,
438+         guidance_scale : Optional [ float ]  =  None ,
439439        num_images_per_prompt : int  =  1 ,
440440        generator : Optional [Union [torch .Generator , List [torch .Generator ]]] =  None ,
441441        latents : Optional [torch .Tensor ] =  None ,
@@ -462,7 +462,12 @@ def __call__(
462462                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `true_cfg_scale` is 
463463                not greater than `1`). 
464464            true_cfg_scale (`float`, *optional*, defaults to 1.0): 
465-                 When > 1.0 and a provided `negative_prompt`, enables true classifier-free guidance. 
465+                 Guidance scale as defined in [Classifier-Free Diffusion 
466+                 Guidance](https://huggingface.co/papers/2207.12598). `true_cfg_scale` is defined as `w` of equation 2 
467+                 of the [Imagen Paper](https://huggingface.co/papers/2205.11487). Classifier-free guidance is enabled by 
468+                 setting `true_cfg_scale > 1` and providing a `negative_prompt`. A higher guidance scale encourages the 
469+                 model to generate images that are closely linked to the text `prompt`, usually at the expense of lower 
470+                 image quality. 
466471            height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): 
467472                The height in pixels of the generated image. This is set to 1024 by default for the best results. 
468473            width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): 
@@ -474,12 +479,13 @@ def __call__(
474479                Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in 
475480                their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed 
476481                will be used. 
477-             guidance_scale (`float`, *optional*, defaults to 3.5): 
478-                 Guidance scale as defined in [Classifier-Free Diffusion 
479-                 Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2. 
480-                 of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting 
481-                 `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to 
482-                 the text `prompt`, usually at the expense of lower image quality. 
482+             guidance_scale (`float`, *optional*, defaults to None): 
483+                 A guidance scale value for guidance-distilled models. Unlike traditional classifier-free guidance, 
484+                 where the guidance scale is applied during inference through noise prediction rescaling, 
485+                 guidance-distilled models take the guidance scale directly as an input parameter during the forward 
486+                 pass. Guidance scale is enabled by setting `guidance_scale > 1`. A higher guidance scale encourages the 
487+                 model to generate images that are closely linked to the text `prompt`, usually at the expense of lower 
488+                 image quality. Ignored when not using guidance-distilled models. 
483489            num_images_per_prompt (`int`, *optional*, defaults to 1): 
484490                The number of images to generate per prompt. 
485491            generator (`torch.Generator` or `List[torch.Generator]`, *optional*): 
@@ -559,6 +565,16 @@ def __call__(
559565        has_neg_prompt  =  negative_prompt  is  not None  or  (
560566            negative_prompt_embeds  is  not None  and  negative_prompt_embeds_mask  is  not None 
561567        )
568+ 
569+         if  true_cfg_scale  >  1  and  not  has_neg_prompt :
570+             logger .warning (
571+                 f"true_cfg_scale is passed as { true_cfg_scale }  
572+             )
573+         elif  true_cfg_scale  <=  1  and  has_neg_prompt :
574+             logger .warning (
575+                 " negative_prompt is passed but classifier-free guidance is not enabled since true_cfg_scale <= 1" 
576+             )
577+ 
562578        do_true_cfg  =  true_cfg_scale  >  1  and  has_neg_prompt 
563579        prompt_embeds , prompt_embeds_mask  =  self .encode_prompt (
564580            prompt = prompt ,
@@ -613,10 +629,17 @@ def __call__(
613629        self ._num_timesteps  =  len (timesteps )
614630
615631        # handle guidance 
616-         if  self .transformer .config .guidance_embeds :
632+         if  self .transformer .config .guidance_embeds  and  guidance_scale  is  None :
633+             raise  ValueError ("guidance_scale is required for guidance-distilled model." )
634+         elif  self .transformer .config .guidance_embeds :
617635            guidance  =  torch .full ([1 ], guidance_scale , device = device , dtype = torch .float32 )
618636            guidance  =  guidance .expand (latents .shape [0 ])
619-         else :
637+         elif  not  self .transformer .config .guidance_embeds  and  guidance_scale  is  not None :
638+             logger .warning (
639+                 f"guidance_scale is passed as { guidance_scale }  
640+             )
641+             guidance  =  None 
642+         elif  not  self .transformer .config .guidance_embeds  and  guidance_scale  is  None :
620643            guidance  =  None 
621644
622645        if  self .attention_kwargs  is  None :
0 commit comments