@@ -660,6 +660,21 @@ def forward(
660660 pipeline .add_module ("transformer" , self .transformer )
661661 fastvideo_args .model_loaded ["transformer" ] = True
662662
663+ # Prepare extra step kwargs for scheduler
664+ extra_step_kwargs = self .prepare_extra_func_kwargs (
665+ self .scheduler .step ,
666+ {
667+ "generator" : batch .generator ,
668+ "eta" : batch .eta
669+ },
670+ )
671+
672+ # Log the extra step kwargs
673+ print (f"[FASTVIDEO DEBUG] Extra step kwargs: { extra_step_kwargs } " )
674+ with open ("/workspace/FastVideo/fastvideo_hidden_states.log" , "a" ) as f :
675+ f .write (f"[FASTVIDEO DEBUG] Extra step kwargs: { extra_step_kwargs } \n " )
676+
677+
663678 # Setup precision to match diffusers exactly
664679 # Diffusers uses transformer.dtype (bfloat16) and converts inputs before transformer calls
665680 # For FSDP wrapped models, we need to access the underlying module
@@ -682,11 +697,43 @@ def forward(
682697 f .write (f"Denoising init: latents sum = { sum_value :.6f} , shape = { latents .shape } \n " )
683698
684699
700+ # Configure scheduler to match Diffusers exactly (MUST be before set_timesteps)
701+ sigma_max = 80.0
702+ sigma_min = 0.002
703+ sigma_data = 1.0
704+ final_sigmas_type = "sigma_min"
705+
706+ print (f"[FASTVIDEO DEBUG] BEFORE config - scheduler.config: { self .scheduler .config } " )
707+ with open ("/workspace/FastVideo/fastvideo_hidden_states.log" , "a" ) as f :
708+ f .write (f"[FASTVIDEO DEBUG] BEFORE config - scheduler.config: { self .scheduler .config } \n " )
709+
710+ if self .scheduler is not None :
711+ self .scheduler .register_to_config (
712+ sigma_max = sigma_max ,
713+ sigma_min = sigma_min ,
714+ sigma_data = sigma_data ,
715+ final_sigmas_type = final_sigmas_type ,
716+ )
717+ print (f"[FASTVIDEO DEBUG] Applied scheduler config: sigma_max={ sigma_max } , sigma_min={ sigma_min } , sigma_data={ sigma_data } , final_sigmas_type={ final_sigmas_type } " )
718+ print (f"[FASTVIDEO DEBUG] AFTER config - scheduler.config: { self .scheduler .config } " )
719+ with open ("/workspace/FastVideo/fastvideo_hidden_states.log" , "a" ) as f :
720+ f .write (f"[FASTVIDEO DEBUG] Applied scheduler config: sigma_max={ sigma_max } , sigma_min={ sigma_min } , sigma_data={ sigma_data } , final_sigmas_type={ final_sigmas_type } \n " )
721+ f .write (f"[FASTVIDEO DEBUG] AFTER config - scheduler.config: { self .scheduler .config } \n " )
722+
685723 # Setup scheduler timesteps - use default scheduler sigma generation
686724 # The torch.linspace(0, 1, num_inference_steps) approach was incorrect for FlowMatchEulerDiscreteScheduler
687725 # Let the scheduler generate its own sigmas using the configured sigma_max, sigma_min, etc.
688726 self .scheduler .set_timesteps (num_inference_steps , device = latents .device )
689727 timesteps = self .scheduler .timesteps
728+
729+ # Debug what sigmas were actually generated
730+ print (f"[FASTVIDEO DEBUG] Generated sigmas - length: { len (self .scheduler .sigmas )} , first few: { self .scheduler .sigmas [:3 ]} " )
731+ print (f"[FASTVIDEO DEBUG] Scheduler config after set_timesteps: sigma_max={ getattr (self .scheduler .config , 'sigma_max' , 'NOT_SET' )} , sigma_min={ getattr (self .scheduler .config , 'sigma_min' , 'NOT_SET' )} " )
732+ print (f"[FASTVIDEO DEBUG] Scheduler properties: self.sigma_max={ getattr (self .scheduler , 'sigma_max' , 'NOT_SET' )} , self.sigma_min={ getattr (self .scheduler , 'sigma_min' , 'NOT_SET' )} " )
733+ with open ("/workspace/FastVideo/fastvideo_hidden_states.log" , "a" ) as f :
734+ f .write (f"[FASTVIDEO DEBUG] Generated sigmas - length: { len (self .scheduler .sigmas )} , first few: { self .scheduler .sigmas [:3 ]} \n " )
735+ f .write (f"[FASTVIDEO DEBUG] Scheduler config after set_timesteps: sigma_max={ getattr (self .scheduler .config , 'sigma_max' , 'NOT_SET' )} , sigma_min={ getattr (self .scheduler .config , 'sigma_min' , 'NOT_SET' )} \n " )
736+ f .write (f"[FASTVIDEO DEBUG] Scheduler properties: self.sigma_max={ getattr (self .scheduler , 'sigma_max' , 'NOT_SET' )} , self.sigma_min={ getattr (self .scheduler , 'sigma_min' , 'NOT_SET' )} \n " )
690737
691738 # Handle final sigmas like diffusers
692739 if hasattr (self .scheduler .config , 'final_sigmas_type' ) and self .scheduler .config .final_sigmas_type == "sigma_min" :
@@ -844,6 +891,18 @@ def forward(
844891 print (f"[FASTVIDEO DEBUG] Step { i } : Preconditioning - c_skip={ c_skip :.6f} , c_out={ c_out :.6f} , latents_sum={ latents .float ().sum ().item ():.6f} " )
845892 with open ("/workspace/FastVideo/fastvideo_hidden_states.log" , "a" ) as f :
846893 f .write (f"[FASTVIDEO DEBUG] Step { i } : Preconditioning - c_skip={ c_skip :.6f} , c_out={ c_out :.6f} , latents_sum={ latents .float ().sum ().item ():.6f} \n " )
894+
895+ # PRECONDITIONING DTYPE VERIFICATION
896+ print (f"[FASTVIDEO DTYPE DEBUG] Step { i } : Preconditioning dtypes" )
897+ print (f"[FASTVIDEO DTYPE DEBUG] noise_pred dtype: { noise_pred .dtype } , latents dtype: { latents .dtype } " )
898+ print (f"[FASTVIDEO DTYPE DEBUG] c_skip: { c_skip :.10f} (type: { type (c_skip )} ), c_out: { c_out :.10f} (type: { type (c_out )} )" )
899+ print (f"[FASTVIDEO DTYPE DEBUG] target_dtype: { target_dtype } " )
900+ with open ("/workspace/FastVideo/fastvideo_hidden_states.log" , "a" ) as f :
901+ f .write (f"[FASTVIDEO DTYPE DEBUG] Step { i } : Preconditioning dtypes\n " )
902+ f .write (f"[FASTVIDEO DTYPE DEBUG] noise_pred dtype: { noise_pred .dtype } , latents dtype: { latents .dtype } \n " )
903+ f .write (f"[FASTVIDEO DTYPE DEBUG] c_skip: { c_skip :.10f} (type: { type (c_skip )} ), c_out: { c_out :.10f} (type: { type (c_out )} )\n " )
904+ f .write (f"[FASTVIDEO DTYPE DEBUG] target_dtype: { target_dtype } \n " )
905+
847906 cond_pred = (c_skip * latents + c_out * noise_pred .float ()).to (target_dtype )
848907
849908 if hasattr (batch , 'cond_indicator' ) and batch .cond_indicator is not None and conditioning_latents is not None :
@@ -954,14 +1013,34 @@ def forward(
9541013 else :
9551014 logger .warning (f"Step { i } : current_sigma too small ({ current_sigma } ), using final_pred directly" )
9561015 noise_for_scheduler = final_pred
957-
1016+
9581017 # Debug: Check for NaN values before scheduler step
9591018 if torch .isnan (noise_for_scheduler ).sum () > 0 :
9601019 logger .error (f"Step { i } : NaN detected in noise_for_scheduler, sum: { noise_for_scheduler .float ().sum ().item ()} " )
9611020 logger .error (f"Step { i } : latents sum: { latents .float ().sum ().item ()} , final_pred sum: { final_pred .float ().sum ().item ()} , current_sigma: { current_sigma } " )
962-
1021+
1022+ # DTYPE VERIFICATION LOGS
1023+ print (f"[FASTVIDEO DTYPE DEBUG] Step { i } : Before scheduler step" )
1024+ print (f"[FASTVIDEO DTYPE DEBUG] latents dtype: { latents .dtype } , sum: { latents .float ().sum ().item ():.6f} " )
1025+ print (f"[FASTVIDEO DTYPE DEBUG] final_pred dtype: { final_pred .dtype } , sum: { final_pred .float ().sum ().item ():.6f} " )
1026+ print (f"[FASTVIDEO DTYPE DEBUG] noise_for_scheduler dtype: { noise_for_scheduler .dtype } , sum: { noise_for_scheduler .float ().sum ().item ():.6f} " )
1027+ print (f"[FASTVIDEO DTYPE DEBUG] current_sigma: { current_sigma :.10f} (type: { type (current_sigma )} )" )
1028+ with open ("/workspace/FastVideo/fastvideo_hidden_states.log" , "a" ) as f :
1029+ f .write (f"[FASTVIDEO DTYPE DEBUG] Step { i } : Before scheduler step\n " )
1030+ f .write (f"[FASTVIDEO DTYPE DEBUG] latents dtype: { latents .dtype } , sum: { latents .float ().sum ().item ():.6f} \n " )
1031+ f .write (f"[FASTVIDEO DTYPE DEBUG] final_pred dtype: { final_pred .dtype } , sum: { final_pred .float ().sum ().item ():.6f} \n " )
1032+ f .write (f"[FASTVIDEO DTYPE DEBUG] noise_for_scheduler dtype: { noise_for_scheduler .dtype } , sum: { noise_for_scheduler .float ().sum ().item ():.6f} \n " )
1033+ f .write (f"[FASTVIDEO DTYPE DEBUG] current_sigma: { current_sigma :.10f} (type: { type (current_sigma )} )\n " )
1034+
9631035 # Standard scheduler step like diffusers
964- latents = self .scheduler .step (noise_for_scheduler , t , latents , return_dict = False )[0 ]
1036+ latents = self .scheduler .step (noise_for_scheduler , t , latents , ** extra_step_kwargs , return_dict = False )[0 ]
1037+
1038+ # DTYPE VERIFICATION LOGS AFTER SCHEDULER
1039+ print (f"[FASTVIDEO DTYPE DEBUG] Step { i } : After scheduler step" )
1040+ print (f"[FASTVIDEO DTYPE DEBUG] latents dtype: { latents .dtype } , sum: { latents .float ().sum ().item ():.6f} " )
1041+ with open ("/workspace/FastVideo/fastvideo_hidden_states.log" , "a" ) as f :
1042+ f .write (f"[FASTVIDEO DTYPE DEBUG] Step { i } : After scheduler step\n " )
1043+ f .write (f"[FASTVIDEO DTYPE DEBUG] latents dtype: { latents .dtype } , sum: { latents .float ().sum ().item ():.6f} \n " )
9651044 sum_value = latents .float ().sum ().item ()
9661045 logger .info (f"CosmosDenoisingStage: step { i } , updated latents sum = { sum_value :.6f} " )
9671046 # Write to output file
0 commit comments