@@ -584,7 +584,8 @@ def add_noise_to_image_conditioning_latents(
584584        Add timestep-dependent noise to the hard-conditioning latents. This helps with motion continuity, especially 
585585        when conditioned on a single frame. 
586586        """ 
587-         generator  =  torch .Generator (device = "cpu" ).manual_seed (0 )
587+         # YiYi TODO: testing only, remove this change before merging 
588+         # generator = torch.Generator(device="cpu").manual_seed(0) 
588589        noise  =  randn_tensor (
589590            latents .shape ,
590591            generator = generator ,
@@ -618,7 +619,8 @@ def prepare_latents(
618619
619620        shape  =  (batch_size , num_channels_latents , num_latent_frames , latent_height , latent_width )
620621        latents  =  randn_tensor (shape , generator = generator , device = device , dtype = dtype )
621-         latents  =  torch .load ("/raid/yiyi/LTX-Video/init_latents.pt" ).to (device , dtype = dtype )
622+         # YiYi TODO: testing only, remove this change before merging 
623+         # latents = torch.load("/raid/yiyi/LTX-Video/init_latents.pt").to(device, dtype=dtype) 
622624
623625        condition_latent_frames_mask  =  torch .zeros ((batch_size , num_latent_frames ), device = device , dtype = torch .float32 )
624626
@@ -628,8 +630,9 @@ def prepare_latents(
628630        extra_conditioning_num_latents  =  0 
629631        for  data , strength , frame_index  in  zip (conditions , condition_strength , condition_frame_index ):
630632            condition_latents  =  retrieve_latents (self .vae .encode (data ), generator = generator )
631-             condition_latents  =  self ._normalize_latents (condition_latents , self .vae .latents_mean , self .vae .latents_std )
632-             condition_latents  =  torch .load ("/raid/yiyi/LTX-Video/conditioning_latents.pt" ).to (device , dtype = dtype )
633+             condition_latents  =  self ._normalize_latents (
634+                 condition_latents , self .vae .latents_mean , self .vae .latents_std 
635+             ).to (device , dtype = dtype )
633636
634637            num_data_frames  =  data .size (2 )
635638            num_cond_frames  =  condition_latents .size (2 )
@@ -659,7 +662,8 @@ def prepare_latents(
659662                        condition_latents  =  condition_latents [:, :, :num_prefix_latent_frames ]
660663
661664                noise  =  randn_tensor (condition_latents .shape , generator = generator , device = device , dtype = dtype )
662-                 noise  =  torch .load ("/raid/yiyi/LTX-Video/noise.pt" ).to (device , dtype = dtype )
665+                 # YiYi TODO: testing only, remove this change before merging 
666+                 # noise = torch.load("/raid/yiyi/LTX-Video/noise.pt").to(device, dtype=dtype) 
663667                condition_latents  =  torch .lerp (noise , condition_latents , strength )
664668
665669                condition_video_ids  =  self ._prepare_video_ids (
0 commit comments