@@ -120,7 +120,6 @@ def _prepare_video_coords(
         height: int,
         width: int,
         rope_interpolation_scale: Tuple[torch.Tensor, float, float],
-        frame_rate: float,
         device: torch.device,
     ) -> torch.Tensor:
         # Always compute rope in fp32
@@ -146,7 +145,6 @@ def forward(
         num_frames: Optional[int] = None,
         height: Optional[int] = None,
         width: Optional[int] = None,
-        frame_rate: Optional[int] = None,
         rope_interpolation_scale: Optional[Tuple[torch.Tensor, float, float]] = None,
         video_coords: Optional[torch.Tensor] = None,
     ) -> Tuple[torch.Tensor, torch.Tensor]:
@@ -159,7 +157,6 @@ def forward(
                 height,
                 width,
                 rope_interpolation_scale=rope_interpolation_scale,
-                frame_rate=frame_rate,
                 device=hidden_states.device,
             )
         else:
@@ -404,7 +401,6 @@ def forward(
         num_frames: Optional[int] = None,
         height: Optional[int] = None,
         width: Optional[int] = None,
-        frame_rate: Optional[int] = None,
         rope_interpolation_scale: Optional[Union[Tuple[float, float, float], torch.Tensor]] = None,
         video_coords: Optional[torch.Tensor] = None,
         attention_kwargs: Optional[Dict[str, Any]] = None,
@@ -425,9 +421,7 @@ def forward(
                     "Passing `scale` via `attention_kwargs` when not using the PEFT backend is ineffective."
                 )

-        image_rotary_emb = self.rope(
-            hidden_states, num_frames, height, width, frame_rate, rope_interpolation_scale, video_coords
-        )
+        image_rotary_emb = self.rope(hidden_states, num_frames, height, width, rope_interpolation_scale, video_coords)

        # convert encoder_attention_mask to a bias the same way we do for attention_mask
        if encoder_attention_mask is not None and encoder_attention_mask.ndim == 2:
0 commit comments