Skip to content

Commit a2c7350

Browse files
Update modeling_visual_language.py
1 parent 6c88fbf commit a2c7350

File tree

1 file changed

+1
-2
lines changed

1 file changed

+1
-2
lines changed

optimum/intel/openvino/modeling_visual_language.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3845,10 +3845,9 @@ def get_video_features(
3845 3845
video_grid_thw (`torch.LongTensor` of shape `(num_videos, 3)`, *optional*):
3846 3846
The temporal, height and width of feature shape of each video in LLM.
3847 3847
"""
3848-
pixel_values_videos = pixel_values_videos.type(self.visual.dtype)
3849 3848
video_embeds = self.get_vision_embeddings(pixel_values_videos, video_grid_thw)
3850 3849
video_embeds, deepstack_video_embeds = torch.from_numpy(video_embeds[0]), torch.from_numpy(video_embeds[1])
3851-
split_sizes = (video_grid_thw.prod(-1) // self.visual.spatial_merge_size**2).tolist()
3850+
split_sizes = (video_grid_thw.prod(-1) // self.spatial_merge_size**2).tolist()
3852 3851
video_embeds = torch.split(video_embeds, split_sizes)
3853 3852
return video_embeds, deepstack_video_embeds
3854 3853

0 commit comments

Comments
 (0)