Skip to content

Commit 05ca02e

Browse files
update video code
1 parent 9f9f4a3 commit 05ca02e

File tree

4 files changed

+11 -6 lines changed

4 files changed

+11 -6 lines changed

llava/model/llava_arch.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,9 @@ def prepare_inputs_labels_for_multimodal(self, input_ids, position_ids, attentio
221 221   if vision_tower is None or images is None or input_ids.shape[1] == 1:
222 222   return input_ids, position_ids, attention_mask, past_key_values, None, labels
223 223
    224 + if isinstance(modalities, str):
    225 + modalities = [modalities]
    226 +
224 227   if type(images) is list or images.ndim == 5:
225 228   if type(images) is list:
226 229   images = [x.unsqueeze(0) if x.ndim == 3 else x for x in images]
@@ -230,6 +233,8 @@ def prepare_inputs_labels_for_multimodal(self, input_ids, position_ids, attentio
230 233   if modalities[_] == "video":
231 234   video_idx_in_batch.append(_)
232 235
    236 + # print(video_idx_in_batch)
    237 +
233 238   images_list = []
234 239   for image in images:
235 240   if image.ndim == 4:

playground/demo/video_demo.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
 1  1   import argparse
 2  2   import torch
 3  3
 4    - from llavavid.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
 5    - from llavavid.conversation import conv_templates, SeparatorStyle
 6    - from llavavid.model.builder import load_pretrained_model
 7    - from llavavid.utils import disable_torch_init
 8    - from llavavid.mm_utils import process_anyres_image,tokenizer_image_token, get_model_name_from_path, KeywordsStoppingCriteria
    4 + from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
    5 + from llava.conversation import conv_templates, SeparatorStyle
    6 + from llava.model.builder import load_pretrained_model
    7 + from llava.utils import disable_torch_init
    8 + from llava.mm_utils import process_anyres_image,tokenizer_image_token, get_model_name_from_path, KeywordsStoppingCriteria
 9  9
10 10   import json
11 11   import os
Binary file not shown (265 KB).

scripts/video/eval/activitynet_eval.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ echo $SAVE_DIR
87 87
88 88   # wait
89 89
90    - python3 llavavid/eval/eval_activitynet_qa.py \
   90 + python3 llava/eval/eval_activitynet_qa.py \
91 91   --pred_path ./work_dirs/eval_activitynet/$SAVE_DIR \
92 92   --output_dir ./work_dirs/eval_activitynet/$SAVE_DIR/results \
93 93   --output_json ./work_dirs/eval_activitynet/$SAVE_DIR/results.json \

0 commit comments

Comments (0)