Skip to content

Commit 0e8073f

Browse files
committed
Add test code for the EK100 dataset
1 parent 92a1168 commit 0e8073f

File tree

7 files changed

+54
-21
lines changed

7 files changed

+54
-21
lines changed

.vscode/launch.json

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
// "--master_port=29500",
2626
// "llava/train/train_mem.py",
2727
// "--deepspeed", "scripts/zero3.json",
28-
// "--model_name_or_path", "lmms-lab/llava-onevision-qwen2-0.5b-ov",
28+
// "--model_name_or_path", "lmms-lab/llava-onevision-qwen2-7b-ov",
2929
// "--version", "qwen_1_5",
3030
// "--data_path", "scripts/train/onevision.yaml",
3131
// // "--image_folder", "/mediaPFM/data/haozhe/onevision/llava_data",
@@ -43,8 +43,8 @@
4343
// "--image_grid_pinpoints", "(1x1),...,(6x6)",
4444
// "--mm_patch_merge_type", "spatial_unpad",
4545
// "--bf16", "True",
46-
// "--run_name", "test",
47-
// "--output_dir", "experiments/test",
46+
// "--run_name", "test1",
47+
// "--output_dir", "experiments/test1",
4848
// "--num_train_epochs", "1",
4949
// "--per_device_train_batch_size", "1",
5050
// "--per_device_eval_batch_size", "4",
@@ -77,6 +77,29 @@
7777
// }
7878

7979

80+
// {
81+
// // Use IntelliSense to learn about possible attributes.
82+
// // Hover to view descriptions of existing attributes.
83+
// // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
84+
// "version": "0.2.0",
85+
// "configurations": [
86+
// {
87+
// "name": "Python: Current File",
88+
// "type": "debugpy",
89+
// "request": "launch",
90+
// "program": "docs/LLaVA_OneVision_Tutorials.py",
91+
// "console": "integratedTerminal",
92+
// "env":{"CUDA_VISIBLE_DEVICES":"0",
93+
// "LD_PRELOAD": "/usr/lib/x86_64-linux-gnu/libffi.so.7"},
94+
// "justMyCode": false,
95+
// // "args": [
96+
// // "--run_dir_name", "test",
97+
// // // "--use_big_decoder"
98+
// // ]
99+
// }
100+
// ]
101+
// }
102+
80103
{
81104
// Use IntelliSense to learn about possible attributes.
82105
// Hover to view descriptions of existing attributes.
@@ -87,15 +110,16 @@
87110
"name": "Python: Current File",
88111
"type": "debugpy",
89112
"request": "launch",
90-
"program": "docs/LLaVA_OneVision_Tutorials.py",
113+
"program": "action/dataset.py",
91114
"console": "integratedTerminal",
92-
"env":{"CUDA_VISIBLE_DEVICES":"0",
93-
"LD_PRELOAD": "/usr/lib/x86_64-linux-gnu/libffi.so.7"},
115+
"env":{"CUDA_VISIBLE_DEVICES":"0"},
94116
"justMyCode": false,
95-
// "args": [
96-
// "--run_dir_name", "test",
97-
// // "--use_big_decoder"
98-
// ]
117+
"args": [
118+
"--root", "/mnt/SV_storage/VFM/EK100/EK100_320p_15sec_30fps_libx264",
119+
"--train-metadata", "/mnt/SV_storage/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_train.csv",
120+
"--val-metadata", "/mnt/SV_storage/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv",
121+
// "--use_big_decoder"
122+
]
99123
}
100124
]
101125
}

action/dataset.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
import decord
1111
from torch.utils.data import DataLoader
1212
from tqdm import tqdm
13+
from pathlib import Path
14+
import sys
15+
import os
16+
sys.path[0] = os.path.dirname(sys.path[0])
1317

1418

1519
def datetime2sec(str):
@@ -130,8 +134,9 @@ def __init__(self, dataset, root, metadata, is_trimmed=True):
130134
self.root = root
131135
self.metadata = metadata
132136
self.is_trimmed = is_trimmed
133-
self.verb_file = f'/data/EK100/epic-kitchens-100-annotations/EPIC_100_verb_classes.csv'
134-
self.noun_file = f'/data/EK100/epic-kitchens-100-annotations/EPIC_100_noun_classes.csv'
137+
anno_root = Path(metadata).parent
138+
self.verb_file = str(anno_root / 'EPIC_100_verb_classes.csv')
139+
self.noun_file = str(anno_root / 'EPIC_100_noun_classes.csv')
135140
self.verb_df = pd.read_csv(self.verb_file)
136141
self.nouns_df = pd.read_csv(self.noun_file)
137142
self.nouns = self.nouns_df['key'].to_list()
@@ -484,8 +489,8 @@ def generate_label_map():
484489
vn_list = []
485490
mapping_vn2narration = {}
486491
for f in [
487-
'/data/EK100/epic-kitchens-100-annotations/EPIC_100_train.csv',
488-
'/data/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv',
492+
'/media/data/haozhe/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_train.csv',
493+
'/media/data/haozhe/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv',
489494
]:
490495
csv_reader = csv.reader(open(f))
491496
_ = next(csv_reader) # skip the header
@@ -617,7 +622,7 @@ def get_args_parser():
617622
)
618623

619624
val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False)
620-
from llava_ov_inference import llava_inference
625+
from action.llava_ov_inference import llava_inference
621626
gts = []
622627
preds = []
623628
running_corrects = 0
@@ -644,5 +649,5 @@ def get_args_parser():
644649
# get final accuracy
645650
accuracy = np.mean(gts == preds)
646651
print('Final accuracy', accuracy)
647-
with open('llava_ov_4f_0.5b_result.txt', 'w') as f:
652+
with open('llava_ov_16f_7b_result.txt', 'w') as f:
648653
f.write(f'Final accuracy: {accuracy:.4f}\n')

action/llava_ov_inference.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
warnings.filterwarnings("ignore")
1717
# Load the OneVision model
18-
pretrained = "lmms-lab/llava-onevision-qwen2-0.5b-ov"
18+
pretrained = "lmms-lab/llava-onevision-qwen2-7b-ov"
1919
model_name = "llava_qwen"
2020
device = "cuda"
2121
device_map = "auto"
@@ -25,7 +25,6 @@
2525

2626
def llava_inference(video_frames, gt):
2727
video_frames = video_frames[0]
28-
video_frames = video_frames[::4]
2928
image_tensors = []
3029
frames = image_processor.preprocess(video_frames, return_tensors="pt")["pixel_values"].half().cuda()
3130
image_tensors.append(frames)

run.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ torchrun --nproc_per_node=2 \
3636
--image_grid_pinpoints "(1x1),...,(6x6)" \
3737
--mm_patch_merge_type spatial_unpad \
3838
--bf16 True \
39-
--run_name test \
40-
--output_dir experiments/test \
39+
--run_name test1 \
40+
--output_dir experiments/test1 \
4141
--num_train_epochs 1 \
4242
--per_device_train_batch_size 1 \
4343
--per_device_eval_batch_size 4 \

run_EK100.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
python3 action/dataset.py \
2+
--root /media/data/haozhe/VFM/EK100/EK100_320p_15sec_30fps_libx264 \
3+
--train-metadata /media/data/haozhe/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_train.csv \
4+
--val-metadata /media/data/haozhe/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv > kitchen_test.out 2>&1

run_demo.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ export CUDA_VISIBLE_DEVICES="0"
55
# export LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libffi.so.7"
66

77
# Run the Python script
8-
python docs/LLaVA_OneVision_Tutorials.py > demo7b.out 2>&1
8+
python3 docs/LLaVA_OneVision_Tutorials.py > demo7b.out 2>&1

scripts/train/onevision.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ datasets:
6868
# - json_path: /mnt/bn/vl-research/data/llava_instruct/real_vision_flan/mathqa_29837.json
6969
# sampling_strategy: "all"
7070
- json_path: /media/data/haozhe/VFM/onevision/llava_instruct/geo3k.json
71+
# - json_path: /mediaPFM/data/haozhe/onevision/llava_instruct/geo3k.json
7172
sampling_strategy: "all"
7273
# - json_path: /mnt/bn/vl-research/data/llava_instruct/real_vision_flan/geo170k_qa_converted_67833.json
7374
# sampling_strategy: "first:10%"

0 commit comments

Comments
 (0)