
Commit 03cfaf7

Merge branch 'main' of github.com:HaozheQi/LLaVA-NeXT into main

2 parents: f94b866 + fd63d2a

File tree: 3 files changed, +107 -101 lines

.vscode/launch.json
run_EK100_2.sh
scripts/train/onevision.yaml

.vscode/launch.json

Lines changed: 99 additions & 99 deletions
@@ -1,82 +1,105 @@
+{
+"version": "0.2.0",
+"configurations": [
+{
+"name": "Run LLAVA Training with torchrun",
+"type": "debugpy",
+"request": "launch",
+"module": "torch.distributed.run",
+"env": {
+"CUDA_VISIBLE_DEVICES": "1,2",
+"OMP_NUM_THREADS": "8",
+"NCCL_IB_DISABLE": "0",
+"NCCL_IB_GID_INDEX": "3",
+"NCCL_SOCKET_IFNAME": "eth0",
+"NCCL_DEBUG": "INFO",
+"ACCELERATE_CPU_AFFINITY": "1",
+"LD_PRELOAD": "/usr/lib/x86_64-linux-gnu/libffi.so.7",
+"WANDB_API_KEY": "65aeda82a75f1eed29c8e9250b175fcc73dca0d7",
+},
+"args": [
+"--nproc_per_node=2",
+"--nnodes=1",
+"--node_rank=0",
+"--master_addr=127.0.0.1",
+"--master_port=29500",
+"llava/train/train_mem.py",
+"--deepspeed", "scripts/zero3.json",
+"--model_name_or_path", "lmms-lab/llava-onevision-qwen2-0.5b-ov",
+"--version", "qwen_1_5",
+"--data_path", "scripts/train/onevision.yaml",
+// "--image_folder", "/mediaPFM/data/haozhe/onevision/llava_data",
+"--image_folder", "/mediaPFM/data/haozhe/onevision/llava_data/geo3k/",
+"--video_folder", "/mediaPFM/data/haozhe/onevision/llava_video",
+"--mm_tunable_parts", "mm_vision_tower,mm_mlp_adapter,mm_language_model",
+"--mm_vision_tower_lr", "2e-6",
+"--vision_tower", "google/siglip-so400m-patch14-384",
+"--mm_projector_type", "mlp2x_gelu",
+"--mm_vision_select_layer", "-2",
+"--mm_use_im_start_end", "False",
+"--mm_use_im_patch_token", "False",
+"--group_by_modality_length", "True",
+"--image_aspect_ratio", "anyres_max_9",
+"--image_grid_pinpoints", "(1x1),...,(6x6)",
+"--mm_patch_merge_type", "spatial_unpad",
+"--bf16", "True",
+"--run_name", "test1",
+"--output_dir", "experiments/test1",
+"--num_train_epochs", "1",
+"--per_device_train_batch_size", "1",
+"--per_device_eval_batch_size", "4",
+"--gradient_accumulation_steps", "2",
+"--evaluation_strategy", "no",
+"--save_strategy", "steps",
+"--save_steps", "1000",
+"--save_total_limit", "1",
+"--learning_rate", "1e-5",
+"--weight_decay", "0.",
+"--warmup_ratio", "0.03",
+"--lr_scheduler_type", "cosine",
+"--logging_steps", "1",
+"--tf32", "True",
+"--model_max_length", "32768",
+"--gradient_checkpointing", "True",
+"--dataloader_num_workers", "4",
+"--lazy_preprocess", "True",
+"--report_to", "wandb",
+"--torch_compile", "True",
+"--torch_compile_backend", "inductor",
+"--dataloader_drop_last", "True",
+"--frames_upbound", "32",
+],
+"console": "integratedTerminal",
+"justMyCode": false,
+"cwd": "${workspaceFolder}"
+}
+]
+}
+
+
 // {
+// // Use IntelliSense to learn about possible attributes.
+// // Hover to view descriptions of existing attributes.
+// // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
 // "version": "0.2.0",
 // "configurations": [
 // {
-// "name": "Run LLAVA Training with torchrun",
+// "name": "Python: Current File",
 // "type": "debugpy",
 // "request": "launch",
-// "module": "torch.distributed.run",
-// "env": {
-// "CUDA_VISIBLE_DEVICES": "1,2,3",
-// "OMP_NUM_THREADS": "8",
-// "NCCL_IB_DISABLE": "0",
-// "NCCL_IB_GID_INDEX": "3",
-// "NCCL_SOCKET_IFNAME": "eth0",
-// "NCCL_DEBUG": "INFO",
-// "ACCELERATE_CPU_AFFINITY": "1",
-// "LD_PRELOAD": "/usr/lib/x86_64-linux-gnu/libffi.so.7",
-// "WANDB_API_KEY": "65aeda82a75f1eed29c8e9250b175fcc73dca0d7",
-// },
-// "args": [
-// "--nproc_per_node=3",
-// "--nnodes=1",
-// "--node_rank=0",
-// "--master_addr=127.0.0.1",
-// "--master_port=29500",
-// "llava/train/train_mem.py",
-// "--deepspeed", "scripts/zero3.json",
-// "--model_name_or_path", "lmms-lab/llava-onevision-qwen2-7b-ov",
-// "--version", "qwen_1_5",
-// "--data_path", "scripts/train/onevision.yaml",
-// // "--image_folder", "/mediaPFM/data/haozhe/onevision/llava_data",
-// "--image_folder", "/mediaPFM/data/haozhe/onevision/llava_data/geo3k/",
-// "--video_folder", "/mediaPFM/data/haozhe/onevision/llava_video",
-// "--mm_tunable_parts", "mm_vision_tower,mm_mlp_adapter,mm_language_model",
-// "--mm_vision_tower_lr", "2e-6",
-// "--vision_tower", "google/siglip-so400m-patch14-384",
-// "--mm_projector_type", "mlp2x_gelu",
-// "--mm_vision_select_layer", "-2",
-// "--mm_use_im_start_end", "False",
-// "--mm_use_im_patch_token", "False",
-// "--group_by_modality_length", "True",
-// "--image_aspect_ratio", "anyres_max_9",
-// "--image_grid_pinpoints", "(1x1),...,(6x6)",
-// "--mm_patch_merge_type", "spatial_unpad",
-// "--bf16", "True",
-// "--run_name", "test1",
-// "--output_dir", "experiments/test1",
-// "--num_train_epochs", "1",
-// "--per_device_train_batch_size", "1",
-// "--per_device_eval_batch_size", "4",
-// "--gradient_accumulation_steps", "2",
-// "--evaluation_strategy", "no",
-// "--save_strategy", "steps",
-// "--save_steps", "1000",
-// "--save_total_limit", "1",
-// "--learning_rate", "1e-5",
-// "--weight_decay", "0.",
-// "--warmup_ratio", "0.03",
-// "--lr_scheduler_type", "cosine",
-// "--logging_steps", "1",
-// "--tf32", "True",
-// "--model_max_length", "32768",
-// "--gradient_checkpointing", "True",
-// "--dataloader_num_workers", "4",
-// "--lazy_preprocess", "True",
-// "--report_to", "wandb",
-// "--torch_compile", "True",
-// "--torch_compile_backend", "inductor",
-// "--dataloader_drop_last", "True",
-// "--frames_upbound", "32",
-// ],
+// "program": "docs/LLaVA_OneVision_Tutorials.py",
 // "console": "integratedTerminal",
+// "env":{"CUDA_VISIBLE_DEVICES":"0",
+// "LD_PRELOAD": "/usr/lib/x86_64-linux-gnu/libffi.so.7"},
 // "justMyCode": false,
-// "cwd": "${workspaceFolder}"
+// // "args": [
+// // "--run_dir_name", "test",
+// // // "--use_big_decoder"
+// // ]
 // }
 // ]
 // }

-
 // {
 // // Use IntelliSense to learn about possible attributes.
 // // Hover to view descriptions of existing attributes.
@@ -87,39 +110,16 @@
 // "name": "Python: Current File",
 // "type": "debugpy",
 // "request": "launch",
-// "program": "docs/LLaVA_OneVision_Tutorials.py",
+// "program": "action/dataset.py",
 // "console": "integratedTerminal",
-// "env":{"CUDA_VISIBLE_DEVICES":"0",
-// "LD_PRELOAD": "/usr/lib/x86_64-linux-gnu/libffi.so.7"},
+// "env":{"CUDA_VISIBLE_DEVICES":"0"},
 // "justMyCode": false,
-// // "args": [
-// // "--run_dir_name", "test",
-// // // "--use_big_decoder"
-// // ]
+// "args": [
+// "--root", "/mnt/SV_storage/VFM/EK100/EK100_320p_15sec_30fps_libx264",
+// "--train-metadata", "/mnt/SV_storage/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_train.csv",
+// "--val-metadata", "/mnt/SV_storage/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv",
+// // "--use_big_decoder"
+// ]
 // }
 // ]
-// }
-
-{
-// Use IntelliSense to learn about possible attributes.
-// Hover to view descriptions of existing attributes.
-// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
-"version": "0.2.0",
-"configurations": [
-{
-"name": "Python: Current File",
-"type": "debugpy",
-"request": "launch",
-"program": "action/dataset.py",
-"console": "integratedTerminal",
-"env":{"CUDA_VISIBLE_DEVICES":"0"},
-"justMyCode": false,
-"args": [
-"--root", "/mnt/SV_storage/VFM/EK100/EK100_320p_15sec_30fps_libx264",
-"--train-metadata", "/mnt/SV_storage/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_train.csv",
-"--val-metadata", "/mnt/SV_storage/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv",
-// "--use_big_decoder"
-]
-}
-]
-}
+// }
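
For orientation, here is a rough shell equivalent of the new "Run LLAVA Training with torchrun" debug configuration. It is a sketch only: the launcher flags, paths, and hyperparameters are copied from the launch.json above (the WANDB_API_KEY export is left out), and it is not a script that ships with the repository.

export CUDA_VISIBLE_DEVICES=1,2 OMP_NUM_THREADS=8 ACCELERATE_CPU_AFFINITY=1
export NCCL_IB_DISABLE=0 NCCL_IB_GID_INDEX=3 NCCL_SOCKET_IFNAME=eth0 NCCL_DEBUG=INFO
export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libffi.so.7
python -m torch.distributed.run --nproc_per_node=2 --nnodes=1 --node_rank=0 \
    --master_addr=127.0.0.1 --master_port=29500 \
    llava/train/train_mem.py \
    --deepspeed scripts/zero3.json \
    --model_name_or_path lmms-lab/llava-onevision-qwen2-0.5b-ov \
    --version qwen_1_5 \
    --data_path scripts/train/onevision.yaml \
    --image_folder /mediaPFM/data/haozhe/onevision/llava_data/geo3k/ \
    --video_folder /mediaPFM/data/haozhe/onevision/llava_video \
    --vision_tower google/siglip-so400m-patch14-384 \
    --output_dir experiments/test1 --run_name test1 \
    --bf16 True --tf32 True --gradient_checkpointing True \
    --per_device_train_batch_size 1 --gradient_accumulation_steps 2 \
    --learning_rate 1e-5 --num_train_epochs 1 --report_to wandb
# The remaining --mm_*, scheduler, saving, and dataloader flags mirror the JSON block above.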

run_EK100_2.sh

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+python3 action/dataset.py \
+--root /media/data/haozhe/VFM/EK100/EK100_320p_15sec_30fps_libx264 \
+--train-metadata /media/data/haozhe/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_train.csv \
+--val-metadata /media/data/haozhe/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv \
+--llm_size 0.5b \
+--llava_num_frames 16 > kitchen_test_2.out 2>&1 \
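
One possible way to use the new script (an assumption, since the commit does not say how it is invoked; it presumes the EK100 paths above exist on the host and that the command is run from the repository root):

bash run_EK100_2.sh          # runs action/dataset.py on the EK100 annotations
tail -f kitchen_test_2.out   # the script redirects stdout and stderr into this log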

scripts/train/onevision.yaml

Lines changed: 2 additions & 2 deletions
@@ -67,8 +67,8 @@ datasets:
 # sampling_strategy: "all"
 # - json_path: /mnt/bn/vl-research/data/llava_instruct/real_vision_flan/mathqa_29837.json
 # sampling_strategy: "all"
-- json_path: /media/data/haozhe/VFM/onevision/llava_instruct/geo3k.json
-# - json_path: /mediaPFM/data/haozhe/onevision/llava_instruct/geo3k.json
+# - json_path: /media/data/haozhe/VFM/onevision/llava_instruct/geo3k.json
+- json_path: /mediaPFM/data/haozhe/onevision/llava_instruct/geo3k.json
 sampling_strategy: "all"
 # - json_path: /mnt/bn/vl-research/data/llava_instruct/real_vision_flan/geo170k_qa_converted_67833.json
 # sampling_strategy: "first:10%"
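
An optional sanity check after flipping the geo3k entry (nothing in the repository mandates this; the grep pattern simply matches the uncommented json_path lines, and the test confirms the new path is reachable on the training host):

grep -nE '^[[:space:]]*- json_path:' scripts/train/onevision.yaml
test -f /mediaPFM/data/haozhe/onevision/llava_instruct/geo3k.json && echo "geo3k.json reachable"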
