
Commit c65b98c

Merge branch 'main' of github.com:HaozheQi/LLaVA-NeXT into main
2 parents 333808e + 381dd49


53 files changed (+536, -199 lines)

.gitignore

Lines changed: 4 additions & 1 deletion

@@ -15,7 +15,6 @@ dist
 # Editor
 .idea
 *.swp
-.vscode
 
 # Other
 .DS_Store
@@ -71,3 +70,7 @@ playground/*.json
 mlx_configs/
 data_processing/
 # demo/
+
+
+experiments/
+*.out

.vscode/launch.json

Lines changed: 101 additions & 0 deletions

@@ -0,0 +1,101 @@
{
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Run LLAVA Training with torchrun",
            "type": "debugpy",
            "request": "launch",
            "module": "torch.distributed.run",
            "env": {
                "CUDA_VISIBLE_DEVICES": "1,2",
                "OMP_NUM_THREADS": "8",
                "NCCL_IB_DISABLE": "0",
                "NCCL_IB_GID_INDEX": "3",
                "NCCL_SOCKET_IFNAME": "eth0",
                "NCCL_DEBUG": "INFO",
                "ACCELERATE_CPU_AFFINITY": "1",
                "LD_PRELOAD": "/usr/lib/x86_64-linux-gnu/libffi.so.7"
            },
            "args": [
                "--nproc_per_node=2",
                "--nnodes=1",
                "--node_rank=0",
                "--master_addr=127.0.0.1",
                "--master_port=29500",
                "llava/train/train_mem.py",
                "--deepspeed", "scripts/zero3.json",
                "--model_name_or_path", "lmms-lab/llava-onevision-qwen2-0.5b-ov",
                "--version", "qwen_1_5",
                "--data_path", "scripts/train/onevision.yaml",
                // "--image_folder", "/mediaPFM/data/haozhe/onevision/llava_data",
                "--image_folder", "/mediaPFM/data/haozhe/onevision/llava_data/geo3k/",
                "--video_folder", "/mediaPFM/data/haozhe/onevision/llava_video",
                "--mm_tunable_parts", "mm_vision_tower,mm_mlp_adapter,mm_language_model",
                "--mm_vision_tower_lr", "2e-6",
                "--vision_tower", "google/siglip-so400m-patch14-384",
                "--mm_projector_type", "mlp2x_gelu",
                "--mm_vision_select_layer", "-2",
                "--mm_use_im_start_end", "False",
                "--mm_use_im_patch_token", "False",
                "--group_by_modality_length", "True",
                "--image_aspect_ratio", "anyres_max_9",
                "--image_grid_pinpoints", "(1x1),...,(6x6)",
                "--mm_patch_merge_type", "spatial_unpad",
                "--bf16", "True",
                "--run_name", "test",
                "--output_dir", "experiments/test",
                "--num_train_epochs", "1",
                "--per_device_train_batch_size", "1",
                "--per_device_eval_batch_size", "4",
                "--gradient_accumulation_steps", "2",
                "--evaluation_strategy", "no",
                "--save_strategy", "steps",
                "--save_steps", "1000",
                "--save_total_limit", "1",
                "--learning_rate", "1e-5",
                "--weight_decay", "0.",
                "--warmup_ratio", "0.03",
                "--lr_scheduler_type", "cosine",
                "--logging_steps", "1",
                "--tf32", "True",
                "--model_max_length", "32768",
                "--gradient_checkpointing", "True",
                "--dataloader_num_workers", "4",
                "--lazy_preprocess", "True",
                "--report_to", "wandb",
                "--torch_compile", "True",
                "--torch_compile_backend", "inductor",
                "--dataloader_drop_last", "True",
                "--frames_upbound", "32"
            ],
            "console": "integratedTerminal",
            "justMyCode": false,
            "cwd": "${workspaceFolder}"
        }
    ]
}

// {
//     // Use IntelliSense to learn about possible attributes.
//     // Hover to view descriptions of existing attributes.
//     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
//     "version": "0.2.0",
//     "configurations": [
//         {
//             "name": "Python: Current File",
//             "type": "debugpy",
//             "request": "launch",
//             "program": "docs/LLaVA_OneVision_Tutorials.py",
//             "console": "integratedTerminal",
//             "env": {
//                 "CUDA_VISIBLE_DEVICES": "0",
//                 "LD_PRELOAD": "/usr/lib/x86_64-linux-gnu/libffi.so.7",
//                 "LD_LIBRARY_PATH": "/home/haozhe/miniconda3/envs/llava/lib"
//             },
//             "justMyCode": false,
//             // "args": [
//             //     "--run_dir_name", "test",
//             //     // "--use_big_decoder"
//             // ]
//         }
//     ]
// }
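For reference, the "module": "torch.distributed.run" entry plus the "args" list above corresponds to launching the same job outside the debugger via python -m torch.distributed.run (the torchrun entry point). A minimal Python sketch of that equivalence — values copied from the configuration above; the subprocess wrapper itself is illustrative and not part of this commit:

import os
import subprocess

# Environment variables from the "env" block of the launch configuration.
env = dict(
    os.environ,
    CUDA_VISIBLE_DEVICES="1,2",
    OMP_NUM_THREADS="8",
    NCCL_DEBUG="INFO",
    LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libffi.so.7",
)

# "module": "torch.distributed.run" + "args" is equivalent to running
# `python -m torch.distributed.run <args>` from the workspace folder.
cmd = [
    "python", "-m", "torch.distributed.run",
    "--nproc_per_node=2", "--nnodes=1", "--node_rank=0",
    "--master_addr=127.0.0.1", "--master_port=29500",
    "llava/train/train_mem.py",
    "--deepspeed", "scripts/zero3.json",
    "--model_name_or_path", "lmms-lab/llava-onevision-qwen2-0.5b-ov",
    # ... remaining training flags exactly as in the "args" list above ...
]
subprocess.run(cmd, env=env, check=True)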

add_dataset_name.py

Lines changed: 33 additions & 0 deletions

@@ -0,0 +1,33 @@
import json
import os

# Tag every record in the instruction-tuning annotation files with the name
# of the dataset it came from, then write the result to a new directory.
json_root = '/mediaPFM/data/haozhe/onevision/llava_instruct_old'
save_root = '/mediaPFM/data/haozhe/onevision/llava_instruct'

json_list = os.listdir(json_root)
for json_name in json_list:
    json_path = os.path.join(json_root, json_name)
    if json_path.endswith(".jsonl"):
        # JSON Lines: one record per line.
        cur_data_dict = []
        with open(json_path, "r") as json_file:
            for line in json_file:
                cur_data_dict.append(json.loads(line.strip()))
    elif json_path.endswith(".json"):
        # Plain JSON: a single list of records.
        with open(json_path, "r") as json_file:
            cur_data_dict = json.load(json_file)
    else:
        raise ValueError(f"Unsupported file type: {json_path}")

    # Derive the dataset name from the file name (stem without extension).
    dataset_name = json_path.split('/')[-1].split('.')[0]
    for data in cur_data_dict:
        data['dataset_name'] = dataset_name

    # Save back in the same format under save_root.
    save_path = os.path.join(save_root, json_name)
    with open(save_path, "w") as json_file:
        if json_path.endswith(".jsonl"):
            for data in cur_data_dict:
                json_file.write(json.dumps(data) + "\n")
        elif json_path.endswith(".json"):
            json.dump(cur_data_dict, json_file, indent=4)
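A quick read-back check — assuming the same save_root as in the script above; not part of the commit — can confirm that every record picked up the tag:

import json
import os

save_root = '/mediaPFM/data/haozhe/onevision/llava_instruct'

for name in os.listdir(save_root):
    path = os.path.join(save_root, name)
    with open(path, "r") as f:
        if name.endswith(".jsonl"):
            records = [json.loads(line) for line in f]
        else:
            records = json.load(f)
    missing = sum(1 for r in records if 'dataset_name' not in r)
    print(f"{name}: {len(records)} records, {missing} missing the tag")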

cog.yaml

File mode changed: 100644 → 100755

docs/LLaVA-NeXT-Interleave.md

File mode changed: 100644 → 100755

docs/LLaVA-NeXT-Video.md

File mode changed: 100644 → 100755

docs/LLaVA-NeXT-Video_0716.md

File mode changed: 100644 → 100755

docs/LLaVA-NeXT.md

File mode changed: 100644 → 100755

docs/LLaVA_OneVision.md

File mode changed: 100644 → 100755

docs/LLaVA_OneVision_Chat.md

File mode changed: 100644 → 100755
