-// {
-//     "version": "0.2.0",
-//     "configurations": [
-//         {
-//             "name": "Run LLAVA Training with torchrun",
-//             "type": "debugpy",
-//             "request": "launch",
-//             "module": "torch.distributed.run",
-//             "env": {
-//                 "CUDA_VISIBLE_DEVICES": "0,1,2,3",
-//                 "OMP_NUM_THREADS": "8",
-//                 "NCCL_IB_DISABLE": "0",
-//                 "NCCL_IB_GID_INDEX": "3",
-//                 "NCCL_SOCKET_IFNAME": "eth0",
-//                 "NCCL_DEBUG": "INFO",
-//                 "ACCELERATE_CPU_AFFINITY": "1",
-//                 "LD_PRELOAD": "/usr/lib/x86_64-linux-gnu/libffi.so.7",
-//                 "WANDB_API_KEY": "65aeda82a75f1eed29c8e9250b175fcc73dca0d7",
-//                 "CUDA_LAUNCH_BLOCKING": "1",
-//                 "HF_HOME": "/media/data/haozhe/VFM/huggingface",
-//             },
-//             "args": [
-//                 "--nproc_per_node=4",
-//                 "--nnodes=1",
-//                 "llava/train/train_mem.py",
-//                 "--deepspeed", "scripts/zero3.json",
-//                 "--model_name_or_path", "lmms-lab/llava-onevision-qwen2-0.5b-ov",
-//                 "--version", "qwen_1_5",
-//                 "--data_path", "scripts/train/llava_video.yaml",
-//                 "--video_folder", "/media/data/haozhe/VFM/onevision/llava_video",
-//                 "--mm_tunable_parts", "mm_vision_tower,mm_mlp_adapter,mm_language_model",
-//                 "--mm_vision_tower_lr", "2e-6",
-//                 "--vision_tower", "google/siglip-so400m-patch14-384",
-//                 "--mm_projector_type", "mlp2x_gelu",
-//                 "--mm_vision_select_layer", "-2",
-//                 "--mm_use_im_start_end", "False",
-//                 "--mm_use_im_patch_token", "False",
-//                 "--group_by_modality_length", "True",
-//                 "--image_aspect_ratio", "anyres_max_9",
-//                 "--image_grid_pinpoints", "(1x1),...,(6x6)",
-//                 "--mm_patch_merge_type", "spatial_unpad",
-//                 "--bf16", "True",
-//                 "--run_name", "dev_0.5b_4f_llavavideo_haozhe",
-//                 "--output_dir", "experiments/dev_0.5b_4f_llavavideo_haozhe",
-//                 "--num_train_epochs", "1",
-//                 "--per_device_train_batch_size", "8",
-//                 "--per_device_eval_batch_size", "4",
-//                 "--gradient_accumulation_steps", "2",
-//                 "--evaluation_strategy", "epoch",
-//                 "--eval_steps", "1",
-//                 "--save_strategy", "steps",
-//                 "--save_steps", "2000",
-//                 "--learning_rate", "1e-5",
-//                 "--weight_decay", "0.",
-//                 "--warmup_ratio", "0.03",
-//                 "--lr_scheduler_type", "cosine",
-//                 "--logging_steps", "1",
-//                 "--tf32", "True",
-//                 "--model_max_length", "32768",
-//                 "--gradient_checkpointing", "True",
-//                 "--dataloader_num_workers", "4",
-//                 "--lazy_preprocess", "True",
-//                 "--report_to", "wandb",
-//                 "--torch_compile", "True",
-//                 "--torch_compile_backend", "inductor",
-//                 "--dataloader_drop_last", "True",
-//                 "--frames_upbound", "4",
-//                 "--root", "/media/data/haozhe/VFM/onevision/llava_video/EK100",
-//                 "--action_predictions", "/media/data/haozhe/VFM/EK100/EK100_in_LLAVA/TIM/tim_pred_ids_val.json",
-//                 "--val_metadata", "/media/data/haozhe/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv",
-//                 "--add_time_instruction", "False",
-//                 "--llava_num_frames", "4",
-//                 "--clip_length", "4",
-//                 "--action_representation", "official_key",
-//                 "--topk_predictions", "5"
-//             ],
-//             "console": "integratedTerminal",
-//             "justMyCode": false,
-//             "cwd": "${workspaceFolder}"
-//         }
-//     ]
-// }
-
-
 {
     "version": "0.2.0",
     "configurations": [
         {
             "name": "Run LLAVA Training with torchrun",
             "type": "debugpy",
             "request": "launch",
-            "python": "/media/data/haozhe/VFM/llmseval-venv/bin/python",
-            "module": "accelerate.commands.launch",
+            "module": "torch.distributed.run",
             "env": {
                 "CUDA_VISIBLE_DEVICES": "0,1,2,3",
                 "OMP_NUM_THREADS": "8",
                 "NCCL_IB_DISABLE": "0",
                 "NCCL_IB_GID_INDEX": "3",
                 "NCCL_SOCKET_IFNAME": "eth0",
                 "NCCL_DEBUG": "INFO",
                 "ACCELERATE_CPU_AFFINITY": "1",
                 "LD_PRELOAD": "/usr/lib/x86_64-linux-gnu/libffi.so.7",
                 "WANDB_API_KEY": "65aeda82a75f1eed29c8e9250b175fcc73dca0d7",
                 "CUDA_LAUNCH_BLOCKING": "1",
                 "HF_HOME": "/media/data/haozhe/VFM/huggingface",
-                "OPENAI_API_KEY": "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"
             },
             "args": [
-                "--num_processes", "4",
-                "-m", "lmms_eval",
-                // "--model", "llava_vid",
-                "--model", "llava_onevision",
-                // "--model_args", "pretrained=experiments/dev_LLaVA-Video-7B-Qwen2_4f_test_haozhe,conv_template=qwen_1_5,max_frames_num=64,mm_spatial_pool_mode=average",
-                "--model_args", "pretrained=lmms-lab/llava-onevision-qwen2-0.5b-ov,conv_template=qwen_1_5,model_name=llava_qwen",
-                "--tasks", "video_dc499",
-                "--batch_size", "1",
-                "--log_samples",
-                "--log_samples_suffix", "llava_onevision",
-                "--output_path", "./logs/"
+                "--nproc_per_node=4",
+                "--nnodes=1",
+                "llava/train/train_mem.py",
+                "--deepspeed", "scripts/zero3.json",
+                "--model_name_or_path", "lmms-lab/llava-onevision-qwen2-0.5b-ov",
+                "--version", "qwen_1_5",
+                "--data_path", "scripts/train/llava_video_RCP.yaml",
+                "--video_folder", "/media/data/haozhe/VFM/onevision/llava_video",
+                "--mm_tunable_parts", "mm_vision_tower,mm_mlp_adapter,mm_language_model",
+                "--mm_vision_tower_lr", "2e-6",
+                "--vision_tower", "google/siglip-so400m-patch14-384",
+                "--mm_projector_type", "mlp2x_gelu",
+                "--mm_vision_select_layer", "-2",
+                "--mm_use_im_start_end", "False",
+                "--mm_use_im_patch_token", "False",
+                "--group_by_modality_length", "True",
+                "--image_aspect_ratio", "anyres_max_9",
+                "--image_grid_pinpoints", "(1x1),...,(6x6)",
+                "--mm_patch_merge_type", "spatial_unpad",
+                "--bf16", "True",
+                "--run_name", "dev_0.5b_llavavideo_haozhe",
+                "--output_dir", "experiments/dev_0.5b_llavavideo_haozhe",
+                "--num_train_epochs", "1",
+                "--per_device_train_batch_size", "1",
+                "--per_device_eval_batch_size", "4",
+                "--gradient_accumulation_steps", "2",
+                "--evaluation_strategy", "epoch",
+                "--eval_steps", "1",
+                "--save_strategy", "steps",
+                "--save_steps", "2000",
+                "--learning_rate", "1e-5",
+                "--weight_decay", "0.",
+                "--warmup_ratio", "0.03",
+                "--lr_scheduler_type", "cosine",
+                "--logging_steps", "1",
+                "--tf32", "True",
+                "--model_max_length", "32768",
+                "--gradient_checkpointing", "True",
+                "--dataloader_num_workers", "4",
+                "--lazy_preprocess", "True",
+                "--report_to", "wandb",
+                "--torch_compile", "True",
+                "--torch_compile_backend", "inductor",
+                "--dataloader_drop_last", "True",
+                "--frames_upbound", "64",
+                "--mm_newline_position", "grid",
+                "--add_time_instruction", "True",
+                "--force_sample", "True",
+                "--mm_spatial_pool_stride", "2",
+                "--root", "/media/data/haozhe/VFM/onevision/llava_video/EK100",
+                "--action_predictions", "/media/data/haozhe/VFM/EK100/EK100_in_LLAVA/TIM/tim_pred_ids_val.json",
+                "--val_metadata", "/media/data/haozhe/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv",
+                "--llava_num_frames", "64",
+                "--clip_length", "64",
+                "--action_representation", "official_key",
+                "--topk_predictions", "5"
             ],
             "console": "integratedTerminal",
             "justMyCode": false,
             "cwd": "${workspaceFolder}"
         }
     ]
 }
 
+
+// {
+//     "version": "0.2.0",
+//     "configurations": [
+//         {
+//             "name": "Run LLAVA Training with torchrun",
+//             "type": "debugpy",
+//             "request": "launch",
+//             "python": "/media/data/haozhe/VFM/llmseval-venv/bin/python",
+//             "module": "accelerate.commands.launch",
+//             "env": {
+//                 "CUDA_VISIBLE_DEVICES": "0,1,2,3",
+//                 "OMP_NUM_THREADS": "8",
+//                 "NCCL_IB_DISABLE": "0",
+//                 "NCCL_IB_GID_INDEX": "3",
+//                 "NCCL_SOCKET_IFNAME": "eth0",
+//                 "NCCL_DEBUG": "INFO",
+//                 "ACCELERATE_CPU_AFFINITY": "1",
+//                 "LD_PRELOAD": "/usr/lib/x86_64-linux-gnu/libffi.so.7",
+//                 "WANDB_API_KEY": "65aeda82a75f1eed29c8e9250b175fcc73dca0d7",
+//                 "CUDA_LAUNCH_BLOCKING": "1",
+//                 "HF_HOME": "/media/data/haozhe/VFM/huggingface",
+//                 "OPENAI_API_KEY": "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"
+//             },
+//             "args": [
+//                 "--num_processes", "4",
+//                 "-m", "lmms_eval",
+//                 // "--model", "llava_vid",
+//                 "--model", "llava_onevision",
+//                 // "--model_args", "pretrained=experiments/dev_LLaVA-Video-7B-Qwen2_4f_test_haozhe,conv_template=qwen_1_5,max_frames_num=64,mm_spatial_pool_mode=average",
+//                 "--model_args", "pretrained=lmms-lab/llava-onevision-qwen2-0.5b-ov,conv_template=qwen_1_5,model_name=llava_qwen",
+//                 "--tasks", "video_dc499",
+//                 "--batch_size", "1",
+//                 "--log_samples",
+//                 "--log_samples_suffix", "llava_onevision",
+//                 "--output_path", "./logs/"
+//             ],
+//             "console": "integratedTerminal",
+//             "justMyCode": false,
+//             "cwd": "${workspaceFolder}"
+//         }
+//     ]
+// }
+
 // {
 //     // Use IntelliSense to learn about possible attributes.
 //     // Hover to view descriptions of existing attributes.
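For reference, the active configuration above reduces to the pattern of launching torch.distributed.run as a debuggee module, the launch.json equivalent of `torchrun --nproc_per_node=4 --nnodes=1 llava/train/train_mem.py ...`. A minimal sketch with everything project-specific trimmed (only the entry script and GPU count are taken from the diff; the "name" is a placeholder, not the committed value):

{
    "version": "0.2.0",
    "configurations": [
        {
            // Debug a 4-GPU torchrun launch: debugpy attaches to the launcher
            // and, with its default subprocess debugging, should also attach
            // to the worker ranks that torch.distributed.run spawns.
            "name": "torchrun: train_mem.py (minimal sketch)",
            "type": "debugpy",
            "request": "launch",
            "module": "torch.distributed.run",
            "args": ["--nproc_per_node=4", "--nnodes=1", "llava/train/train_mem.py"],
            "console": "integratedTerminal",
            "justMyCode": false,
            "cwd": "${workspaceFolder}"
        }
    ]
}

Launching the module rather than the script is what lets a single F5 session stand in for the full torchrun command line; "justMyCode": false keeps frames inside torch and deepspeed steppable.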
|