|
+{
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Run LLAVA Training with torchrun",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "torch.distributed.run",
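+            // torch.distributed.run is the module behind the torchrun CLI, so
+            // debugpy starts the launcher (and its spawned workers) under the debugger.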
+            "env": {
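+                // NCCL_* settings tune inter-GPU communication (InfiniBand enabled,
+                // GID index 3, eth0 for the bootstrap socket, verbose logging);
+                // CUDA_LAUNCH_BLOCKING=1 surfaces CUDA errors at the failing call
+                // (slow; debugging only). HF_HOME relocates the Hugging Face cache.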
+                "CUDA_VISIBLE_DEVICES": "0,1,2,3",
+                "OMP_NUM_THREADS": "8",
+                "NCCL_IB_DISABLE": "0",
+                "NCCL_IB_GID_INDEX": "3",
+                "NCCL_SOCKET_IFNAME": "eth0",
+                "NCCL_DEBUG": "INFO",
+                "ACCELERATE_CPU_AFFINITY": "1",
+                "LD_PRELOAD": "/usr/lib/x86_64-linux-gnu/libffi.so.7",
+                "WANDB_API_KEY": "<your-wandb-api-key>",
+                "CUDA_LAUNCH_BLOCKING": "1",
+                "HF_HOME": "/media/data/haozhe/VFM/huggingface"
+            },
+            "args": [
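+                // Flags before the script path are consumed by torch.distributed.run;
+                // everything after it is forwarded to train_mem.py.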
+                "--nproc_per_node=4",
+                "--nnodes=1",
+                "llava/train/train_mem.py",
+                "--deepspeed", "scripts/zero3.json",
+                "--model_name_or_path", "lmms-lab/llava-onevision-qwen2-0.5b-ov",
+                "--version", "qwen_1_5",
+                "--data_path", "scripts/train/llava_video_RCP.yaml",
+                "--video_folder", "/media/data/haozhe/VFM/onevision/llava_video",
+                "--mm_tunable_parts", "mm_vision_tower,mm_mlp_adapter,mm_language_model",
+                "--mm_vision_tower_lr", "2e-6",
+                "--vision_tower", "google/siglip-so400m-patch14-384",
+                "--mm_projector_type", "mlp2x_gelu",
+                "--mm_vision_select_layer", "-2",
+                "--mm_use_im_start_end", "False",
+                "--mm_use_im_patch_token", "False",
+                "--group_by_modality_length", "True",
+                "--image_aspect_ratio", "anyres_max_9",
+                "--image_grid_pinpoints", "(1x1),...,(6x6)",
+                "--mm_patch_merge_type", "spatial_unpad",
+                "--bf16", "True",
+                "--run_name", "dev_0.5b_llavavideo_haozhe",
+                "--output_dir", "experiments/dev_0.5b_llavavideo_haozhe",
+                "--num_train_epochs", "1",
+                "--per_device_train_batch_size", "1",
+                "--per_device_eval_batch_size", "4",
+                "--gradient_accumulation_steps", "2",
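+                // Effective global train batch size: 1 per device x 4 GPUs x 2 accumulation steps = 8.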
+                "--evaluation_strategy", "epoch",
+                "--eval_steps", "1",
+                "--save_strategy", "steps",
+                "--save_steps", "2000",
+                "--learning_rate", "1e-5",
+                "--weight_decay", "0.",
+                "--warmup_ratio", "0.03",
+                "--lr_scheduler_type", "cosine",
+                "--logging_steps", "1",
+                "--tf32", "True",
+                "--model_max_length", "32768",
+                "--gradient_checkpointing", "True",
+                "--dataloader_num_workers", "4",
+                "--lazy_preprocess", "True",
+                "--report_to", "wandb",
+                "--torch_compile", "True",
+                "--torch_compile_backend", "inductor",
+                "--dataloader_drop_last", "True",
+                "--frames_upbound", "64",
+                "--mm_newline_position", "grid",
+                "--add_time_instruction", "True",
+                "--force_sample", "True",
+                "--mm_spatial_pool_stride", "2",
+                "--root", "/media/data/haozhe/VFM/onevision/llava_video/EK100",
+                "--action_predictions", "/media/data/haozhe/VFM/EK100/EK100_in_LLAVA/TIM/tim_pred_ids_val.json",
+                "--val_metadata", "/media/data/haozhe/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv",
+                "--llava_num_frames", "64",
+                "--clip_length", "64",
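+                // frames_upbound, llava_num_frames, and clip_length are kept in sync at 64 frames.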
+                "--action_representation", "official_key",
+                "--topk_predictions", "5"
+            ],
+            "console": "integratedTerminal",
+            "justMyCode": false,
+            "cwd": "${workspaceFolder}"
+        }
+    ]
+}
+
+
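+// For reference, a roughly equivalent shell launch (assuming the same
+// environment variables are exported first) would be:
+//
+//   torchrun --nproc_per_node=4 --nnodes=1 llava/train/train_mem.py \
+//     --deepspeed scripts/zero3.json \
+//     --model_name_or_path lmms-lab/llava-onevision-qwen2-0.5b-ov \
+//     ... (remaining flags exactly as in the "args" list above)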
// {
// "version": "0.2.0",
// "configurations": [
// {
// "name": "Run LLAVA Training with torchrun",
// "type": "debugpy",
// "request": "launch",
-// "module": "torch.distributed.run",
+// "python": "/media/data/haozhe/VFM/llmseval-venv/bin/python",
+// "module": "accelerate.commands.launch",
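+// // "python" pins the debugger to the lmms-eval venv; accelerate.commands.launch
+// // is the module behind the `accelerate launch` CLI.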
// "env": {
-// "CUDA_VISIBLE_DEVICES": "0,2,3",
+// "CUDA_VISIBLE_DEVICES": "0,1,2,3",
// "OMP_NUM_THREADS": "8",
// "NCCL_IB_DISABLE": "0",
// "NCCL_IB_GID_INDEX": "3",
// "NCCL_SOCKET_IFNAME": "eth0",
// "NCCL_DEBUG": "INFO",
// "ACCELERATE_CPU_AFFINITY": "1",
// "LD_PRELOAD": "/usr/lib/x86_64-linux-gnu/libffi.so.7",
// "WANDB_API_KEY": "<your-wandb-api-key>",
// "CUDA_LAUNCH_BLOCKING": "1",
+// "HF_HOME": "/media/data/haozhe/VFM/huggingface",
+// "OPENAI_API_KEY": "<your-openai-api-key>"
// },
// "args": [
-// "--nproc_per_node=3",
-// "--nnodes=1",
-// "--node_rank=0",
-// "--master_addr=127.0.0.1",
-// "--master_port=29500",
-// "llava/train/train_mem.py",
-// "--deepspeed", "scripts/zero3.json",
-// "--model_name_or_path", "lmms-lab/llava-onevision-qwen2-0.5b-ov",
-// "--version", "qwen_1_5",
-// "--data_path", "scripts/train/onevision.yaml",
-// // "--image_folder", "/mediaPFM/data/haozhe/onevision/llava_data",
-// "--image_folder", "/mediaPFM/data/haozhe/onevision/llava_data/geo3k/",
-// "--video_folder", "/mediaPFM/data/haozhe/onevision/llava_video",
-// // "--video_folder", "/home/haozhe/kitchen/AVION/datasets",
-// "--mm_tunable_parts", "mm_vision_tower,mm_mlp_adapter,mm_language_model",
-// "--mm_vision_tower_lr", "2e-6",
-// "--vision_tower", "google/siglip-so400m-patch14-384",
-// "--mm_projector_type", "mlp2x_gelu",
-// "--mm_vision_select_layer", "-2",
-// "--mm_use_im_start_end", "False",
-// "--mm_use_im_patch_token", "False",
-// "--group_by_modality_length", "True",
-// "--image_aspect_ratio", "anyres_max_9",
-// "--image_grid_pinpoints", "(1x1),...,(6x6)",
-// "--mm_patch_merge_type", "spatial_unpad",
-// "--bf16", "True",
-// "--run_name", "test1",
-// "--output_dir", "experiments/test1",
-// "--num_train_epochs", "1",
-// "--per_device_train_batch_size", "1",
-// "--per_device_eval_batch_size", "4",
-// "--gradient_accumulation_steps", "2",
-// "--evaluation_strategy", "steps",
-// "--eval_steps", "100",
-// "--save_strategy", "steps",
-// "--save_steps", "2000",
-// // "--save_total_limit", "1",
-// "--learning_rate", "1e-5",
-// "--weight_decay", "0.",
-// "--warmup_ratio", "0.03",
-// "--lr_scheduler_type", "cosine",
-// "--logging_steps", "1",
-// "--tf32", "True",
-// "--model_max_length", "32768",
-// "--gradient_checkpointing", "True",
-// "--dataloader_num_workers", "4",
-// "--lazy_preprocess", "True",
-// "--report_to", "wandb",
-// "--torch_compile", "True",
-// "--torch_compile_backend", "inductor",
-// "--dataloader_drop_last", "True",
-// "--frames_upbound", "16",
-// "--root", "/mediaPFM/data/haozhe/onevision/llava_video/EK100",
-// "--action_predictions", "/mediaPFM/data/haozhe/EK100/EK100_in_LLAVA/avion_pred_ids_val.json",
-// "--val_metadata", "/mediaPFM/data/haozhe/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv",
-// "--llava_num_frames", "16",
-// "--clip_length", "16",
-// "--action_representation", "GT_random_narration",
-// "--topk_predictions", "5",
-// "--dataset", "ek100_cls",
-// "--vision_supervision", "three_tokens",
-// "--action_types", "97,300,3806"
+// "--num_processes", "4",
+// "-m", "lmms_eval",
+// // "--model", "llava_vid",
+// "--model", "llava_onevision",
+// // "--model_args", "pretrained=experiments/dev_LLaVA-Video-7B-Qwen2_4f_test_haozhe,conv_template=qwen_1_5,max_frames_num=64,mm_spatial_pool_mode=average",
+// "--model_args", "pretrained=lmms-lab/llava-onevision-qwen2-0.5b-ov,conv_template=qwen_1_5,model_name=llava_qwen",
+// "--tasks", "video_dc499",
+// "--batch_size", "1",
+// "--log_samples",
+// "--log_samples_suffix", "llava_onevision",
+// "--output_path", "./logs/"
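+// // Equivalent CLI (assuming the venv above is active):
+// //   accelerate launch --num_processes 4 -m lmms_eval --model llava_onevision \
+// //     --model_args pretrained=lmms-lab/llava-onevision-qwen2-0.5b-ov,conv_template=qwen_1_5,model_name=llava_qwen \
+// //     --tasks video_dc499 --batch_size 1 --log_samples --log_samples_suffix llava_onevision --output_path ./logs/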
// ],
// "console": "integratedTerminal",
// "justMyCode": false,
// "cwd": "${workspaceFolder}"
// }
// ]
// }

-
// {
// // Use IntelliSense to learn about possible attributes.
// // Hover to view descriptions of existing attributes.