|
| 1 | +// { |
| 2 | +// "version": "0.2.0", |
| 3 | +// "configurations": [ |
| 4 | +// { |
| 5 | +// "name": "Run LLAVA Training with torchrun", |
| 6 | +// "type": "debugpy", |
| 7 | +// "request": "launch", |
| 8 | +// "module": "torch.distributed.run", |
| 9 | +// "env": { |
| 10 | +// "CUDA_VISIBLE_DEVICES": "0,1,2,3", |
| 11 | +// "OMP_NUM_THREADS": "8", |
| 12 | +// "NCCL_IB_DISABLE": "0", |
| 13 | +// "NCCL_IB_GID_INDEX": "3", |
| 14 | +// "NCCL_SOCKET_IFNAME": "eth0", |
| 15 | +// "NCCL_DEBUG": "INFO", |
| 16 | +// "ACCELERATE_CPU_AFFINITY": "1", |
| 17 | +// "LD_PRELOAD": "/usr/lib/x86_64-linux-gnu/libffi.so.7", |
| 18 | +//                 "WANDB_API_KEY": "${env:WANDB_API_KEY}",  // SECURITY: never commit a real API key — the previously committed key must be rotated; read it from the environment instead
| 19 | +// "CUDA_LAUNCH_BLOCKING": "1", |
| 20 | +// "HF_HOME": "/media/data/haozhe/VFM/huggingface", |
| 21 | +// }, |
| 22 | +// "args": [ |
| 23 | +// "--nproc_per_node=4", |
| 24 | +// "--nnodes=1", |
| 25 | +// "llava/train/train_mem.py", |
| 26 | +// "--deepspeed", "scripts/zero3.json", |
| 27 | +// "--model_name_or_path", "lmms-lab/llava-onevision-qwen2-0.5b-ov", |
| 28 | +// "--version", "qwen_1_5", |
| 29 | +// "--data_path", "scripts/train/llava_video.yaml", |
| 30 | +// "--video_folder", "/media/data/haozhe/VFM/onevision/llava_video", |
| 31 | +// "--mm_tunable_parts", "mm_vision_tower,mm_mlp_adapter,mm_language_model", |
| 32 | +// "--mm_vision_tower_lr", "2e-6", |
| 33 | +// "--vision_tower", "google/siglip-so400m-patch14-384", |
| 34 | +// "--mm_projector_type", "mlp2x_gelu", |
| 35 | +// "--mm_vision_select_layer", "-2", |
| 36 | +// "--mm_use_im_start_end", "False", |
| 37 | +// "--mm_use_im_patch_token", "False", |
| 38 | +// "--group_by_modality_length", "True", |
| 39 | +// "--image_aspect_ratio", "anyres_max_9", |
| 40 | +// "--image_grid_pinpoints", "(1x1),...,(6x6)", |
| 41 | +// "--mm_patch_merge_type", "spatial_unpad", |
| 42 | +// "--bf16", "True", |
| 43 | +// "--run_name", "dev_0.5b_4f_llavavideo_haozhe", |
| 44 | +// "--output_dir", "experiments/dev_0.5b_4f_llavavideo_haozhe", |
| 45 | +// "--num_train_epochs", "1", |
| 46 | +// "--per_device_train_batch_size", "8", |
| 47 | +// "--per_device_eval_batch_size", "4", |
| 48 | +// "--gradient_accumulation_steps", "2", |
| 49 | +// "--evaluation_strategy", "epoch", |
| 50 | +// "--eval_steps", "1", |
| 51 | +// "--save_strategy", "steps", |
| 52 | +// "--save_steps", "2000", |
| 53 | +// "--learning_rate", "1e-5", |
| 54 | +// "--weight_decay", "0.", |
| 55 | +// "--warmup_ratio", "0.03", |
| 56 | +// "--lr_scheduler_type", "cosine", |
| 57 | +// "--logging_steps", "1", |
| 58 | +// "--tf32", "True", |
| 59 | +// "--model_max_length", "32768", |
| 60 | +// "--gradient_checkpointing", "True", |
| 61 | +// "--dataloader_num_workers", "4", |
| 62 | +// "--lazy_preprocess", "True", |
| 63 | +// "--report_to", "wandb", |
| 64 | +// "--torch_compile", "True", |
| 65 | +// "--torch_compile_backend", "inductor", |
| 66 | +// "--dataloader_drop_last", "True", |
| 67 | +// "--frames_upbound", "4", |
| 68 | +// "--root", "/media/data/haozhe/VFM/onevision/llava_video/EK100", |
| 69 | +// "--action_predictions", "/media/data/haozhe/VFM/EK100/EK100_in_LLAVA/TIM/tim_pred_ids_val.json", |
| 70 | +// "--val_metadata", "/media/data/haozhe/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv", |
| 71 | +// "--add_time_instruction", "False", |
| 72 | +// "--llava_num_frames", "4", |
| 73 | +// "--clip_length", "4", |
| 74 | +// "--action_representation", "official_key", |
| 75 | +// "--topk_predictions", "5" |
| 76 | +// ], |
| 77 | +// "console": "integratedTerminal", |
| 78 | +// "justMyCode": false, |
| 79 | +// "cwd": "${workspaceFolder}" |
| 80 | +// } |
| 81 | +// ] |
| 82 | +// } |
| 83 | + |
| 84 | + |
1 | 85 | { |
2 | 86 | "version": "0.2.0", |
3 | 87 | "configurations": [ |
4 | 88 | { |
5 | 89 | "name": "Run LLAVA Training with torchrun", |
6 | 90 | "type": "debugpy", |
7 | 91 | "request": "launch", |
8 | | - "module": "torch.distributed.run", |
| 92 | + "python": "/media/data/haozhe/VFM/llmseval-venv/bin/python", |
| 93 | + "module": "accelerate.commands.launch", |
9 | 94 | "env": { |
10 | 95 | "CUDA_VISIBLE_DEVICES": "0,1,2,3", |
11 | 96 | "OMP_NUM_THREADS": "8", |
|
20 | 105 |                 "HF_HOME": "/media/data/haozhe/VFM/huggingface"
21 | 106 | }, |
22 | 107 | "args": [ |
23 | | - "--nproc_per_node=4", |
24 | | - "--nnodes=1", |
25 | | - "llava/train/train_mem.py", |
26 | | - "--deepspeed", "scripts/zero3.json", |
27 | | - "--model_name_or_path", "lmms-lab/llava-onevision-qwen2-0.5b-ov", |
28 | | - "--version", "qwen_1_5", |
29 | | - "--data_path", "scripts/train/llava_video.yaml", |
30 | | - "--video_folder", "/media/data/haozhe/VFM/onevision/llava_video", |
31 | | - "--mm_tunable_parts", "mm_vision_tower,mm_mlp_adapter,mm_language_model", |
32 | | - "--mm_vision_tower_lr", "2e-6", |
33 | | - "--vision_tower", "google/siglip-so400m-patch14-384", |
34 | | - "--mm_projector_type", "mlp2x_gelu", |
35 | | - "--mm_vision_select_layer", "-2", |
36 | | - "--mm_use_im_start_end", "False", |
37 | | - "--mm_use_im_patch_token", "False", |
38 | | - "--group_by_modality_length", "True", |
39 | | - "--image_aspect_ratio", "anyres_max_9", |
40 | | - "--image_grid_pinpoints", "(1x1),...,(6x6)", |
41 | | - "--mm_patch_merge_type", "spatial_unpad", |
42 | | - "--bf16", "True", |
43 | | - "--run_name", "dev_0.5b_4f_llavavideo_haozhe", |
44 | | - "--output_dir", "experiments/dev_0.5b_4f_llavavideo_haozhe", |
45 | | - "--num_train_epochs", "1", |
46 | | - "--per_device_train_batch_size", "8", |
47 | | - "--per_device_eval_batch_size", "4", |
48 | | - "--gradient_accumulation_steps", "2", |
49 | | - "--evaluation_strategy", "epoch", |
50 | | - "--eval_steps", "1", |
51 | | - "--save_strategy", "steps", |
52 | | - "--save_steps", "2000", |
53 | | - "--learning_rate", "1e-5", |
54 | | - "--weight_decay", "0.", |
55 | | - "--warmup_ratio", "0.03", |
56 | | - "--lr_scheduler_type", "cosine", |
57 | | - "--logging_steps", "1", |
58 | | - "--tf32", "True", |
59 | | - "--model_max_length", "32768", |
60 | | - "--gradient_checkpointing", "True", |
61 | | - "--dataloader_num_workers", "4", |
62 | | - "--lazy_preprocess", "True", |
63 | | - "--report_to", "wandb", |
64 | | - "--torch_compile", "True", |
65 | | - "--torch_compile_backend", "inductor", |
66 | | - "--dataloader_drop_last", "True", |
67 | | - "--frames_upbound", "4", |
68 | | - "--root", "/media/data/haozhe/VFM/onevision/llava_video/EK100", |
69 | | - "--action_predictions", "/media/data/haozhe/VFM/EK100/EK100_in_LLAVA/TIM/tim_pred_ids_val.json", |
70 | | - "--val_metadata", "/media/data/haozhe/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv", |
71 | | - "--add_time_instruction", "False", |
72 | | - "--llava_num_frames", "4", |
73 | | - "--clip_length", "4", |
74 | | - "--action_representation", "official_key", |
75 | | - "--topk_predictions", "5" |
| 108 | + "--num_processes", "4", |
| 109 | + "-m", "lmms_eval", |
| 110 | + "--model", "llava_vid", |
| 111 | + "--model_args", "pretrained=experiments/dev_LLaVA-Video-7B-Qwen2_4f_test_haozhe,conv_template=qwen_1_5,max_frames_num=64,mm_spatial_pool_mode=average", |
| 112 | + "--tasks", "videomme", |
| 113 | + "--batch_size", "1", |
| 114 | + "--log_samples", |
| 115 | + "--log_samples_suffix", "llava_vid_retrained", |
| 116 | + "--output_path", "./logs/" |
76 | 117 | ], |
77 | 118 | "console": "integratedTerminal", |
78 | 119 | "justMyCode": false, |
|
81 | 122 | ] |
82 | 123 | } |
83 | 124 |
|
84 | | - |
85 | 125 | // { |
86 | 126 | // // Use IntelliSense to learn about possible attributes. |
87 | 127 | // // Hover to view descriptions of existing attributes. |
|
0 commit comments