3 3 # Export environment variables
4 4 export CUDA_VISIBLE_DEVICES="0,1,2,3"
5 5 export OMP_NUM_THREADS="8"
6- export NCCL_IB_DISABLE="0"
7- export NCCL_IB_GID_INDEX="3"
8- export NCCL_SOCKET_IFNAME="eth0"
9- export NCCL_DEBUG="INFO"
6+ # export NCCL_IB_DISABLE="0"
7+ # export NCCL_IB_GID_INDEX="3"
8+ # export NCCL_SOCKET_IFNAME="eth0"
9+ # export NCCL_DEBUG="INFO"
10 10 export ACCELERATE_CPU_AFFINITY="1"
11 11 # export LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libffi.so.7"
1212export WANDB_API_KEY=" 65aeda82a75f1eed29c8e9250b175fcc73dca0d7"
@@ -18,46 +18,51 @@ torchrun --nproc_per_node=4 \
18 18 --master_addr=127.0.0.1 \
19 19 --master_port=29500 \
20 20 llava/train/train_mem.py \
21- --deepspeed scripts/zero3.json \
22- --model_name_or_path lmms-lab/llava-onevision-qwen2-0.5b-ov \
23- --version qwen_1_5 \
24- --data_path scripts/train/onevision.yaml \
25- --image_folder /media/data/haozhe/VFM/onevision/llava_data/geo3k/ \
26- --video_folder /media/data/haozhe/VFM/onevision/llava_video \
27- --mm_tunable_parts mm_vision_tower,mm_mlp_adapter,mm_language_model \
28- --mm_vision_tower_lr 2e-6 \
29- --vision_tower google/siglip-so400m-patch14-384 \
30- --mm_projector_type mlp2x_gelu \
31- --mm_vision_select_layer -2 \
32- --mm_use_im_start_end False \
33- --mm_use_im_patch_token False \
34- --group_by_modality_length True \
35- --image_aspect_ratio anyres_max_9 \
36- --image_grid_pinpoints "(1x1),...,(6x6)" \
37- --mm_patch_merge_type spatial_unpad \
38- --bf16 True \
39- --run_name EK100_test_new \
40- --output_dir experiments/EK100_test_new \
41- --num_train_epochs 1 \
42- --per_device_train_batch_size 1 \
43- --per_device_eval_batch_size 4 \
44- --gradient_accumulation_steps 2 \
45- --evaluation_strategy no \
46- --save_strategy steps \
47- --save_steps 1000 \
48- --save_total_limit 1 \
49- --learning_rate 1e-5 \
50- --weight_decay 0. \
51- --warmup_ratio 0.03 \
52- --lr_scheduler_type cosine \
53- --logging_steps 1 \
54- --tf32 True \
55- --model_max_length 32768 \
56- --gradient_checkpointing True \
57- --dataloader_num_workers 4 \
58- --lazy_preprocess True \
59- --report_to wandb \
60- --torch_compile True \
61- --torch_compile_backend inductor \
62- --dataloader_drop_last True \
63- --frames_upbound 32 > train_kitchen_0.5b_new.out 2>&1
21+ --deepspeed scripts/zero3.json \
22+ --model_name_or_path pretrained_models/LLaVA-Video-7B-Qwen2 \
23+ --version qwen_1_5 \
24+ --data_path scripts/train/EK100_avion_mc_top10.yaml \
25+ --video_folder /capstor/scratch/cscs/hqi/llava/onevision/llava_video \
26+ --mm_tunable_parts mm_vision_tower,mm_mlp_adapter,mm_language_model \
27+ --mm_vision_tower_lr 2e-6 \
28+ --vision_tower pretrained_models/siglip-so400m-patch14-384 \
29+ --mm_projector_type mlp2x_gelu \
30+ --mm_vision_select_layer -2 \
31+ --mm_use_im_start_end False \
32+ --mm_use_im_patch_token False \
33+ --group_by_modality_length True \
34+ --image_aspect_ratio anyres_max_9 \
35+ --image_grid_pinpoints "(1x1),...,(6x6)" \
36+ --mm_patch_merge_type spatial_unpad \
37+ --bf16 True \
38+ --run_name todi_llava_video_7b_avion_mc_top10_5epochs_test \
39+ --output_dir experiments/todi_llava_video_7b_avion_mc_top10_5epochs_test \
40+ --num_train_epochs 5 \
41+ --per_device_train_batch_size 2 \
42+ --per_device_eval_batch_size 4 \
43+ --gradient_accumulation_steps 2 \
44+ --evaluation_strategy steps \
45+ --eval_steps 2000 \
46+ --save_strategy steps \
47+ --save_steps 1000 \
48+ --learning_rate 1e-5 \
49+ --weight_decay 0. \
50+ --warmup_ratio 0.03 \
51+ --lr_scheduler_type cosine \
52+ --logging_steps 1 \
53+ --tf32 True \
54+ --model_max_length 32768 \
55+ --gradient_checkpointing True \
56+ --dataloader_num_workers 4 \
57+ --lazy_preprocess True \
58+ --report_to wandb \
59+ --torch_compile True \
60+ --torch_compile_backend inductor \
61+ --dataloader_drop_last True \
62+ --frames_upbound 32 \
63+ --root /capstor/scratch/cscs/hqi/llava/onevision/llava_video/EK100 \
64+ --action_predictions /capstor/scratch/cscs/hqi/llava/EK100/avion_predictions_test.json \
65+ --val_metadata /capstor/scratch/cscs/hqi/llava/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv \
66+ --llava_num_frames 32 \
67+ --clip_length 32 \
68+ --topk_predictions 10
0 commit comments