33# SBATCH -A a-a03
44# SBATCH --hint nomultithread
55# SBATCH --cpus-per-task 288
6- # SBATCH --mem=460000
76# SBATCH --no-requeue
8- # SBATCH --nodes 16 # number of Nodes
7+ # SBATCH --nodes 32 # number of Nodes
98# SBATCH --ntasks-per-node 1 # number of MP tasks. IMPORTANT: torchrun represents just 1 Slurm task
109# SBATCH --gres gpu:4 # Number of GPUs
11- # SBATCH --time 23:00:00 # maximum execution time (DD-HH:MM:SS). Mandatory field in MN5
12- # SBATCH --output logs/R-%x.%j-dev_7b_64f_EK100_haozhe.out
13- # SBATCH --error logs/R-%x.%j-dev_7b_64f_EK100_haozhe.err
10+ # SBATCH --time 20:00:00 # maximum execution time (DD-HH:MM:SS). Mandatory field in MN5
11+ # SBATCH --output temp/R-%x.%j_dev_7b_64f_top5_gpt4o_avion_tim_last_layer_one_token_detection_direct_neighbor_178K_100percent_time.out
12+ # SBATCH --error temp/R-%x.%j_dev_7b_64f_top5_gpt4o_avion_tim_last_layer_one_token_detection_direct_neighbor_178K_100percent_time.err
1413
1514mkdir -p logs
1615
@@ -58,8 +57,8 @@ PYTHON_ARGS=" \
5857 --deepspeed scripts/zero3.json \
5958 --model_name_or_path lmms-lab/LLaVA-Video-7B-Qwen2 \
6059 --version qwen_1_5 \
61- --data_path scripts/train/llava_video .yaml \
62- --video_folder /iopsstor/scratch/cscs/hqi /VFM/onevision/llava_video \
60+ --data_path scripts/train/avion_tim_top5_gpt4o_detection_direct_178K_100percent .yaml \
61+ --video_folder /iopsstor/scratch/anonymous_server/anonymous /VFM/onevision/llava_video/ \
6362 --mm_tunable_parts mm_vision_tower,mm_mlp_adapter,mm_language_model \
6463 --mm_vision_tower_lr 2e-6 \
6564 --vision_tower google/siglip-so400m-patch14-384 \
@@ -72,21 +71,16 @@ PYTHON_ARGS=" \
7271 --image_grid_pinpoints \" (1x1),...,(6x6)\" \
7372 --mm_patch_merge_type spatial_unpad \
7473 --bf16 True \
75- --run_name dev_7b_64f_EK100_haozhe \
76- --output_dir experiments/dev_7b_64f_EK100_haozhe \
74+ --run_name dev_7b_64f_top5_gpt4o_avion_tim_last_layer_one_token_detection_direct_neighbor_178K_100percent_time \
75+ --output_dir temp/dev_7b_64f_top5_gpt4o_avion_tim_last_layer_one_token_detection_direct_neighbor_178K_100percent_time \
7776 --num_train_epochs 1 \
7877 --per_device_train_batch_size 1 \
7978 --per_device_eval_batch_size 4 \
8079 --gradient_accumulation_steps 2 \
81- <<<<<<< HEAD:run_todi.sbatch
82- --evaluation_strategy steps \
83- --eval_steps 200000\
84- =======
8580 --evaluation_strategy epoch \
86- --eval_steps 1 \
87- >>>>>>> origin/haozhedev:run_clariden.sbatch
81+ --eval_steps 1\
8882 --save_strategy steps \
89- --save_steps 2000 \
83+ --save_steps 1000 \
9084 --learning_rate 1e-5 \
9185 --weight_decay 0. \
9286 --warmup_ratio 0.03 \
@@ -102,24 +96,25 @@ PYTHON_ARGS=" \
10296 --torch_compile_backend inductor \
10397 --dataloader_drop_last True \
10498 --frames_upbound 64 \
105- --mm_newline_position grid \
106- --add_time_instruction True \
107- --force_sample True \
108- --mm_spatial_pool_stride 2 \
109- --root /iopsstor/scratch/cscs/hqi/VFM/onevision/llava_video/EK100 \
110- --action_predictions /iopsstor/scratch/cscs/hqi/VFM/llava_data/TIM_PREDS/tim_pred_ids_val.json \
111- --val_metadata /iopsstor/scratch/cscs/hqi/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv \
99+ --root /iopsstor/scratch/anonymous_server/anonymous/VFM/onevision/llava_video/EK100/ \
100+ --action_predictions /iopsstor/scratch/anonymous_server/anonymous/VFM/llava_data/TIM_PREDS/tim_pred_ids_val.json \
101+ --val_metadata /iopsstor/scratch/anonymous_server/anonymous/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv \
112102 --llava_num_frames 64 \
103+ --add_time_instruction True \
113104 --clip_length 64 \
114- --action_representation official_key \
115105 --topk_predictions 5 \
106+ --action_representation GT_random_narration \
107+ --vision_supervision one_token \
108+ --vision_token_training last_layer \
109+ --action_types 97,300,3806 \
110+ --learn_neighbor_actions prior \
116111 "
117112
118113export CMD=" $LAUNCHER $PYTHON_FILE $PYTHON_ARGS "
119114export HF_HOME=$SCRATCH /huggingface
120115export OMP_NUM_THREADS=" 8"
121116export ACCELERATE_CPU_AFFINITY=" 1"
122- export WANDB_API_KEY=" 65aeda82a75f1eed29c8e9250b175fcc73dca0d7 "
117+ export WANDB_API_KEY=" "
123118
124119echo $CMD
125120
@@ -141,11 +136,7 @@ SRUN_ARGS=" \
141136
142137# bash -c is needed for the delayed interpolation of env vars to work
143138srun $SRUN_ARGS numactl --membind=0-3 bash -c "
144- <<<<<<< HEAD:run_todi.sbatch
145- source /capstor/scratch/cscs/hqi/llava/llava_dependency/llava-venv/bin/activate
146- =======
147- source /iopsstor/scratch/cscs/hqi/VFM/llava_dependency/llava-venv/bin/activate
148- >>>>>>> origin/haozhedev:run_clariden.sbatch
139+ source /iopsstor/scratch/anonymous_server/anonymous/VFM/llava_dependency/llava-venv/bin/activate
149140 $CMD "
150141
151- echo " END TIME: $( date) "
142+ echo " END TIME: $( date) "
0 commit comments