Skip to content

Commit 84b03bc

Browse files
MMathisLab, yeshaokai, and Haozhe Qi
authored
Release-iccv (#13)
* cleanup
* code runnable
* removed our names
* more clean up
* updates
* update to readme
* Update README.md
* updates
* removed more identity related info

---------

Co-authored-by: shaokaiye <[email protected]>
Co-authored-by: Haozhe Qi <[email protected]>
1 parent 4133ca9 commit 84b03bc

File tree

158 files changed

+303
-24651
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

158 files changed

+303
-24651
lines changed

.vscode/launch.json

Lines changed: 0 additions & 486 deletions
This file was deleted.

README.md

Lines changed: 6 additions & 235 deletions
Large diffs are not rendered by default.

add_dataset_name.py

Lines changed: 0 additions & 33 deletions
This file was deleted.

cog.yaml

Lines changed: 0 additions & 37 deletions
This file was deleted.

run_todi2.sbatch renamed to dev_7b_16f_top20_full_includes_tim_no_avion_no_narration.sbatch

Lines changed: 31 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,15 @@
11
#!/bin/bash
22
#SBATCH --job-name multinode
3-
#SBATCH --account a03
4-
#SBATCH --reservation=sai-a03
3+
#SBATCH -A a-a03
54
#SBATCH --hint nomultithread
65
#SBATCH --cpus-per-task 288
7-
#SBATCH --mem=460000
86
#SBATCH --no-requeue
9-
#SBATCH --nodes 4 # number of Nodes
7+
#SBATCH --nodes 8 # number of Nodes
108
#SBATCH --ntasks-per-node 1 # number of MP tasks. IMPORTANT: torchrun represents just 1 Slurm task
119
#SBATCH --gres gpu:4 # Number of GPUs
12-
#SBATCH --time 05:00:00 # maximum execution time (DD-HH:MM:SS). Mandatory field in MN5
13-
#SBATCH --output logs/R-%x.%j_train_llavavideo_kitchen_7b_avion_mc_32f_top10_5epochs_2.out
14-
#SBATCH --error logs/R-%x.%j_train_llavavideo_kitchen_7b_avion_mc_32f_top10_5epochs_2.err
10+
#SBATCH --time 14:00:00 # maximum execution time (DD-HH:MM:SS). Mandatory field in MN5
11+
#SBATCH --output temp/dev_7b_16f_top20_full_includes_tim_no_avion_narration_only.out
12+
#SBATCH --error temp/dev_7b_16f_top20_full_includes_tim_no_avion_narration_only.err
1513

1614
mkdir -p logs
1715

@@ -57,10 +55,10 @@ LAUNCHER="torchrun \
5755
PYTHON_FILE=llava/train/train_mem.py
5856
PYTHON_ARGS=" \
5957
--deepspeed scripts/zero3.json \
60-
--model_name_or_path pretrained_models/LLaVA-Video-7B-Qwen2 \
58+
--model_name_or_path lmms-lab/LLaVA-Video-7B-Qwen2 \
6159
--version qwen_1_5 \
62-
--data_path scripts/train/EK100_avion_mc_top10.yaml \
63-
--video_folder /capstor/scratch/cscs/hqi/llava/onevision/llava_video \
60+
--data_path scripts/train/tim_top20_official_key_gpt4o_direct_detection.yaml \
61+
--video_folder /iopsstor/scratch/cscs/anonymous/VFM/onevision/EK100_512 \
6462
--mm_tunable_parts mm_vision_tower,mm_mlp_adapter,mm_language_model \
6563
--mm_vision_tower_lr 2e-6 \
6664
--vision_tower google/siglip-so400m-patch14-384 \
@@ -73,16 +71,16 @@ PYTHON_ARGS=" \
7371
--image_grid_pinpoints \"(1x1),...,(6x6)\" \
7472
--mm_patch_merge_type spatial_unpad \
7573
--bf16 True \
76-
--run_name todi_llava_video_7b_avion_mc_top10_5epochs_2 \
77-
--output_dir experiments/todi_llava_video_7b_avion_mc_top10_5epochs_2 \
78-
--num_train_epochs 5 \
79-
--per_device_train_batch_size 2 \
74+
--run_name dev_7b_16f_top20_full_includes_tim_no_avion_narration_only \
75+
--output_dir temp/dev_7b_16f_top20_full_includes_tim_no_avion_narration_only \
76+
--num_train_epochs 2 \
77+
--per_device_train_batch_size 1 \
8078
--per_device_eval_batch_size 4 \
8179
--gradient_accumulation_steps 2 \
82-
--evaluation_strategy steps \
83-
--eval_steps 2000\
80+
--evaluation_strategy epoch \
81+
--eval_steps 1\
8482
--save_strategy steps \
85-
--save_steps 1000 \
83+
--save_steps 5000 \
8684
--learning_rate 1e-5 \
8785
--weight_decay 0. \
8886
--warmup_ratio 0.03 \
@@ -97,20 +95,26 @@ PYTHON_ARGS=" \
9795
--torch_compile True \
9896
--torch_compile_backend inductor \
9997
--dataloader_drop_last True \
100-
--frames_upbound 32 \
101-
--root /capstor/scratch/cscs/hqi/llava/onevision/llava_video/EK100 \
102-
--action_predictions /capstor/scratch/cscs/hqi/llava/EK100/avion_predictions_test.json \
103-
--val_metadata /capstor/scratch/cscs/hqi/llava/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv \
104-
--llava_num_frames 32 \
105-
--clip_length 32 \
106-
--topk_predictions 10 \
98+
--frames_upbound 16 \
99+
--root /iopsstor/scratch/cscs/anonymous/VFM/onevision/EK100_512/EK100 \
100+
--action_predictions /iopsstor/scratch/cscs/anonymous/VFM/llava_data/TIM_PREDS/tim_pred_ids_val.json \
101+
--val_metadata /iopsstor/scratch/cscs/anonymous/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv \
102+
--llava_num_frames 16 \
103+
--add_time_instruction True \
104+
--clip_length 16 \
105+
--topk_predictions 20 \
106+
--action_representation GT_random_narration \
107+
--vision_supervision one_token \
108+
--vision_token_training last_layer \
109+
--action_types 97,300,3806 \
110+
--learn_neighbor_actions prior \
107111
"
108112

109113
export CMD="$LAUNCHER $PYTHON_FILE $PYTHON_ARGS"
110114
export HF_HOME=$SCRATCH/huggingface
111115
export OMP_NUM_THREADS="8"
112116
export ACCELERATE_CPU_AFFINITY="1"
113-
export WANDB_API_KEY="65aeda82a75f1eed29c8e9250b175fcc73dca0d7"
117+
export WANDB_API_KEY=""
114118

115119
echo $CMD
116120

@@ -132,7 +136,7 @@ SRUN_ARGS=" \
132136

133137
# bash -c is needed for the delayed interpolation of env vars to work
134138
srun $SRUN_ARGS numactl --membind=0-3 bash -c "
135-
source /capstor/scratch/cscs/hqi/llava/llava_dependency/llava-venv/bin/activate
139+
source /iopsstor/scratch/cscs/anonymous/VFM/llava_dependency/llava-venv/bin/activate
136140
$CMD"
137141

138-
echo "END TIME: $(date)"
142+
echo "END TIME: $(date)"

run_clariden.sbatch renamed to dev_7b_64f_top5_gpt4o_avion_tim_last_layer_one_token_512_detection_direct_neighbor_178K_100percent_time.sbatch

Lines changed: 22 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,13 @@
33
#SBATCH -A a-a03
44
#SBATCH --hint nomultithread
55
#SBATCH --cpus-per-task 288
6-
#SBATCH --mem=460000
76
#SBATCH --no-requeue
8-
#SBATCH --nodes 16 # number of Nodes
7+
#SBATCH --nodes 32 # number of Nodes
98
#SBATCH --ntasks-per-node 1 # number of MP tasks. IMPORTANT: torchrun represents just 1 Slurm task
109
#SBATCH --gres gpu:4 # Number of GPUs
11-
#SBATCH --time 23:00:00 # maximum execution time (DD-HH:MM:SS). Mandatory field in MN5
12-
#SBATCH --output logs/R-%x.%j-dev_7b_64f_EK100_haozhe.out
13-
#SBATCH --error logs/R-%x.%j-dev_7b_64f_EK100_haozhe.err
10+
#SBATCH --time 20:00:00 # maximum execution time (DD-HH:MM:SS). Mandatory field in MN5
11+
#SBATCH --output temp/R-%x.%j_dev_7b_64f_top5_gpt4o_avion_tim_last_layer_one_token_detection_direct_neighbor_178K_100percent_time.out
12+
#SBATCH --error temp/R-%x.%j_dev_7b_64f_top5_gpt4o_avion_tim_last_layer_one_token_detection_direct_neighbor_178K_100percent_time.err
1413

1514
mkdir -p logs
1615

@@ -58,8 +57,8 @@ PYTHON_ARGS=" \
5857
--deepspeed scripts/zero3.json \
5958
--model_name_or_path lmms-lab/LLaVA-Video-7B-Qwen2 \
6059
--version qwen_1_5 \
61-
--data_path scripts/train/llava_video.yaml \
62-
--video_folder /iopsstor/scratch/cscs/hqi/VFM/onevision/llava_video \
60+
--data_path scripts/train/avion_tim_top5_gpt4o_detection_direct_178K_100percent.yaml \
61+
--video_folder /iopsstor/scratch/anonymous_server/anonymous/VFM/onevision/llava_video/ \
6362
--mm_tunable_parts mm_vision_tower,mm_mlp_adapter,mm_language_model \
6463
--mm_vision_tower_lr 2e-6 \
6564
--vision_tower google/siglip-so400m-patch14-384 \
@@ -72,21 +71,16 @@ PYTHON_ARGS=" \
7271
--image_grid_pinpoints \"(1x1),...,(6x6)\" \
7372
--mm_patch_merge_type spatial_unpad \
7473
--bf16 True \
75-
--run_name dev_7b_64f_EK100_haozhe \
76-
--output_dir experiments/dev_7b_64f_EK100_haozhe \
74+
--run_name dev_7b_64f_top5_gpt4o_avion_tim_last_layer_one_token_detection_direct_neighbor_178K_100percent_time \
75+
--output_dir temp/dev_7b_64f_top5_gpt4o_avion_tim_last_layer_one_token_detection_direct_neighbor_178K_100percent_time \
7776
--num_train_epochs 1 \
7877
--per_device_train_batch_size 1 \
7978
--per_device_eval_batch_size 4 \
8079
--gradient_accumulation_steps 2 \
81-
<<<<<<< HEAD:run_todi.sbatch
82-
--evaluation_strategy steps \
83-
--eval_steps 200000\
84-
=======
8580
--evaluation_strategy epoch \
86-
--eval_steps 1 \
87-
>>>>>>> origin/haozhedev:run_clariden.sbatch
81+
--eval_steps 1\
8882
--save_strategy steps \
89-
--save_steps 2000 \
83+
--save_steps 1000 \
9084
--learning_rate 1e-5 \
9185
--weight_decay 0. \
9286
--warmup_ratio 0.03 \
@@ -102,24 +96,25 @@ PYTHON_ARGS=" \
10296
--torch_compile_backend inductor \
10397
--dataloader_drop_last True \
10498
--frames_upbound 64 \
105-
--mm_newline_position grid \
106-
--add_time_instruction True \
107-
--force_sample True \
108-
--mm_spatial_pool_stride 2 \
109-
--root /iopsstor/scratch/cscs/hqi/VFM/onevision/llava_video/EK100 \
110-
--action_predictions /iopsstor/scratch/cscs/hqi/VFM/llava_data/TIM_PREDS/tim_pred_ids_val.json \
111-
--val_metadata /iopsstor/scratch/cscs/hqi/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv \
99+
--root /iopsstor/scratch/anonymous_server/anonymous/VFM/onevision/llava_video/EK100/ \
100+
--action_predictions /iopsstor/scratch/anonymous_server/anonymous/VFM/llava_data/TIM_PREDS/tim_pred_ids_val.json \
101+
--val_metadata /iopsstor/scratch/anonymous_server/anonymous/VFM/EK100/epic-kitchens-100-annotations/EPIC_100_validation.csv \
112102
--llava_num_frames 64 \
103+
--add_time_instruction True \
113104
--clip_length 64 \
114-
--action_representation official_key \
115105
--topk_predictions 5 \
106+
--action_representation GT_random_narration \
107+
--vision_supervision one_token \
108+
--vision_token_training last_layer \
109+
--action_types 97,300,3806 \
110+
--learn_neighbor_actions prior \
116111
"
117112

118113
export CMD="$LAUNCHER $PYTHON_FILE $PYTHON_ARGS"
119114
export HF_HOME=$SCRATCH/huggingface
120115
export OMP_NUM_THREADS="8"
121116
export ACCELERATE_CPU_AFFINITY="1"
122-
export WANDB_API_KEY="65aeda82a75f1eed29c8e9250b175fcc73dca0d7"
117+
export WANDB_API_KEY=""
123118

124119
echo $CMD
125120

@@ -141,11 +136,7 @@ SRUN_ARGS=" \
141136

142137
# bash -c is needed for the delayed interpolation of env vars to work
143138
srun $SRUN_ARGS numactl --membind=0-3 bash -c "
144-
<<<<<<< HEAD:run_todi.sbatch
145-
source /capstor/scratch/cscs/hqi/llava/llava_dependency/llava-venv/bin/activate
146-
=======
147-
source /iopsstor/scratch/cscs/hqi/VFM/llava_dependency/llava-venv/bin/activate
148-
>>>>>>> origin/haozhedev:run_clariden.sbatch
139+
source /iopsstor/scratch/anonymous_server/anonymous/VFM/llava_dependency/llava-venv/bin/activate
149140
$CMD"
150141

151-
echo "END TIME: $(date)"
142+
echo "END TIME: $(date)"

docs/LLaVA-NeXT-Interleave.md

Lines changed: 0 additions & 53 deletions
This file was deleted.

0 commit comments

Comments
 (0)