Skip to content

Commit f0cef5f

Browse files
authored
Update finetune_onevision.sh
1 parent 7125e36 commit f0cef5f

File tree

1 file changed

+17
-12
lines changed

1 file changed

+17
-12
lines changed

scripts/train/finetune_onevision.sh

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,26 @@ VISION_MODEL_VERSION_CLEAN="${VISION_MODEL_VERSION//\//_}"
1313

1414
############### Pretrain ################
1515

16-
PROMPT_VERSION="qwen_1_5"
17-
18-
BASE_RUN_NAME="llavanext-${VISION_MODEL_VERSION_CLEAN}-${LLM_VERSION_CLEAN}-mlp2x_gelu-pretrain_blip558k_plain"
16+
BASE_RUN_NAME="llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-mlp2x_gelu-pretrain_blip558k_plain"
1917
echo "BASE_RUN_NAME: ${BASE_RUN_NAME}"
2018

21-
CKPT_PATH=$LLM_VERSION # this could also be the previous stage checkpoint
19+
############### Finetune ################
20+
21+
# Stage 2
22+
PROMPT_VERSION="qwen_1_5"
23+
MID_RUN_NAME="llava-onevision-${VISION_MODEL_VERSION_CLEAN}-${LLM_VERSION_CLEAN}-ov_stage_am9"
24+
PREV_STAGE_CHECKPOINT="/mnt/bn/vl-research/checkpoints/onevision/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-mid_to_final_next_3m_am9_july14" # replace this with your last checkpoint from training on the single-image collection
25+
echo "PREV_STAGE_CHECKPOINT: ${PREV_STAGE_CHECKPOINT}"
26+
echo "MID_RUN_NAME: ${MID_RUN_NAME}"
2227

2328
ACCELERATE_CPU_AFFINITY=1 torchrun --nproc_per_node="${NUM_GPUS}" --nnodes="${NNODES}" --node_rank="${RANK}" --master_addr="${ADDR}" --master_port="${PORT}" \
2429
llava/train/train_mem.py \
2530
--deepspeed scripts/zero3.json \
26-
--model_name_or_path ${CKPT_PATH} \
27-
--version ${PROMPT_VERSION} \
28-
--data_path ./onevision_data.yaml \
29-
--image_folder ./onevision_data/images \
30-
--video_folder ./onevision_data/videos \
31-
--pretrain_mm_mlp_adapter="/checkpoints/projectors/${BASE_RUN_NAME}/mm_projector.bin" \
31+
--model_name_or_path $PREV_STAGE_CHECKPOINT \
32+
--version $PROMPT_VERSION \
33+
--data_path /mnt/bn/vl-research/workspace/boli01/projects/LLaVA_Next/scripts/i18n/scale_llms/next_ov_stage_july21.yaml \
34+
--image_folder /mnt/bn/vl-research/data/llava_data \
35+
--video_folder /mnt/bn/vl-research/data/llava_video \
3236
--mm_tunable_parts="mm_vision_tower,mm_mlp_adapter,mm_language_model" \
3337
--mm_vision_tower_lr=2e-6 \
3438
--vision_tower ${VISION_MODEL_VERSION} \
@@ -42,7 +46,7 @@ ACCELERATE_CPU_AFFINITY=1 torchrun --nproc_per_node="${NUM_GPUS}" --nnodes="${NN
4246
--mm_patch_merge_type spatial_unpad \
4347
--bf16 True \
4448
--run_name $MID_RUN_NAME \
45-
--output_dir "/checkpoints/${MID_RUN_NAME}" \
49+
--output_dir /mnt/bn/vl-research/checkpoints/onevision/$MID_RUN_NAME \
4650
--num_train_epochs 1 \
4751
--per_device_train_batch_size 1 \
4852
--per_device_eval_batch_size 4 \
@@ -66,5 +70,6 @@ ACCELERATE_CPU_AFFINITY=1 torchrun --nproc_per_node="${NUM_GPUS}" --nnodes="${NN
6670
--torch_compile_backend "inductor" \
6771
--dataloader_drop_last True \
6872
--frames_upbound 32
73+
exit 0;
6974

70-
# You can delete the sdpa attn_implementation if you want to use flash attn
75+
# You can delete the sdpa attn_implementation setting if you want to use flash attention instead

0 commit comments

Comments
 (0)