
Commit c1d6c3f (1 parent: 20fafb6)

Add fine-tuning script for InternVL2-76B (#440)
2 files changed: 145 additions, 0 deletions
Lines changed: 76 additions & 0 deletions (new file: full fine-tuning script)
set -x

PARTITION=${PARTITION:-"INTERN2"}
GPUS=${GPUS:-32}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
QUOTA_TYPE=${QUOTA_TYPE:-"reserved"}
NODES=$((GPUS / GPUS_PER_NODE))
CPUS_PER_TASK=${CPUS_PER_TASK:-10}
SRUN_ARGS=${SRUN_ARGS:-""}
BATCH_SIZE=${BATCH_SIZE:-128}
PER_DEVICE_BATCH_SIZE=${PER_DEVICE_BATCH_SIZE:-1}
GRADIENT_ACC=$((BATCH_SIZE / PER_DEVICE_BATCH_SIZE / GPUS))


export PYTHONPATH="${PYTHONPATH}:$(pwd)"
export MASTER_PORT=34229
export TF_CPP_MIN_LOG_LEVEL=3

OUTPUT_DIR='work_dirs/internvl_chat_v2_0/internvl2_76b_hermes2_llama3_70b_dynamic_res_2nd_finetune_full'

if [ ! -d "$OUTPUT_DIR" ]; then
  mkdir -p "$OUTPUT_DIR"
fi

# number of gpus: 32
# batch size per gpu: 1
# gradient accumulation steps: 4
# total batch size: 128
# epoch: 1
srun -p ${PARTITION} \
  --gres=gpu:${GPUS_PER_NODE} \
  --nodes=${NODES} \
  --ntasks=${GPUS} \
  --ntasks-per-node=${GPUS_PER_NODE} \
  --cpus-per-task=${CPUS_PER_TASK} \
  --kill-on-bad-exit=1 \
  --quotatype=${QUOTA_TYPE} \
  ${SRUN_ARGS} \
  python -u internvl/train/internvl_chat_finetune.py \
  --model_name_or_path "./pretrained/InternVL2-Llama3-76B" \
  --conv_style "internlm2-chat" \
  --output_dir ${OUTPUT_DIR} \
  --meta_path "./shell/data/internvl_1_2_finetune_custom.json" \
  --overwrite_output_dir True \
  --force_image_size 448 \
  --max_dynamic_patch 6 \
  --down_sample_ratio 0.5 \
  --drop_path_rate 0.4 \
  --freeze_llm False \
  --freeze_mlp False \
  --freeze_backbone True \
  --vision_select_layer -1 \
  --dataloader_num_workers 4 \
  --bf16 True \
  --num_train_epochs 1 \
  --per_device_train_batch_size ${PER_DEVICE_BATCH_SIZE} \
  --gradient_accumulation_steps ${GRADIENT_ACC} \
  --evaluation_strategy "no" \
  --save_strategy "steps" \
  --save_steps 200 \
  --save_total_limit 1 \
  --learning_rate 2e-5 \
  --weight_decay 0.05 \
  --warmup_ratio 0.03 \
  --lr_scheduler_type "cosine" \
  --logging_steps 1 \
  --max_seq_length 4096 \
  --do_train True \
  --grad_checkpoint True \
  --group_by_length True \
  --dynamic_image_size True \
  --use_thumbnail True \
  --ps_version 'v2' \
  --deepspeed "zero_stage3_config_100b.json" \
  --report_to "tensorboard" \
  2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt"
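
The defaults at the top of the script can be overridden from the environment at launch time; GRADIENT_ACC is then derived as BATCH_SIZE / PER_DEVICE_BATCH_SIZE / GPUS (128 / 1 / 32 = 4 with the values above). A minimal launch sketch follows, assuming the script has been saved locally; the path below is a placeholder, since the file name is not visible in this view.

# Hypothetical invocation; the script path is a placeholder, not taken from this commit.
# Each variable overrides the matching ${VAR:-default} at the top of the script.
PARTITION=INTERN2 GPUS=32 GPUS_PER_NODE=8 \
BATCH_SIZE=128 PER_DEVICE_BATCH_SIZE=1 \
sh path/to/internvl2_76b_2nd_finetune_full.sh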
Lines changed: 69 additions & 0 deletions (new file: LoRA fine-tuning script)
set -x

GPUS=${GPUS:-8}
BATCH_SIZE=${BATCH_SIZE:-16}
PER_DEVICE_BATCH_SIZE=${PER_DEVICE_BATCH_SIZE:-1}
GRADIENT_ACC=$((BATCH_SIZE / PER_DEVICE_BATCH_SIZE / GPUS))


export PYTHONPATH="${PYTHONPATH}:$(pwd)"
export MASTER_PORT=34229
export TF_CPP_MIN_LOG_LEVEL=3
export LAUNCHER=pytorch

OUTPUT_DIR='work_dirs/internvl_chat_v2_0/internvl2_76b_hermes2_llama3_70b_dynamic_res_2nd_finetune_lora'

if [ ! -d "$OUTPUT_DIR" ]; then
  mkdir -p "$OUTPUT_DIR"
fi

# number of gpus: 8
# batch size per gpu: 1
# gradient accumulation steps: 2
# total batch size: 16
# epoch: 1
torchrun \
  --nnodes=1 \
  --node_rank=0 \
  --master_addr=127.0.0.1 \
  --nproc_per_node=${GPUS} \
  --master_port=${MASTER_PORT} \
  internvl/train/internvl_chat_finetune.py \
  --model_name_or_path "./pretrained/InternVL2-Llama3-76B" \
  --conv_style "internlm2-chat" \
  --output_dir ${OUTPUT_DIR} \
  --meta_path "./shell/data/internvl_1_2_finetune_custom.json" \
  --overwrite_output_dir True \
  --force_image_size 448 \
  --max_dynamic_patch 6 \
  --down_sample_ratio 0.5 \
  --drop_path_rate 0.0 \
  --freeze_llm True \
  --freeze_mlp True \
  --freeze_backbone True \
  --use_llm_lora 16 \
  --vision_select_layer -1 \
  --dataloader_num_workers 4 \
  --bf16 True \
  --num_train_epochs 1 \
  --per_device_train_batch_size ${PER_DEVICE_BATCH_SIZE} \
  --gradient_accumulation_steps ${GRADIENT_ACC} \
  --evaluation_strategy "no" \
  --save_strategy "steps" \
  --save_steps 200 \
  --save_total_limit 1 \
  --learning_rate 2e-5 \
  --weight_decay 0.05 \
  --warmup_ratio 0.03 \
  --lr_scheduler_type "cosine" \
  --logging_steps 1 \
  --max_seq_length 4096 \
  --do_train True \
  --grad_checkpoint True \
  --group_by_length True \
  --dynamic_image_size True \
  --use_thumbnail True \
  --ps_version 'v2' \
  --deepspeed "zero_stage3_config_100b.json" \
  --report_to "tensorboard" \
  2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt"
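
As with the full fine-tuning script, the LoRA variant reads its knobs from the environment. BATCH_SIZE should be divisible by PER_DEVICE_BATCH_SIZE * GPUS so that GRADIENT_ACC = BATCH_SIZE / PER_DEVICE_BATCH_SIZE / GPUS is a whole number (16 / 1 / 8 = 2 here). A minimal single-node launch sketch, again with a placeholder script path:

# Hypothetical invocation; the script path is a placeholder, not taken from this commit.
# Keep BATCH_SIZE divisible by PER_DEVICE_BATCH_SIZE * GPUS so GRADIENT_ACC stays an integer.
GPUS=8 BATCH_SIZE=16 PER_DEVICE_BATCH_SIZE=1 \
sh path/to/internvl2_76b_2nd_finetune_lora.sh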
