
Commit dcad8b0

lugimzzz and DesmonDay authored
[llm]update tipc setting (#6990)
* update setting
* delete

---------

Co-authored-by: DesmonDay <[email protected]>
1 parent fc0e7e1 · commit dcad8b0

13 files changed (+45, -47 lines)

tests/test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh

Lines changed: 3 additions & 4 deletions
@@ -20,7 +20,7 @@ function _set_params(){
     # Parameters required by the script
     model_name_or_path=${1:-"facebook/llama-7b"}
     dataset_name_or_path=${2:-"llm_benchmark_zh"}
-    max_length=${3:-"1024"}
+    base_batch_size=${3:-"1"}
     learning_rate=${4:-"3e-05"}
     recompute=${5:-"true"}
     tensor_parallel_degree=${6:-"1"}
@@ -29,7 +29,6 @@ function _set_params(){

     # Benchmark configuration parameters
     model_item=${9:-"facebook/llama-7b"}   # (required) model item |fastscnn|segformer_b0|ocrnet_hrnetw48
-    base_batch_size=1                      # (required) for a static-graph single process, the per-card batch size; multiply by the number of cards at training time
     fp_item="fp16"                         # (required) fp32|fp16
     run_mode=${10:-"DP"}                   # (required) MP model parallel | DP data parallel | PP pipeline parallel | hybrid parallel DP1-MP1-PP1 | DP1-MP4-PP1
     device_num=${11:-"N1C1"}               # (required) number of cards used, N1C1|N1C8|N4C32 (4 machines, 32 cards)
@@ -91,7 +90,7 @@ function _train(){
     train_cmd=" --model_name_or_path ${model_name_or_path} \
         --dataset_name_or_path ${dataset_name_or_path} \
         --output_dir output \
-        --per_device_train_batch_size 1 \
+        --per_device_train_batch_size ${base_batch_size} \
         --gradient_accumulation_steps 1 \
         --num_train_epochs ${num_train_epochs} \
         --learning_rate ${learning_rate} \
@@ -100,7 +99,7 @@ function _train(){
         --save_strategy no \
         --logging_steps 1 \
         --src_length 1024 \
-        --max_length ${max_length} \
+        --max_length 1024 \
         --fp16 1 \
         --fp16_opt_level O2 \
         --do_train 1 \
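
Read together, the runner change is small: the third positional argument, which previously carried max_length, now carries the per-device batch size; the old hard-coded base_batch_size=1 line is dropped; and max_length is pinned to 1024 inside the script. For readers tracking the positional interface, the caller scripts below pass twelve arguments to run_benchmark.sh. A comment-style summary of the order, inferred from the caller command and the _set_params hunk above (the $7/$8 names are taken from the caller variables, not shown in this hunk):

# Positional arguments consumed by run_benchmark.sh after this commit
# (order inferred from the caller scripts changed in the same commit):
#  $1 model_name_or_path    $2 dataset_name_or_path    $3 base_batch_size (new; was max_length)
#  $4 learning_rate         $5 recompute               $6 tensor_parallel_degree
#  $7 lora                  $8 prefix_tuning           $9 model_item
# $10 run_mode             $11 device_num             $12 num_train_epochs
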
Lines changed: 4 additions & 4 deletions
@@ -14,17 +14,17 @@

 model_name_or_path="bigscience/bloomz-7b1-mt"
 dataset_name_or_path="llm_benchmark_zh"
-max_length=3072
+base_batch_size=1
 learning_rate="3e-04"
-recompute="1"
+recompute="0"
 tensor_parallel_degree="1"
 lora="1"
 prefix_tuning="0"
 model_item="bigscience-bloomz-7b1-mt_lora"
-run_mode="DP1-recompute"
+run_mode="DP1"
 device_num="N1C1"
 num_train_epochs=2
 export CUDA_VISIBLE_DEVICES=0
 cd ./tests
 bash ./test_tipc/dygraph/ft/benchmark_common/prepare.sh
-bash ./test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh ${model_name_or_path} ${dataset_name_or_path} ${max_length} ${learning_rate} ${recompute} ${tensor_parallel_degree} ${lora} ${prefix_tuning} ${model_item} ${run_mode} ${device_num} ${num_train_epochs}
+bash ./test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh ${model_name_or_path} ${dataset_name_or_path} ${base_batch_size} ${learning_rate} ${recompute} ${tensor_parallel_degree} ${lora} ${prefix_tuning} ${model_item} ${run_mode} ${device_num} ${num_train_epochs}
Lines changed: 4 additions & 4 deletions
@@ -14,17 +14,17 @@

 model_name_or_path="bigscience/bloomz-7b1-mt"
 dataset_name_or_path="llm_benchmark_zh"
-max_length=3072
+base_batch_size=1
 learning_rate="3e-02"
-recompute="1"
+recompute="0"
 tensor_parallel_degree="1"
 lora="0"
 prefix_tuning="1"
 model_item="bigscience-bloomz-7b1-mt_pt"
-run_mode="DP1-recompute"
+run_mode="DP1"
 device_num="N1C1"
 num_train_epochs=2
 export CUDA_VISIBLE_DEVICES=0
 cd ./tests
 bash ./test_tipc/dygraph/ft/benchmark_common/prepare.sh
-bash ./test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh ${model_name_or_path} ${dataset_name_or_path} ${max_length} ${learning_rate} ${recompute} ${tensor_parallel_degree} ${lora} ${prefix_tuning} ${model_item} ${run_mode} ${device_num} ${num_train_epochs}
+bash ./test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh ${model_name_or_path} ${dataset_name_or_path} ${base_batch_size} ${learning_rate} ${recompute} ${tensor_parallel_degree} ${lora} ${prefix_tuning} ${model_item} ${run_mode} ${device_num} ${num_train_epochs}
Lines changed: 4 additions & 4 deletions
@@ -14,17 +14,17 @@

 model_name_or_path="bigscience/bloomz-7b1-mt"
 dataset_name_or_path="llm_benchmark_zh"
-max_length=3072
+base_batch_size=2
 learning_rate="3e-05"
-recompute="1"
+recompute="0"
 tensor_parallel_degree="8"
 lora="0"
 prefix_tuning="0"
 model_item="bigscience-bloomz-7b1-mt_sft"
-run_mode="MP8-recompute"
+run_mode="MP8"
 device_num="N1C8"
 num_train_epochs=5
 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
 cd ./tests
 bash ./test_tipc/dygraph/ft/benchmark_common/prepare.sh
-bash ./test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh ${model_name_or_path} ${dataset_name_or_path} ${max_length} ${learning_rate} ${recompute} ${tensor_parallel_degree} ${lora} ${prefix_tuning} ${model_item} ${run_mode} ${device_num} ${num_train_epochs}
+bash ./test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh ${model_name_or_path} ${dataset_name_or_path} ${base_batch_size} ${learning_rate} ${recompute} ${tensor_parallel_degree} ${lora} ${prefix_tuning} ${model_item} ${run_mode} ${device_num} ${num_train_epochs}
Lines changed: 4 additions & 4 deletions
@@ -14,17 +14,17 @@

 model_name_or_path="THUDM/chatglm-6b"
 dataset_name_or_path="llm_benchmark_zh"
-max_length=3072
+base_batch_size=1
 learning_rate="3e-04"
-recompute="1"
+recompute="0"
 tensor_parallel_degree="1"
 lora="1"
 prefix_tuning="0"
 model_item="THUDM-chatglm-6b_lora"
-run_mode="DP1-recompute"
+run_mode="DP1"
 device_num="N1C1"
 num_train_epochs=2
 export CUDA_VISIBLE_DEVICES=0
 cd ./tests
 bash ./test_tipc/dygraph/ft/benchmark_common/prepare.sh
-bash ./test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh ${model_name_or_path} ${dataset_name_or_path} ${max_length} ${learning_rate} ${recompute} ${tensor_parallel_degree} ${lora} ${prefix_tuning} ${model_item} ${run_mode} ${device_num} ${num_train_epochs}
+bash ./test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh ${model_name_or_path} ${dataset_name_or_path} ${base_batch_size} ${learning_rate} ${recompute} ${tensor_parallel_degree} ${lora} ${prefix_tuning} ${model_item} ${run_mode} ${device_num} ${num_train_epochs}
Lines changed: 4 additions & 4 deletions
@@ -14,17 +14,17 @@

 model_name_or_path="THUDM/chatglm-6b"
 dataset_name_or_path="llm_benchmark_zh"
-max_length=3072
+base_batch_size=1
 learning_rate="3e-02"
-recompute="1"
+recompute="0"
 tensor_parallel_degree="1"
 lora="0"
 prefix_tuning="1"
 model_item="THUDM-chatglm-6b_pt"
-run_mode="DP1-recompute"
+run_mode="DP1"
 device_num="N1C1"
 num_train_epochs=2
 export CUDA_VISIBLE_DEVICES=0
 cd ./tests
 bash ./test_tipc/dygraph/ft/benchmark_common/prepare.sh
-bash ./test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh ${model_name_or_path} ${dataset_name_or_path} ${max_length} ${learning_rate} ${recompute} ${tensor_parallel_degree} ${lora} ${prefix_tuning} ${model_item} ${run_mode} ${device_num} ${num_train_epochs}
+bash ./test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh ${model_name_or_path} ${dataset_name_or_path} ${base_batch_size} ${learning_rate} ${recompute} ${tensor_parallel_degree} ${lora} ${prefix_tuning} ${model_item} ${run_mode} ${device_num} ${num_train_epochs}
Lines changed: 4 additions & 4 deletions
@@ -14,17 +14,17 @@

 model_name_or_path="THUDM/chatglm-6b"
 dataset_name_or_path="llm_benchmark_zh"
-max_length=3072
+base_batch_size=2
 learning_rate="3e-05"
-recompute="1"
+recompute="0"
 tensor_parallel_degree="8"
 lora="0"
 prefix_tuning="0"
 model_item="THUDM-chatglm-6b_sft"
-run_mode="MP8-recompute"
+run_mode="MP8"
 device_num="N1C8"
 num_train_epochs=5
 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
 cd ./tests
 bash ./test_tipc/dygraph/ft/benchmark_common/prepare.sh
-bash ./test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh ${model_name_or_path} ${dataset_name_or_path} ${max_length} ${learning_rate} ${recompute} ${tensor_parallel_degree} ${lora} ${prefix_tuning} ${model_item} ${run_mode} ${device_num} ${num_train_epochs}
+bash ./test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh ${model_name_or_path} ${dataset_name_or_path} ${base_batch_size} ${learning_rate} ${recompute} ${tensor_parallel_degree} ${lora} ${prefix_tuning} ${model_item} ${run_mode} ${device_num} ${num_train_epochs}

tests/test_tipc/dygraph/ft/llama/N1C1/facebook-llama-13b_lora_bs1_fp16_DP1-recompute.sh

Lines changed: 2 additions & 2 deletions
@@ -14,7 +14,7 @@

 model_name_or_path="facebook/llama-13b"
 dataset_name_or_path="llm_benchmark_en"
-max_length=1024
+base_batch_size=1
 learning_rate="3e-04"
 recompute="1"
 tensor_parallel_degree="1"
@@ -27,4 +27,4 @@ num_train_epochs=2
 export CUDA_VISIBLE_DEVICES=0
 cd ./tests
 bash ./test_tipc/dygraph/ft/benchmark_common/prepare.sh
-bash ./test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh ${model_name_or_path} ${dataset_name_or_path} ${max_length} ${learning_rate} ${recompute} ${tensor_parallel_degree} ${lora} ${prefix_tuning} ${model_item} ${run_mode} ${device_num} ${num_train_epochs}
+bash ./test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh ${model_name_or_path} ${dataset_name_or_path} ${base_batch_size} ${learning_rate} ${recompute} ${tensor_parallel_degree} ${lora} ${prefix_tuning} ${model_item} ${run_mode} ${device_num} ${num_train_epochs}

tests/test_tipc/dygraph/ft/llama/N1C1/facebook-llama-13b_pt_bs1_fp16_DP1-recompute.sh

Lines changed: 2 additions & 2 deletions
@@ -14,7 +14,7 @@

 model_name_or_path="facebook/llama-13b"
 dataset_name_or_path="llm_benchmark_en"
-max_length=1024
+base_batch_size=1
 learning_rate="3e-02"
 recompute="1"
 tensor_parallel_degree="1"
@@ -27,4 +27,4 @@ num_train_epochs=2
 export CUDA_VISIBLE_DEVICES=0
 cd ./tests
 bash ./test_tipc/dygraph/ft/benchmark_common/prepare.sh
-bash ./test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh ${model_name_or_path} ${dataset_name_or_path} ${max_length} ${learning_rate} ${recompute} ${tensor_parallel_degree} ${lora} ${prefix_tuning} ${model_item} ${run_mode} ${device_num} ${num_train_epochs}
+bash ./test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh ${model_name_or_path} ${dataset_name_or_path} ${base_batch_size} ${learning_rate} ${recompute} ${tensor_parallel_degree} ${lora} ${prefix_tuning} ${model_item} ${run_mode} ${device_num} ${num_train_epochs}
Lines changed: 4 additions & 4 deletions
@@ -14,17 +14,17 @@

 model_name_or_path="facebook/llama-7b"
 dataset_name_or_path="llm_benchmark_en"
-max_length=3072
+base_batch_size=1
 learning_rate="3e-04"
-recompute="1"
+recompute="0"
 tensor_parallel_degree="1"
 lora="1"
 prefix_tuning="0"
 model_item="facebook-llama-7b_lora"
-run_mode="DP1-recompute"
+run_mode="DP1"
 device_num="N1C1"
 num_train_epochs=2
 export CUDA_VISIBLE_DEVICES=0
 cd ./tests
 bash ./test_tipc/dygraph/ft/benchmark_common/prepare.sh
-bash ./test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh ${model_name_or_path} ${dataset_name_or_path} ${max_length} ${learning_rate} ${recompute} ${tensor_parallel_degree} ${lora} ${prefix_tuning} ${model_item} ${run_mode} ${device_num} ${num_train_epochs}
+bash ./test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh ${model_name_or_path} ${dataset_name_or_path} ${base_batch_size} ${learning_rate} ${recompute} ${tensor_parallel_degree} ${lora} ${prefix_tuning} ${model_item} ${run_mode} ${device_num} ${num_train_epochs}
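
As a usage illustration, substituting this config's values into the final call above gives the expanded command below. The values are copied verbatim from the script; the working directory is assumed to be the repository root, as implied by the cd ./tests line.

# Illustrative expansion of the call above for the facebook-llama-7b_lora case;
# the third positional argument is now the per-device batch size.
export CUDA_VISIBLE_DEVICES=0
cd ./tests
bash ./test_tipc/dygraph/ft/benchmark_common/prepare.sh
bash ./test_tipc/dygraph/ft/benchmark_common/run_benchmark.sh \
    "facebook/llama-7b" "llm_benchmark_en" "1" "3e-04" "0" "1" \
    "1" "0" "facebook-llama-7b_lora" "DP1" "N1C1" "2"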
