Skip to content

Commit e672bb4

Browse files
author
lilong12
authored
Add benchmark for hybrid parallelism gpt-3 in static mode (#2222)
* add hybrid benchmark for static gpt-3 * add hybrid benchmark for static gpt-3 * delete unused file * update README * update * add run script * update README * update * update * update * update * update * update
1 parent cd43cc6 commit e672bb4

12 files changed

+345
-0
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# PaddleNLP 下静态图混合并行 benchmark 模型执行说明
2+
静态图混合并行 benchmark 测试脚本说明
3+
4+
# 目录说明
5+
# Docker 运行环境
6+
docker image: registry.baidu.com/paddlecloud/base-images:paddlecloud-ubuntu18.04-gcc8.2-cuda11.0-cudnn8
7+
8+
# 运行 benchmark 测试步骤
9+
```shell
10+
git clone https://github.com/PaddlePaddle/PaddleNLP.git
11+
cd PaddleNLP/tests/
12+
```
13+
14+
# 准备数据
15+
16+
```shell
17+
bash test_tipc/static/hybrid_parallelism/gpt/benchmark_common/prepare.sh
18+
```
19+
20+
# 运行模型
21+
22+
## 单卡
23+
24+
```shell
25+
# ${shell_name} 为 N1C1 目录下对应配置的脚本文件名（不含 .sh 后缀）
bash test_tipc/static/hybrid_parallelism/gpt/N1C1/${shell_name}.sh
26+
```
27+
28+
## 多卡
29+
30+
```shell
31+
# ${node_num} 为节点数，${gpu_num} 为 GPU 总卡数（例如 N1C8、N4C32）；${shell_name} 为对应目录下的脚本文件名（不含 .sh 后缀）
bash test_tipc/static/hybrid_parallelism/gpt/N${node_num}C${gpu_num}/${shell_name}.sh
32+
```
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Benchmark launcher: gpt3 / fp16 / N1C1 / DP1-MP1-PP1.
# Hands the settings below to the shared run_benchmark.sh as 11 positional
# arguments. The script path is relative, so the current directory must be
# the one that contains test_tipc/.

# ${model} selects the directory that holds run_benchmark.sh.
model=gpt
model_item=gpt3
fp_item=fp16

# Parallel degrees and the run_mode label that mirrors them.
# NOTE(review): presumably data / model / pipeline parallelism -- confirm
# against run_benchmark.sh.
dp_degree=1
mp_degree=1
pp_degree=1
run_mode=DP1-MP1-PP1

# Remaining settings forwarded to the benchmark.
micro_batch_size=16
global_batch_size=16
device_num=N1C1
max_iter=2000
use_sharding=false

benchmark_script="./test_tipc/static/hybrid_parallelism/${model}/benchmark_common/run_benchmark.sh"

# The order of these entries is the positional contract with run_benchmark.sh.
benchmark_args=(
  "${model_item}" "${fp_item}"
  "${mp_degree}" "${pp_degree}" "${dp_degree}"
  "${micro_batch_size}" "${global_batch_size}"
  "${run_mode}" "${device_num}" "${max_iter}" "${use_sharding}"
)

# run (stderr is folded into stdout)
bash "${benchmark_script}" "${benchmark_args[@]}" 2>&1
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Benchmark launcher: gpt3 / fp16 / N1C2 / DP2-MP1-PP1.
# Hands the settings below to the shared run_benchmark.sh as 11 positional
# arguments. The script path is relative, so the current directory must be
# the one that contains test_tipc/.

# ${model} selects the directory that holds run_benchmark.sh.
model=gpt
model_item=gpt3
fp_item=fp16

# Parallel degrees and the run_mode label that mirrors them.
# NOTE(review): presumably data / model / pipeline parallelism -- confirm
# against run_benchmark.sh.
dp_degree=2
mp_degree=1
pp_degree=1
run_mode=DP2-MP1-PP1

# Remaining settings forwarded to the benchmark.
micro_batch_size=8
global_batch_size=16
device_num=N1C2
max_iter=1500
use_sharding=false

benchmark_script="./test_tipc/static/hybrid_parallelism/${model}/benchmark_common/run_benchmark.sh"

# The order of these entries is the positional contract with run_benchmark.sh.
benchmark_args=(
  "${model_item}" "${fp_item}"
  "${mp_degree}" "${pp_degree}" "${dp_degree}"
  "${micro_batch_size}" "${global_batch_size}"
  "${run_mode}" "${device_num}" "${max_iter}" "${use_sharding}"
)

# run (stderr is folded into stdout)
bash "${benchmark_script}" "${benchmark_args[@]}" 2>&1
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Benchmark launcher: gpt3 / fp16 / N1C4 / DP1-MP4-PP1.
# Hands the settings below to the shared run_benchmark.sh as 11 positional
# arguments. The script path is relative, so the current directory must be
# the one that contains test_tipc/.

# ${model} selects the directory that holds run_benchmark.sh.
model=gpt
model_item=gpt3
fp_item=fp16

# Parallel degrees and the run_mode label that mirrors them.
# NOTE(review): presumably data / model / pipeline parallelism -- confirm
# against run_benchmark.sh.
dp_degree=1
mp_degree=4
pp_degree=1
run_mode=DP1-MP4-PP1

# Remaining settings forwarded to the benchmark.
micro_batch_size=16
global_batch_size=16
device_num=N1C4
max_iter=3
use_sharding=false

benchmark_script="./test_tipc/static/hybrid_parallelism/${model}/benchmark_common/run_benchmark.sh"

# The order of these entries is the positional contract with run_benchmark.sh.
benchmark_args=(
  "${model_item}" "${fp_item}"
  "${mp_degree}" "${pp_degree}" "${dp_degree}"
  "${micro_batch_size}" "${global_batch_size}"
  "${run_mode}" "${device_num}" "${max_iter}" "${use_sharding}"
)

# run (stderr is folded into stdout)
bash "${benchmark_script}" "${benchmark_args[@]}" 2>&1
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Benchmark launcher: gpt3 / fp16 / N1C4 / DP1-MP1-PP4 (sharding enabled).
# Hands the settings below to the shared run_benchmark.sh as 11 positional
# arguments. The script path is relative, so the current directory must be
# the one that contains test_tipc/.

# ${model} selects the directory that holds run_benchmark.sh.
model=gpt
model_item=gpt3
fp_item=fp16

# Parallel degrees and the run_mode label that mirrors them.
# NOTE(review): presumably data / model / pipeline parallelism -- confirm
# against run_benchmark.sh.
dp_degree=1
mp_degree=1
pp_degree=4
run_mode=DP1-MP1-PP4

# Remaining settings forwarded to the benchmark.
micro_batch_size=8
global_batch_size=32
device_num=N1C4
max_iter=3
use_sharding=true

benchmark_script="./test_tipc/static/hybrid_parallelism/${model}/benchmark_common/run_benchmark.sh"

# The order of these entries is the positional contract with run_benchmark.sh.
benchmark_args=(
  "${model_item}" "${fp_item}"
  "${mp_degree}" "${pp_degree}" "${dp_degree}"
  "${micro_batch_size}" "${global_batch_size}"
  "${run_mode}" "${device_num}" "${max_iter}" "${use_sharding}"
)

# run (stderr is folded into stdout)
bash "${benchmark_script}" "${benchmark_args[@]}" 2>&1
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Benchmark launcher: gpt3 / fp16 / N1C8 / DP2-MP4-PP1.
# Hands the settings below to the shared run_benchmark.sh as 11 positional
# arguments. The script path is relative, so the current directory must be
# the one that contains test_tipc/.

# ${model} selects the directory that holds run_benchmark.sh.
model=gpt
model_item=gpt3
fp_item=fp16

# Parallel degrees and the run_mode label that mirrors them.
# NOTE(review): presumably data / model / pipeline parallelism -- confirm
# against run_benchmark.sh.
dp_degree=2
mp_degree=4
pp_degree=1
run_mode=DP2-MP4-PP1

# Remaining settings forwarded to the benchmark.
micro_batch_size=8
global_batch_size=16
device_num=N1C8
max_iter=3
use_sharding=false

benchmark_script="./test_tipc/static/hybrid_parallelism/${model}/benchmark_common/run_benchmark.sh"

# The order of these entries is the positional contract with run_benchmark.sh.
benchmark_args=(
  "${model_item}" "${fp_item}"
  "${mp_degree}" "${pp_degree}" "${dp_degree}"
  "${micro_batch_size}" "${global_batch_size}"
  "${run_mode}" "${device_num}" "${max_iter}" "${use_sharding}"
)

# run (stderr is folded into stdout)
bash "${benchmark_script}" "${benchmark_args[@]}" 2>&1
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Benchmark launcher: gpt3 / fp16 / N1C8 / DP2-MP2-PP2 (sharding enabled).
# Hands the settings below to the shared run_benchmark.sh as 11 positional
# arguments. The script path is relative, so the current directory must be
# the one that contains test_tipc/.

# ${model} selects the directory that holds run_benchmark.sh.
model=gpt
model_item=gpt3
fp_item=fp16

# Parallel degrees and the run_mode label that mirrors them.
# NOTE(review): presumably data / model / pipeline parallelism -- confirm
# against run_benchmark.sh.
dp_degree=2
mp_degree=2
pp_degree=2
run_mode=DP2-MP2-PP2

# Remaining settings forwarded to the benchmark.
micro_batch_size=4
global_batch_size=16
device_num=N1C8
max_iter=3
use_sharding=true

benchmark_script="./test_tipc/static/hybrid_parallelism/${model}/benchmark_common/run_benchmark.sh"

# The order of these entries is the positional contract with run_benchmark.sh.
benchmark_args=(
  "${model_item}" "${fp_item}"
  "${mp_degree}" "${pp_degree}" "${dp_degree}"
  "${micro_batch_size}" "${global_batch_size}"
  "${run_mode}" "${device_num}" "${max_iter}" "${use_sharding}"
)

# run (stderr is folded into stdout)
bash "${benchmark_script}" "${benchmark_args[@]}" 2>&1
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Benchmark launcher: gpt3 / fp16 / N4C32 / DP1-MP8-PP4 (sharding enabled).
# Hands the settings below to the shared run_benchmark.sh as 11 positional
# arguments. The script path is relative, so the current directory must be
# the one that contains test_tipc/.

# ${model} selects the directory that holds run_benchmark.sh.
model=gpt
model_item=gpt3
fp_item=fp16

# Parallel degrees and the run_mode label that mirrors them.
# NOTE(review): presumably data / model / pipeline parallelism -- confirm
# against run_benchmark.sh.
dp_degree=1
mp_degree=8
pp_degree=4
run_mode=DP1-MP8-PP4

# Remaining settings forwarded to the benchmark.
micro_batch_size=4
global_batch_size=16
device_num=N4C32
max_iter=10
use_sharding=true

benchmark_script="./test_tipc/static/hybrid_parallelism/${model}/benchmark_common/run_benchmark.sh"

# The order of these entries is the positional contract with run_benchmark.sh.
benchmark_args=(
  "${model_item}" "${fp_item}"
  "${mp_degree}" "${pp_degree}" "${dp_degree}"
  "${micro_batch_size}" "${global_batch_size}"
  "${run_mode}" "${device_num}" "${max_iter}" "${use_sharding}"
)

# run (stderr is folded into stdout)
bash "${benchmark_script}" "${benchmark_args[@]}" 2>&1
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Benchmark launcher: gpt3 / fp16 / N4C32 / DP2-MP8-PP2 (sharding enabled).
# Hands the settings below to the shared run_benchmark.sh as 11 positional
# arguments. The script path is relative, so the current directory must be
# the one that contains test_tipc/.

# ${model} selects the directory that holds run_benchmark.sh.
model=gpt
model_item=gpt3
fp_item=fp16

# Parallel degrees and the run_mode label that mirrors them.
# NOTE(review): presumably data / model / pipeline parallelism -- confirm
# against run_benchmark.sh.
dp_degree=2
mp_degree=8
pp_degree=2
run_mode=DP2-MP8-PP2

# Remaining settings forwarded to the benchmark.
micro_batch_size=4
global_batch_size=16
device_num=N4C32
max_iter=30
use_sharding=true

benchmark_script="./test_tipc/static/hybrid_parallelism/${model}/benchmark_common/run_benchmark.sh"

# The order of these entries is the positional contract with run_benchmark.sh.
benchmark_args=(
  "${model_item}" "${fp_item}"
  "${mp_degree}" "${pp_degree}" "${dp_degree}"
  "${micro_batch_size}" "${global_batch_size}"
  "${run_mode}" "${device_num}" "${max_iter}" "${use_sharding}"
)

# run (stderr is folded into stdout)
bash "${benchmark_script}" "${benchmark_args[@]}" 2>&1
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Benchmark launcher: gpt3 / fp16 / N4C32 / DP4-MP8-PP1.
# Hands the settings below to the shared run_benchmark.sh as 11 positional
# arguments. The script path is relative, so the current directory must be
# the one that contains test_tipc/.

# ${model} selects the directory that holds run_benchmark.sh.
model=gpt
model_item=gpt3
fp_item=fp16

# Parallel degrees and the run_mode label that mirrors them.
# NOTE(review): presumably data / model / pipeline parallelism -- confirm
# against run_benchmark.sh.
dp_degree=4
mp_degree=8
pp_degree=1
run_mode=DP4-MP8-PP1

# Remaining settings forwarded to the benchmark.
micro_batch_size=4
global_batch_size=16
device_num=N4C32
max_iter=30
use_sharding=false

benchmark_script="./test_tipc/static/hybrid_parallelism/${model}/benchmark_common/run_benchmark.sh"

# The order of these entries is the positional contract with run_benchmark.sh.
benchmark_args=(
  "${model_item}" "${fp_item}"
  "${mp_degree}" "${pp_degree}" "${dp_degree}"
  "${micro_batch_size}" "${global_batch_size}"
  "${run_mode}" "${device_num}" "${max_iter}" "${use_sharding}"
)

# run (stderr is folded into stdout)
bash "${benchmark_script}" "${benchmark_args[@]}" 2>&1

0 commit comments

Comments
 (0)