Commit 7e09afc

[Benchmark] change N2C16 -> gbs=64 (#6902)

* change N2C16 -> gbs=64
* update gbs=64 configuration
* update gbs field
* update sharding_degree -> sharding_parallel_degree
* update llama scripts
* update llama benchmark scripts
* update llama N2C16 scripts
* change N2C16 script name
* update N1C8
* update N1C8 script
* update llama benchmark training strategy
* update N2C16 scripts
* update llama new benchmark training strategy
* update run_benchmark
* update max_step -> max_steps
* update tensor_parallel_config
* update benchmark script
* update base_batch_size
* update training_args
* change order of split
* update default tensor_parallel_config value
* change fp16 -> bf16
* revert requirements-dev.txt
* update requirements-dev.txt
1 parent 1ef5b94 commit 7e09afc

8 files changed: +228 -143 lines

paddlenlp/trainer/training_args.py

Lines changed: 6 additions & 1 deletion
@@ -906,7 +906,12 @@ def __post_init__(self):
 
         if tensor_parallel_degree > 1:
             strategy.tensor_parallel_configs = {"tensor_init_seed": self.seed}
-            mp_config = set(self.tensor_parallel_config.split(" "))
+
+            if " " in self.tensor_parallel_config:
+                mp_config = set(self.tensor_parallel_config.split(" "))
+            else:
+                mp_config = set(self.tensor_parallel_config.split(","))
+
             for x in mp_config:
                 if len(x) > 0:
                     if x not in [
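The change accepts comma-separated tensor_parallel_config values (the form the updated benchmark scripts pass) while keeping the legacy space-separated form working. A minimal standalone sketch of the split logic, assuming only the behavior visible in the hunk above; parse_mp_config is a hypothetical helper name, not a PaddleNLP API:

# Standalone sketch of the new split behavior; `parse_mp_config` is a
# hypothetical helper, not part of PaddleNLP.
def parse_mp_config(tensor_parallel_config: str) -> set:
    if " " in tensor_parallel_config:
        # Legacy form: space-separated options take precedence.
        return set(tensor_parallel_config.split(" "))
    # New form: comma-separated options, as used by the benchmark scripts.
    return set(tensor_parallel_config.split(","))

# Both separators yield the same option set:
assert parse_mp_config(
    "enable_mp_async_allreduce enable_mp_skip_c_identity"
) == parse_mp_config("enable_mp_async_allreduce,enable_mp_skip_c_identity")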

tests/test_tipc/dygraph/hybrid_parallelism/llama/N1C8/facebook-llama-13b_pretrain_bs32_fp16_DP1-MP2-PP2-SD2.sh

Lines changed: 0 additions & 38 deletions
This file was deleted.
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

param="model_name_or_path=facebook/llama-13b "
param+="per_device_train_batch_size=1 "
param+="tensor_parallel_degree=1 "
param+="data_parallel_degree=1 "
param+="pipeline_parallel_degree=8 "
param+="virtual_pp_degree=5 "
param+="sequence_parallel=0 "
param+="sharding_parallel_degree=1 "
param+="save_steps=200 "
param+="sharding=stage1 "
param+="recompute=1 "
param+="run_mode=DP1-MP1-PP8-mbs1-acc32-recompute "
param+="device_num=N1C8 "
param+="global_batch_size=32 "
param+="model_item=facebook-llama-13b_seqlen2048_pretrain "
param+="max_steps=150 "
param+="gradient_accumulation_steps=32 "
param+="pp_recompute_interval=1 "
param+="tensor_parallel_config=enable_mp_async_allreduce,enable_mp_skip_c_identity,enable_mp_fused_linear_param_grad_add "
param+="recompute_use_reentrant=true "

cd ./tests
bash ./test_tipc/dygraph/hybrid_parallelism/llama/benchmark_common/prepare.sh

bash -c "${param} bash ./test_tipc/dygraph/hybrid_parallelism/llama/benchmark_common/run_benchmark.sh"
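Note that `bash -c "${param} bash run_benchmark.sh"` expands $param into KEY=value prefixes, so each setting reaches run_benchmark.sh as an environment variable. The batch-size fields are also internally consistent, assuming the usual hybrid-parallel relationship that only data- and sharding-parallel replicas consume distinct samples; compute_gbs below is an illustrative helper, not part of the benchmark harness:

# Illustrative check of the batch-size arithmetic; `compute_gbs` is a
# hypothetical helper, not part of the benchmark scripts.
def compute_gbs(mbs: int, acc_steps: int, dp: int, sharding: int) -> int:
    # Tensor- and pipeline-parallel ranks share the same samples, so only
    # data-parallel and sharding-parallel replicas multiply the batch.
    return mbs * acc_steps * dp * sharding

# DP1-MP1-PP8 on N1C8: mbs=1, acc=32, dp=1, sharding=1
assert compute_gbs(1, 32, 1, 1) == 32  # matches global_batch_size=32

# The DP1-MP2-PP4 variant below (mbs=2, acc=16) lands on the same value:
assert compute_gbs(2, 16, 1, 1) == 32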
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

param="model_name_or_path=facebook/llama-13b "
param+="per_device_train_batch_size=2 "
param+="tensor_parallel_degree=2 "
param+="data_parallel_degree=1 "
param+="pipeline_parallel_degree=4 "
param+="virtual_pp_degree=5 "
param+="sequence_parallel=0 "
param+="sharding_parallel_degree=1 "
param+="save_steps=200 "
param+="sharding=stage1 "
param+="recompute=1 "
param+="run_mode=DP1-MP2-PP4-mbs2-acc16-recompute "
param+="device_num=N1C8 "
param+="global_batch_size=32 "
param+="model_item=facebook-llama-13b_seqlen2048_pretrain "
param+="max_steps=150 "
param+="gradient_accumulation_steps=16 "
param+="pp_recompute_interval=1 "
param+="tensor_parallel_config=enable_mp_async_allreduce,enable_mp_skip_c_identity,enable_mp_fused_linear_param_grad_add "
param+="recompute_use_reentrant=true "

cd ./tests
bash ./test_tipc/dygraph/hybrid_parallelism/llama/benchmark_common/prepare.sh

bash -c "${param} bash ./test_tipc/dygraph/hybrid_parallelism/llama/benchmark_common/run_benchmark.sh"

tests/test_tipc/dygraph/hybrid_parallelism/llama/N2C16/facebook-llama-13b_pretrain_bs32_fp16_DP1-MP2-PP2-SD4.sh

Lines changed: 0 additions & 38 deletions
This file was deleted.
Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

param="model_name_or_path=facebook/llama-13b "
param+="per_device_train_batch_size=1 "
param+="tensor_parallel_degree=2 "
param+="data_parallel_degree=1 "
param+="pipeline_parallel_degree=4 "
param+="virtual_pp_degree=1 "
param+="sequence_parallel=0 "
param+="sharding_parallel_degree=2 "
param+="save_steps=200 "
param+="sharding=stage1 "
param+="recompute=0 "
param+="run_mode=DP1-MP2-PP4-VPP1-mbs1-acc32-recompute "
param+="device_num=N2C16 "
param+="global_batch_size=64 "
param+="model_item=facebook-llama-13b_seqlen2048_pretrain "
param+="max_steps=150 "
param+="gradient_accumulation_steps=32 "
param+="pp_recompute_interval=1 "
param+="tensor_parallel_config=enable_mp_async_allreduce,enable_mp_skip_c_identity,enable_mp_fused_linear_param_grad_add "
param+="recompute_use_reentrant=true "

cd ./tests
bash ./test_tipc/dygraph/hybrid_parallelism/llama/benchmark_common/prepare.sh

bash -c "${param} bash ./test_tipc/dygraph/hybrid_parallelism/llama/benchmark_common/run_benchmark.sh"
Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

param="model_name_or_path=facebook/llama-13b "
param+="per_device_train_batch_size=1 "
param+="tensor_parallel_degree=2 "
param+="data_parallel_degree=1 "
param+="pipeline_parallel_degree=4 "
param+="virtual_pp_degree=2 "
param+="sequence_parallel=0 "
param+="sharding_parallel_degree=2 "
param+="save_steps=200 "
param+="sharding=stage1 "
param+="recompute=0 "
param+="run_mode=DP1-MP2-PP4-VPP2-mbs1-acc32-recompute "
param+="device_num=N2C16 "
param+="global_batch_size=64 "
param+="model_item=facebook-llama-13b_seqlen2048_pretrain "
param+="max_steps=150 "
param+="gradient_accumulation_steps=32 "
param+="pp_recompute_interval=1 "
param+="tensor_parallel_config=enable_mp_async_allreduce,enable_mp_skip_c_identity,enable_mp_fused_linear_param_grad_add "
param+="recompute_use_reentrant=true "

cd ./tests
bash ./test_tipc/dygraph/hybrid_parallelism/llama/benchmark_common/prepare.sh

bash -c "${param} bash ./test_tipc/dygraph/hybrid_parallelism/llama/benchmark_common/run_benchmark.sh"
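Both N2C16 configurations land on the gbs=64 target from the commit title, under the same assumption as the earlier sketch (only data- and sharding-parallel replicas multiply the batch):

# Both N2C16 scripts use mbs=1, acc=32, dp=1, sharding=2:
assert 1 * 32 * 1 * 2 == 64  # matches global_batch_size=64, i.e. "N2C16 -> gbs=64"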
