Commit 2c0d853

add test for CustomAllreduce (#3313)
1 parent 8791ad4 commit 2c0d853

2 files changed (+120, -0 lines)

test/distributed/custom_all_reduce.py

Lines changed: 72 additions & 0 deletions
@@ -0,0 +1,72 @@
"""
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""

import unittest

import numpy as np
import paddle
import paddle.distributed as dist
from paddle.distributed import fleet

from fastdeploy.distributed.custom_all_reduce import CustomAllreduce


class Test(unittest.TestCase):
    def setUp(self):
        """
        Initialize the test environment: seed the RNG and set up a
        two-way model-parallel hybrid group via fleet.
        """
        paddle.seed(2025)

        strategy = fleet.DistributedStrategy()
        strategy.hybrid_configs = {
            "dp_degree": 1,
            "mp_degree": 2,
            "pp_degree": 1,
            "sharding_degree": 1,
        }

        fleet.init(is_collective=True, strategy=strategy)

    def test_case(self):
        """
        Check that CustomAllreduce produces the same result as
        paddle.distributed.all_reduce.
        """
        # (m, n) tensor shapes to exercise, from tiny to large payloads.
        mns = [[1, 2048], [2, 4096], [20, 4096], [128, 4096], [256, 4096], [256, 8192]]

        hcg = fleet.get_hybrid_communicate_group()
        model_parallel_group = hcg.get_model_parallel_group()
        fa = CustomAllreduce(model_parallel_group)

        for m, n in mns:
            data_custom_ar = paddle.rand([m, n], dtype="bfloat16")
            data_paddle = data_custom_ar.clone()
            if fa.should_custom_ar(data_custom_ar):
                fa.custom_all_reduce(data_custom_ar)
                dist.all_reduce(data_paddle)
                if dist.get_rank() == 0:
                    np.testing.assert_allclose(
                        data_custom_ar.numpy(),
                        data_paddle.numpy(),
                        rtol=1e-04,
                        atol=1e-04,
                    )


if __name__ == "__main__":
    unittest.main()
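
The loop above only validates shapes for which should_custom_ar returns True, and since the return value of custom_all_reduce is never assigned, the test implies the reduction happens in place. As a hedged sketch (not part of this commit; all_reduce_with_fallback is a hypothetical helper, and the in-place behavior is an assumption read off the test), a caller might dispatch between the two paths like this:

import paddle.distributed as dist

def all_reduce_with_fallback(fa, tensor):
    # fa is a CustomAllreduce instance, as constructed in setUp above.
    if fa.should_custom_ar(tensor):
        # Custom kernel path; assumed in place, matching the test's usage.
        fa.custom_all_reduce(tensor)
    else:
        # Shapes the custom kernel declines fall back to the stock collective.
        dist.all_reduce(tensor)
    return tensor
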
Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import subprocess
import sys


def test_custom_all_reduce_launch():
    """
    Launch custom_all_reduce.py on two GPUs via paddle.distributed.launch
    and assert that the distributed test exits cleanly.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    custom_all_reduce_script = os.path.join(current_dir, "custom_all_reduce.py")
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
    command = [
        sys.executable,
        "-m",
        "paddle.distributed.launch",
        "--gpus",
        "0,1",
        custom_all_reduce_script,
    ]

    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

    try:
        stdout, stderr = process.communicate(timeout=400)
        return_code = process.returncode
    except subprocess.TimeoutExpired:
        # Treat a hang as a failure: kill the launcher and collect its output.
        process.kill()
        stdout, stderr = process.communicate()
        return_code = -1
    assert return_code == 0, f"Process exited with code {return_code}\nstdout:\n{stdout}\nstderr:\n{stderr}"


test_custom_all_reduce_launch()
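
Because the file calls test_custom_all_reduce_launch() at import time, importing or collecting it on a host with fewer than two visible GPUs fails outright rather than skipping. A hedged sketch of a guard one could place around that call (the guard is not in the commit; paddle.device.cuda.device_count is the standard Paddle device-count API, and the two-GPU requirement comes from the --gpus 0,1 invocation above):

import paddle

# Hypothetical guard, not part of this commit: only launch when the host
# exposes at least the two GPUs the launcher requests.
if paddle.device.cuda.device_count() >= 2:
    test_custom_all_reduce_launch()
else:
    print("Skipping CustomAllreduce launch test: at least 2 GPUs required.")
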
