Commit fac2f64

delete parallel_state.py (#3250)
1 parent fbdd6b0 commit fac2f64

3 files changed: +7 additions, -37 deletions

fastdeploy/distributed/communication.py

Lines changed: 2 additions & 5 deletions

@@ -20,8 +20,6 @@
 import paddle.distributed as dist
 from paddle.distributed import fleet
 
-from fastdeploy.distributed.parallel_state import get_tensor_model_parallel_world_size
-
 _TP_AR = None
 
 
@@ -39,10 +37,9 @@ def use_custom_allreduce(custom_all_reduce_max_bytes: int = 8192 * 1024):
     hcg = fleet.get_hybrid_communicate_group()
     model_parallel_group = hcg.get_model_parallel_group()
     global _TP_AR
-    if get_tensor_model_parallel_world_size() > 1 and paddle.is_compiled_with_cuda():
-        from fastdeploy.distributed.custom_all_reduce import CustomAllreduce
+    from fastdeploy.distributed.custom_all_reduce import CustomAllreduce
 
-        _TP_AR = CustomAllreduce(model_parallel_group, custom_all_reduce_max_bytes)
+    _TP_AR = CustomAllreduce(model_parallel_group, custom_all_reduce_max_bytes)
 
 
 try:
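
After this change, use_custom_allreduce performs no capability checks of its own: the caller is expected to have already confirmed a tensor-parallel group larger than one rank and a CUDA build. The resulting function, assembled from the unchanged and added lines above (a sketch; the rest of communication.py is elided):

from paddle.distributed import fleet

_TP_AR = None


def use_custom_allreduce(custom_all_reduce_max_bytes: int = 8192 * 1024):
    # Precondition, now enforced by the caller (see gpu_worker.py below):
    # tensor_parallel_size > 1 and paddle.is_compiled_with_cuda().
    hcg = fleet.get_hybrid_communicate_group()
    model_parallel_group = hcg.get_model_parallel_group()
    global _TP_AR
    # Imported lazily so the CUDA extension is only loaded when actually used.
    from fastdeploy.distributed.custom_all_reduce import CustomAllreduce

    _TP_AR = CustomAllreduce(model_parallel_group, custom_all_reduce_max_bytes)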

fastdeploy/distributed/parallel_state.py

Lines changed: 0 additions & 31 deletions
This file was deleted.
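
The only symbol this diff shows being imported from the deleted module is get_tensor_model_parallel_world_size. Its implementation is not shown here, but a helper with that name is typically a thin wrapper over Paddle's hybrid communicate group; a hedged sketch of that shape (an assumption, not the deleted code):

from paddle.distributed import fleet


def get_tensor_model_parallel_world_size() -> int:
    # Assumed implementation: query the hybrid communicate group for the
    # size of the model (tensor) parallel group.
    hcg = fleet.get_hybrid_communicate_group()
    return hcg.get_model_parallel_world_size()

Call sites now read self.parallel_config.tensor_parallel_size instead, which is available without an initialized fleet group.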

fastdeploy/worker/gpu_worker.py

Lines changed: 5 additions & 1 deletion

@@ -68,7 +68,11 @@ def init_device(self):
 
         gc.collect()
         paddle.device.cuda.empty_cache()
-        if self.parallel_config.enable_custom_all_reduce:
+        if (
+            self.parallel_config.enable_custom_all_reduce
+            and self.parallel_config.tensor_parallel_size > 1
+            and paddle.is_compiled_with_cuda()
+        ):
             from fastdeploy.distributed.communication import use_custom_allreduce
 
             use_custom_allreduce()
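
The guard removed from communication.py reappears here, widened to the three conditions under which the custom all-reduce is actually usable. A condensed sketch of the resulting method body (surrounding init_device code elided, so this reads as a fragment of the GpuWorker class):

import gc

import paddle


def init_device(self):
    # ... device selection and memory cleanup above ...
    gc.collect()
    paddle.device.cuda.empty_cache()
    if (
        self.parallel_config.enable_custom_all_reduce
        and self.parallel_config.tensor_parallel_size > 1
        and paddle.is_compiled_with_cuda()
    ):
        # Skip custom all-reduce entirely on single-rank or non-CUDA setups.
        from fastdeploy.distributed.communication import use_custom_allreduce

        use_custom_allreduce()

Net effect of the commit: the TP-size and CUDA checks move from communication.py to the single call site in gpu_worker.py, which leaves parallel_state.py with no remaining users and lets it be deleted outright.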
