Skip to content

Commit a3eac61

Browse files
authored
[XPU] [DEEP_EP 1/4] Add XPU support for DeepEP. The current commit only ensures successful compilation; functional implementation will follow in subsequent commits. (PaddlePaddle#76284)
1 parent 9607b91 commit a3eac61

File tree

23 files changed

+5492
-0
lines changed

23 files changed

+5492
-0
lines changed

paddle/fluid/distributed/collective/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@ if(WITH_DISTRIBUTE
88
set(DEEPEP_LIB deep_ep)
99
endif()
1010

11+
# Enable the XPU build of DeepEP only when WITH_DISTRIBUTE, WITH_XPU and
# WITH_XPU_XRE5 are all true. When enabled, this prints a status line, adds
# the deep_ep_xpu subdirectory to the build, and sets DEEPEP_LIB to
# deep_ep_xpu.
# NOTE(review): this assignment comes after the earlier
# set(DEEPEP_LIB deep_ep), so it would take precedence if both branches ran;
# presumably the two configurations are mutually exclusive - confirm.
if(WITH_DISTRIBUTE
12+
AND WITH_XPU
13+
AND WITH_XPU_XRE5)
14+
message(STATUS "Enable DeepEP alltoall communication.")
15+
add_subdirectory(deep_ep_xpu)
16+
set(DEEPEP_LIB deep_ep_xpu)
17+
endif()
18+
1119
cc_library(
1220
process_group
1321
SRCS process_group.cc
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# DeepEP of 2025.05.19, commit: d5ca4495c0c068bc617102841b9322d378fac8ea
2+
# if(WITH_NVSHMEM)
3+
# set(CMAKE_CUDA_FLAGS
4+
# "${CMAKE_CUDA_FLAGS} -rdc=true --ptxas-options=--register-usage-level=10,--warn-on-local-memory-usage"
5+
# )
6+
# string(REGEX REPLACE "-gencode arch=compute_[0-8][0-9],code=sm_[0-8][0-9]" ""
7+
# CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}")
8+
9+
# set(DEEPEP_KERNEL_SRCS
10+
# kernels/intranode.cu
11+
# kernels/runtime.cu
12+
# kernels/internode.cu
13+
# kernels/internode_ll.cu
14+
# kernels/internode_ll_two_stage.cu
15+
# kernels/internode_ll.cu
16+
# kernels/m2n_ll_two_stage.cu)
17+
# cc_library(
18+
# deepep_kernels
19+
# SRCS ${DEEPEP_KERNEL_SRCS}
20+
# DEPS nvshmem cudadevrt)
21+
22+
# set_target_properties(deepep_kernels PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
23+
# set_target_properties(deepep_kernels PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS
24+
# ON)
25+
# else()
26+
# set(DEEPEP_KERNEL_SRCS kernels/intranode.cu kernels/runtime.cu)
27+
# cc_library(deepep_kernels SRCS ${DEEPEP_KERNEL_SRCS})
28+
# endif()
29+
30+
# cc_library(
31+
# deep_ep_xpu
32+
# SRCS deep_ep.cpp src/event_pool.cc src/event.cc src/CUDAStream.cc
33+
# DEPS phi common deepep_kernels)
34+
35+
# set_target_properties(deep_ep PROPERTIES CUDA_SEPARABLE_COMPILATION OFF)
36+
# target_compile_options(deep_ep PRIVATE -Wno-reorder -Wno-unused-variable)
37+
38+
# Build definition for the deep_ep_xpu library: no .cu kernel sources are
# listed, deepep_kernels is declared as an empty INTERFACE target, and the
# host-side sources are compiled into deep_ep_xpu.
# Never compile the .cu sources.
39+
set(DEEPEP_KERNEL_SRCS) # clear the list; set() with no value unsets the variable
40+
41+
# Use an empty INTERFACE target as a placeholder so the DEPS entry on
# deep_ep_xpu below is satisfied.
42+
add_library(deepep_kernels INTERFACE)
43+
44+
# The main library is unchanged.
45+
cc_library(
46+
deep_ep_xpu
47+
SRCS deep_ep.cpp src/event_pool.cc src/event.cc src/CUDAStream.cc
48+
DEPS phi common deepep_kernels)
49+
50+
# Do not set any CUDA properties (the two lines below are removed/commented out).
51+
# set_target_properties(deepep_kernels PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
52+
# set_target_properties(deepep_kernels PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON)
53+
54+
# Use the corrected target name (deep_ep_xpu instead of deep_ep).
55+
# set_target_properties(deep_ep PROPERTIES CUDA_SEPARABLE_COMPILATION OFF) # <- removed
56+
# target_compile_options(deep_ep PRIVATE -Wno-reorder -Wno-unused-variable) # <- removed
57+
# Pass -Wno-reorder and -Wno-unused-variable when compiling deep_ep_xpu itself;
# PRIVATE keeps these options from propagating to targets that depend on it.
target_compile_options(deep_ep_xpu PRIVATE -Wno-reorder -Wno-unused-variable)

0 commit comments

Comments
 (0)