Skip to content

Commit 3e365f1

Browse files
committed
add FlagCX as optional communication library when using iluvatar gpus
1 parent 56b15ef commit 3e365f1

File tree

4 files changed

+300
-8
lines changed

4 files changed

+300
-8
lines changed

backends/iluvatar_gpu/CMakeLists.txt

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ include(external/eigen)
3333
include(external/xxhash)
3434
include(external/zlib)
3535
include(external/protobuf)
36+
if(WITH_FLAGCX)
37+
add_definitions("-DPADDLE_WITH_FLAGCX")
38+
include(external/flagcx)
39+
endif()
3640

3741
set(PLUGIN_VERSION ${PADDLE_VERSION})
3842
set(PROTO_FILE "${PADDLE_SOURCE_DIR}/paddle/phi/core/external_error.proto")
@@ -66,8 +70,7 @@ target_include_directories(external_error_proto
6670
target_link_libraries(external_error_proto PUBLIC protobuf)
6771
set_target_properties(external_error_proto PROPERTIES POSITION_INDEPENDENT_CODE
6872
ON)
69-
70-
add_custom_target(external_deps DEPENDS eigen3 zlib protobuf)
73+
# Aggregate target that depends on the external projects included above.
add_custom_target(external_deps DEPENDS eigen3 zlib protobuf)
# The flagcx target is only defined by external/flagcx.cmake when WITH_FLAGCX
# is enabled, so depend on it only when it actually exists; naming it
# unconditionally breaks builds configured with WITH_FLAGCX=OFF.
if(TARGET flagcx)
  add_dependencies(external_deps flagcx)
endif()
7174

7275
if(WITH_COREX)
7376
add_definitions(-DPADDLE_WITH_COREX)
@@ -258,7 +261,9 @@ target_link_libraries(
258261
protobuf
259262
external_error_proto
260263
cuinfer
261-
nccl)
264+
nccl
265+
# To build with FlagCX, replace nccl above with ${FLAGCX_LIB}.
266+
)
262267

263268
include_directories(BEFORE ${PADDLE_SOURCE_DIR})
264269

backends/iluvatar_gpu/build_paddle.sh

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
#!/bin/bash
22

33
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
4-
#
4+
#
55
# Licensed under the Apache License, Version 2.0 (the "License");
66
# you may not use this file except in compliance with the License.
77
# You may obtain a copy of the License at
8-
#
8+
#
99
# http://www.apache.org/licenses/LICENSE-2.0
10-
#
10+
#
1111
# Unless required by applicable law or agreed to in writing, software
1212
# distributed under the License is distributed on an "AS IS" BASIS,
1313
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -26,6 +26,15 @@ export CMAKE_CUDA_ARCHITECTURES=${COREX_ARCH}
2626
CURRENT_DIR=$(pwd)
2727
PADDLE_SOURCE_DIR="${CURRENT_DIR}/../../Paddle"
2828
PATCH_FILE="${CURRENT_DIR}/patches/paddle-corex.patch"
29+
# set BUILD_WITH_FLAGCX to 1 if we want to use flagcx as communication backend
30+
# Defaults to 0 (FlagCX disabled); a BUILD_WITH_FLAGCX value already present in
# the environment takes precedence, so the script does not need to be edited.
BUILD_WITH_FLAGCX="${BUILD_WITH_FLAGCX:-0}"
31+
# Location of the FlagCX checkout; override by exporting FLAGCX_ROOT before
# running this script. Falls back to the previous hard-coded default.
FLAGCX_ROOT="${FLAGCX_ROOT:-/workspace/FlagCX}"
32+
33+
if [ "$BUILD_WITH_FLAGCX" == "1" ]; then
34+
WITH_FLAGCX="ON"
35+
else
36+
WITH_FLAGCX="OFF"
37+
fi
2938

3039
bash clean_paddle.sh
3140

@@ -51,9 +60,10 @@ if [[ ! -d "build" ]]; then
5160
fi
5261
pushd build
5362

54-
cmake -DPY_VERSION=${PYTHON_VERSION} -DWITH_COREX=ON \
55-
-DWITH_DISTRIBUTE=ON -DWITH_NCCL=ON -DWITH_RCCL=OFF -DCMAKE_BUILD_TYPE=Release \
63+
cmake -DPY_VERSION=${PYTHON_VERSION} -DWITH_COREX=ON -DPADDLE_SOURCE_DIR=${PADDLE_SOURCE_DIR} \
64+
-DWITH_DISTRIBUTE=ON -DWITH_NCCL=ON -DWITH_FLAGCX=${WITH_FLAGCX} -DWITH_RCCL=OFF -DCMAKE_BUILD_TYPE=Release \
5665
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DON_INFER=ON -DCOREX_VERSION=${COREX_VERSION} -DCOREX_ARCH=${COREX_ARCH} \
66+
-DFLAGCX_ROOT=${FLAGCX_ROOT} \
5767
-DCMAKE_CXX_FLAGS='-Wno-error=pessimizing-move -Wno-error=deprecated-copy -Wno-error=init-list-lifetime' \
5868
-DCMAKE_CUDA_FLAGS='-Xclang -fcuda-allow-variadic-functions -mllvm --skip-double' \
5969
-DWITH_ARM=OFF -DWITH_DGC=OFF .. 2>&1 | tee compile.log
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Verbose find_*() tracing is opt-in (-DFLAGCX_FIND_DEBUG=ON). Setting
# CMAKE_FIND_DEBUG_MODE unconditionally makes every later find_path(),
# find_library(), etc. in the configure run print its full search log.
if(FLAGCX_FIND_DEBUG)
  set(CMAKE_FIND_DEBUG_MODE ON)
endif()
2+
# flagcx.cmake
3+
if(NOT WITH_FLAGCX)
4+
return()
5+
endif()
6+
7+
set(FLAGCX_SOURCE_DIR "${FLAGCX_ROOT}")
8+
set(FLAGCX_LIB_DIR "${FLAGCX_SOURCE_DIR}/build/lib")
9+
set(FLAGCX_BINARY_DIR "${PADDLE_SOURCE_DIR}/build/third_party/flagcx")
10+
set(THIRD_PARTY_DIR "${PADDLE_SOURCE_DIR}/build/third_party")
11+
12+
file(REMOVE_RECURSE ${FLAGCX_BINARY_DIR})
13+
message(STATUS "removed old flagcx dir")
14+
message(STATUS "Copying third-party source to build directory")
15+
execute_process(COMMAND cp -r ${FLAGCX_SOURCE_DIR} ${THIRD_PARTY_DIR}
16+
RESULT_VARIABLE COPY_RESULT)
17+
18+
if(NOT COPY_RESULT EQUAL 0)
19+
message(FATAL_ERROR "Failed to copy third-party source to build directory")
20+
endif()
21+
22+
# Locate the FlagCX headers and library (expected under FLAGCX_ROOT); this file does not build them.
23+
message(STATUS "Building third-party library with its Makefile")
24+
25+
find_path(
26+
FLAGCX_INCLUDE_DIR flagcx.h
27+
PATHS ${FLAGCX_SOURCE_DIR}/flagcx/include
28+
NO_DEFAULT_PATH)
29+
30+
message(STATUS "FLAGCX_INCLUDE_DIR is ${FLAGCX_INCLUDE_DIR}")
31+
include_directories(SYSTEM ${FLAGCX_INCLUDE_DIR})
32+
33+
add_library(flagcx INTERFACE)
34+
find_library(
35+
FLAGCX_LIB
36+
NAMES flagcx libflagcx
37+
PATHS ${FLAGCX_LIB_DIR}
38+
DOC "My custom library")
39+
40+
# FLAGCX_LIB is the result variable of find_library() above (a file path),
# not a target, so it cannot be used with add_dependencies(). Fail early with
# a clear message when the library is missing, then attach it to the flagcx
# interface target so that anything linking `flagcx` links the library.
if(NOT FLAGCX_LIB)
  message(
    FATAL_ERROR
      "FlagCX library not found in ${FLAGCX_LIB_DIR}; build FlagCX first or set FLAGCX_ROOT"
  )
endif()
target_link_libraries(flagcx INTERFACE "${FLAGCX_LIB}")
41+
message(STATUS "FLAGCX_LIB is ${FLAGCX_LIB}")

0 commit comments

Comments
 (0)