diff --git a/backends/iluvatar_gpu/CMakeLists.txt b/backends/iluvatar_gpu/CMakeLists.txt
index 532965c8fda..a8d770f3b7b 100644
--- a/backends/iluvatar_gpu/CMakeLists.txt
+++ b/backends/iluvatar_gpu/CMakeLists.txt
@@ -33,6 +33,10 @@ include(external/eigen)
 include(external/xxhash)
 include(external/zlib)
 include(external/protobuf)
+if(WITH_FLAGCX)
+  add_definitions("-DPADDLE_WITH_FLAGCX")
+  include(external/flagcx)
+endif()
 
 set(PLUGIN_VERSION ${PADDLE_VERSION})
 set(PROTO_FILE "${PADDLE_SOURCE_DIR}/paddle/phi/core/external_error.proto")
@@ -66,8 +70,10 @@ target_include_directories(external_error_proto
 target_link_libraries(external_error_proto PUBLIC protobuf)
 set_target_properties(external_error_proto PROPERTIES POSITION_INDEPENDENT_CODE
                                            ON)
-
 add_custom_target(external_deps DEPENDS eigen3 zlib protobuf)
+if(WITH_FLAGCX)
+  add_custom_target(external_deps DEPENDS flagcx)
+endif()
 
 if(WITH_COREX)
   add_definitions(-DPADDLE_WITH_COREX)
@@ -258,7 +264,9 @@ target_link_libraries(
   protobuf
   external_error_proto
   cuinfer
-  nccl)
+  nccl
+  # change nccl to ${FLAGCX_LIB} if compiling with FlagCX ${FLAGCX_LIB}
+)
 
 include_directories(BEFORE ${PADDLE_SOURCE_DIR})
 
diff --git a/backends/iluvatar_gpu/build_paddle.sh b/backends/iluvatar_gpu/build_paddle.sh
index 57565784d8e..880ac9094b3 100644
--- a/backends/iluvatar_gpu/build_paddle.sh
+++ b/backends/iluvatar_gpu/build_paddle.sh
@@ -1,13 +1,13 @@
 #!/bin/bash
 # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-# 
+#
 # http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -26,6 +26,15 @@ export CMAKE_CUDA_ARCHITECTURES=${COREX_ARCH}
 CURRENT_DIR=$(pwd)
 PADDLE_SOURCE_DIR="${CURRENT_DIR}/../../Paddle"
 PATCH_FILE="${CURRENT_DIR}/patches/paddle-corex.patch"
+# set BUILD_WITH_FLAGCX to 1 if we want to use flagcx as communication backend
+BUILD_WITH_FLAGCX=0
+FLAGCX_ROOT="/workspace/FlagCX"
+
+if [ "$BUILD_WITH_FLAGCX" == "1" ]; then
+  WITH_FLAGCX="ON"
+else
+  WITH_FLAGCX="OFF"
+fi
 
 bash clean_paddle.sh
 
@@ -51,9 +60,10 @@ if [[ ! -d "build" ]]; then
 fi
 
 pushd build
-cmake -DPY_VERSION=${PYTHON_VERSION} -DWITH_COREX=ON \
--DWITH_DISTRIBUTE=ON -DWITH_NCCL=ON -DWITH_RCCL=OFF -DCMAKE_BUILD_TYPE=Release \
+cmake -DPY_VERSION=${PYTHON_VERSION} -DWITH_COREX=ON -DPADDLE_SOURCE_DIR=${PADDLE_SOURCE_DIR} \
+-DWITH_DISTRIBUTE=ON -DWITH_NCCL=ON -DWITH_FLAGCX=${WITH_FLAGCX} -DWITH_RCCL=OFF -DCMAKE_BUILD_TYPE=Release \
 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DON_INFER=ON -DCOREX_VERSION=${COREX_VERSION} -DCOREX_ARCH=${COREX_ARCH} \
+-DFLAGCX_ROOT=${FLAGCX_ROOT} \
 -DCMAKE_CXX_FLAGS='-Wno-error=pessimizing-move -Wno-error=deprecated-copy -Wno-error=init-list-lifetime' \
 -DCMAKE_CUDA_FLAGS='-Xclang -fcuda-allow-variadic-functions -mllvm --skip-double' \
 -DWITH_ARM=OFF -DWITH_DGC=OFF .. 2>&1 | tee compile.log
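Note on the build switch above: WITH_FLAGCX only adds the PADDLE_WITH_FLAGCX compile definition and pulls in the external/flagcx module; the actual choice between NCCL and FlagCX is made per translation unit by the preprocessor, as the runtime.cc changes below show. A minimal sketch of that gate (the helper name is hypothetical, not part of the patch):

```cpp
#include <string>

// Illustration only (not part of the patch): the PADDLE_WITH_FLAGCX definition
// added by add_definitions()/-DWITH_FLAGCX=ON selects the backend at compile
// time. The helper name ActiveXcclBackend is hypothetical.
std::string ActiveXcclBackend() {
#if defined(PADDLE_WITH_FLAGCX)
  return "flagcx";  // FlagCX code paths in runtime.cc are compiled in
#else
  return "nccl";    // default: NCCL-only code paths
#endif
}
```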
diff --git a/backends/iluvatar_gpu/cmake/external/flagcx.cmake b/backends/iluvatar_gpu/cmake/external/flagcx.cmake
new file mode 100644
index 00000000000..020d1a8afc9
--- /dev/null
+++ b/backends/iluvatar_gpu/cmake/external/flagcx.cmake
@@ -0,0 +1,41 @@
+set(CMAKE_FIND_DEBUG_MODE ON)
+# flagcx.cmake
+if(NOT WITH_FLAGCX)
+  return()
+endif()
+
+set(FLAGCX_SOURCE_DIR "${FLAGCX_ROOT}")
+set(FLAGCX_LIB_DIR "${FLAGCX_SOURCE_DIR}/build/lib")
+set(FLAGCX_BINARY_DIR "${PADDLE_SOURCE_DIR}/build/third_party/flagcx")
+set(THIRD_PARTY_DIR "${PADDLE_SOURCE_DIR}/build/third_party")
+
+file(REMOVE_RECURSE ${FLAGCX_BINARY_DIR})
+message(STATUS "removed old flagcx dir")
+message(STATUS "Copying third-party source to build directory")
+execute_process(COMMAND cp -r ${FLAGCX_SOURCE_DIR} ${THIRD_PARTY_DIR}
+                RESULT_VARIABLE COPY_RESULT)
+
+if(NOT COPY_RESULT EQUAL 0)
+  message(FATAL_ERROR "Failed to copy third-party source to build directory")
+endif()
+
+# Create a custom target to build the third-party library
+message(STATUS "Building third-party library with its Makefile")
+
+find_path(
+  FLAGCX_INCLUDE_DIR flagcx.h
+  PATHS ${FLAGCX_SOURCE_DIR}/flagcx/include
+  NO_DEFAULT_PATH)
+
+message(STATUS "FLAGCX_INCLUDE_DIR is ${FLAGCX_INCLUDE_DIR}")
+include_directories(SYSTEM ${FLAGCX_INCLUDE_DIR})
+
+add_library(flagcx INTERFACE)
+find_library(
+  FLAGCX_LIB
+  NAMES flagcx libflagcx
+  PATHS ${FLAGCX_LIB_DIR}
+  DOC "My custom library")
+
+add_dependencies(flagcx FLAGCX_LIB)
+message(STATUS "FLAGCX_LIB is ${FLAGCX_LIB}")
diff --git a/backends/iluvatar_gpu/runtime/runtime.cc b/backends/iluvatar_gpu/runtime/runtime.cc
index 904c978158a..d4eb14aa4ef 100644
--- a/backends/iluvatar_gpu/runtime/runtime.cc
+++ b/backends/iluvatar_gpu/runtime/runtime.cc
@@ -15,6 +15,9 @@
 #include
 #include
 #include
+#if defined(PADDLE_WITH_FLAGCX)
+#include <flagcx.h>
+#endif
 #include
 #include
 #include
@@ -46,6 +49,10 @@ static int global_current_device = 0;
 
 const char *const DeviceType = "iluvatar_gpu";
 const char *const SubDeviceType = "v0.1";
+#if defined(PADDLE_WITH_FLAGCX)
+C_CCLComm globalComm = nullptr;
+flagcxHandlerGroup_t flagcx_handler;
+#endif
 
 namespace phi {
 namespace internal {
@@ -75,6 +82,33 @@ inline ncclDataType_t PDDataTypeToNcclDataType(C_DataType type) {
   return ncclFloat32;
 }
 
+#if defined(PADDLE_WITH_FLAGCX)
+inline flagcxDataType_t PDDataTypeToFlagcxDataType(C_DataType type) {
+  if (type == C_DataType::FLOAT32) {
+    return flagcxFloat;
+  } else if (type == C_DataType::BFLOAT16) {
+    return flagcxBfloat16;
+  } else if (type == C_DataType::UINT8) {
+    return flagcxUint8;
+  } else if (type == C_DataType::UINT32) {
+    return flagcxUint32;
+  } else if (type == C_DataType::UINT64) {
+    return flagcxUint64;
+  } else if (type == C_DataType::INT8) {
+    return flagcxInt8;
+  } else if (type == C_DataType::INT32) {
+    return flagcxInt32;
+  } else if (type == C_DataType::INT64) {
+    return flagcxInt64;
+  } else if (type == C_DataType::FLOAT16) {
+    return flagcxHalf;
+  } else {
+    LOG(ERROR) << "Datatype " << type << " in flagcx is not supported.";
+  }
+  return flagcxFloat;
+}
+#endif
+
 #define NCCL_CHECK(cmd)     \
   do {                      \
     ncclResult_t r = cmd;   \
@@ -86,6 +120,20 @@ inline ncclDataType_t PDDataTypeToNcclDataType(C_DataType type) {
     }                       \
   } while (0)
 
+#if defined(PADDLE_WITH_FLAGCX)
+#define FLAGCX_CHECK(cmd)                                               \
+  do {                                                                  \
+    flagcxResult_t r = cmd;                                             \
+    if (r != flagcxSuccess) {                                           \
+      PADDLE_THROW(                                                     \
+          common::errors::External("Failed, FLAGCX error %s:%d '%s'\n", \
+                                   __FILE__,                            \
+                                   __LINE__,                            \
+                                   flagcxGetErrorString(r)));           \
+    }                                                                   \
+  } while (0)
+#endif
+
 class EigenGpuStreamDevice : public Eigen::StreamInterface {
  public:
  EigenGpuStreamDevice()
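Both NCCL_CHECK and the new FLAGCX_CHECK wrap their bodies in do { ... } while (0) so that each macro expands to a single statement. A standalone sketch of why that matters (CHECKED and the int return codes are hypothetical stand-ins, not the patch's macros):

```cpp
// Hypothetical stand-in for FLAGCX_CHECK, reduced to the part that matters
// here: the do { ... } while (0) wrapper makes the expansion one statement.
#define CHECKED(expr)  \
  do {                 \
    if ((expr) != 0) { \
      return -1;       \
    }                  \
  } while (0)

// CHECKED(x); therefore composes safely with if/else; without the wrapper the
// macro would expand to a block plus a stray semicolon, detaching the else.
int Example(int a, int b) {
  if (a > 0)
    CHECKED(b);
  else
    CHECKED(a);
  return 0;
}
```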
@@ -791,18 +839,51 @@ ncclRedOp_t PDReduceOpToNcclReduceOp(C_CCLReduceOp op) {
   }
 }
 
+#if defined(PADDLE_WITH_FLAGCX)
+flagcxRedOp_t PDReduceOpToFlagcxReduceOp(C_CCLReduceOp op) {
+  if (op == C_CCLReduceOp::MIN) {
+    return flagcxMin;
+  } else if (op == C_CCLReduceOp::MAX) {
+    return flagcxMax;
+  } else if (op == C_CCLReduceOp::SUM) {
+    return flagcxSum;
+  } else if (op == C_CCLReduceOp::PRODUCT) {
+    return flagcxProd;
+  } else if (op == C_CCLReduceOp::AVG) {
+    return flagcxAvg;
+  } else {
+    LOG(ERROR) << "Reduceop " << op << " in flagcx is not supported.";
+  }
+}
+#endif
+
 C_Status XcclGetUniqueIdSize(size_t *size) {
+#if defined(PADDLE_WITH_FLAGCX)
+  *size = sizeof(flagcxUniqueId);
+#else
   *size = sizeof(ncclUniqueId);
+#endif
   return C_SUCCESS;
 }
 
 C_Status XcclGetUniqueId(C_CCLRootId *unique_id) {
+#if defined(PADDLE_WITH_FLAGCX)
+  if (unique_id->sz != sizeof(flagcxUniqueId)) {
+    LOG(ERROR) << "unique_id->sz must be equal sizeof(flagcxUniqueId)";
+    return C_FAILED;
+  }
+  flagcxUniqueId_t flagcxId =
+      reinterpret_cast<flagcxUniqueId_t>(unique_id->data);
+  FLAGCX_CHECK(flagcxGetUniqueId(&flagcxId));
+  unique_id->data = flagcxId;
+#else
   if (unique_id->sz != sizeof(ncclUniqueId)) {
     LOG(ERROR) << "unique_id->sz must be equal sizeof(ncclUniqueId)";
     return C_FAILED;
   }
   NCCL_CHECK(
       ncclGetUniqueId(reinterpret_cast<ncclUniqueId *>(unique_id->data)));
+#endif
   return C_SUCCESS;
 }
 
@@ -811,17 +892,32 @@ C_Status XcclCommInitRank(size_t nranks,
                           C_CCLRootId *unique_id,
                           size_t rank,
                           C_CCLComm *comm) {
+#if defined(PADDLE_WITH_FLAGCX)
+  FLAGCX_CHECK(
+      flagcxCommInitRank(reinterpret_cast<flagcxComm_t *>(comm),
+                         nranks,
+                         reinterpret_cast<flagcxUniqueId_t>(unique_id->data),
+                         rank));
+  globalComm = *comm;
+  VLOG(4) << "[FLAGCX] comm inited: " << reinterpret_cast<flagcxComm_t>(*comm);
+#else
   NCCL_CHECK(
       ncclCommInitRank(reinterpret_cast<ncclComm_t *>(comm),
                        nranks,
                        *(reinterpret_cast<ncclUniqueId *>(unique_id->data)),
                        rank));
   VLOG(4) << "[NCCL] comm inited: " << reinterpret_cast<ncclComm_t>(*comm);
+#endif
   return C_SUCCESS;
 }
 
 C_Status XcclDestroyComm(C_CCLComm comm) {
+#if defined(PADDLE_WITH_FLAGCX)
+  FLAGCX_CHECK(flagcxCommDestroy(reinterpret_cast<flagcxComm_t>(comm)));
+  globalComm = nullptr;
+#else
   NCCL_CHECK(ncclCommDestroy(reinterpret_cast<ncclComm_t>(comm)));
+#endif
   return C_SUCCESS;
 }
 
@@ -832,6 +928,18 @@ C_Status XcclAllReduce(void *send_buf,
                        C_CCLReduceOp op,
                        C_CCLComm comm,
                        C_Stream stream) {
+#if defined(PADDLE_WITH_FLAGCX)
+  cudaStream_t cudaStream = reinterpret_cast<cudaStream_t>(stream);
+
+  FLAGCX_CHECK(
+      flagcxAllReduce(send_buf,
+                      recv_buf,
+                      count,
+                      phi::internal::PDDataTypeToFlagcxDataType(data_type),
+                      PDReduceOpToFlagcxReduceOp(op),
+                      reinterpret_cast<flagcxComm_t>(comm),
+                      reinterpret_cast<flagcxStream_t>(&cudaStream)));
+#else
   NCCL_CHECK(ncclAllReduce(send_buf,
                            recv_buf,
                            count,
@@ -839,6 +947,7 @@ C_Status XcclAllReduce(void *send_buf,
                            PDReduceOpToNcclReduceOp(op),
                            reinterpret_cast<ncclComm_t>(comm),
                            reinterpret_cast<cudaStream_t>(stream)));
+#endif
   return C_SUCCESS;
 }
 
@@ -848,6 +957,17 @@ C_Status XcclBroadcast(void *buf,
                        size_t root,
                        C_CCLComm comm,
                        C_Stream stream) {
+#if defined(PADDLE_WITH_FLAGCX)
+  cudaStream_t cudaStream = reinterpret_cast<cudaStream_t>(stream);
+  FLAGCX_CHECK(
+      flagcxBroadcast(static_cast<void *>(buf),
+                      buf,
+                      count,
+                      phi::internal::PDDataTypeToFlagcxDataType(data_type),
+                      root,
+                      reinterpret_cast<flagcxComm_t>(comm),
+                      reinterpret_cast<flagcxStream_t>(&cudaStream)));
+#else
   NCCL_CHECK(ncclBroadcast(static_cast<void *>(buf),
                            buf,
                            count,
@@ -855,6 +975,7 @@ C_Status XcclBroadcast(void *buf,
                            root,
                            reinterpret_cast<ncclComm_t>(comm),
                            reinterpret_cast<cudaStream_t>(stream)));
+#endif
   return C_SUCCESS;
 }
 
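The two mapping helpers introduced above (PDDataTypeToFlagcxDataType and PDReduceOpToFlagcxReduceOp) are pure functions, so they can be sanity-checked without a device or an initialized communicator. A hypothetical googletest-style check, assuming the helpers are visible to the test (as written they are file-local to runtime.cc) and googletest is available:

```cpp
#include <gtest/gtest.h>

// Hypothetical sanity check, not part of the patch: exercises only the
// host-side dtype / reduce-op mappings added by this change.
#if defined(PADDLE_WITH_FLAGCX)
TEST(FlagcxMappingTest, DtypeAndReduceOp) {
  EXPECT_EQ(phi::internal::PDDataTypeToFlagcxDataType(C_DataType::FLOAT16),
            flagcxHalf);
  EXPECT_EQ(phi::internal::PDDataTypeToFlagcxDataType(C_DataType::INT64),
            flagcxInt64);
  EXPECT_EQ(PDReduceOpToFlagcxReduceOp(C_CCLReduceOp::SUM), flagcxSum);
  EXPECT_EQ(PDReduceOpToFlagcxReduceOp(C_CCLReduceOp::AVG), flagcxAvg);
}
#endif
```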
@@ -866,6 +987,18 @@ C_Status XcclReduce(void *send_buf,
                     size_t root,
                     C_CCLComm comm,
                     C_Stream stream) {
+#if defined(PADDLE_WITH_FLAGCX)
+  cudaStream_t cudaStream = reinterpret_cast<cudaStream_t>(stream);
+  FLAGCX_CHECK(
+      flagcxReduce(send_buf,
+                   recv_buf,
+                   count,
+                   phi::internal::PDDataTypeToFlagcxDataType(data_type),
+                   PDReduceOpToFlagcxReduceOp(op),
+                   root,
+                   reinterpret_cast<flagcxComm_t>(comm),
+                   reinterpret_cast<flagcxStream_t>(&cudaStream)));
+#else
   NCCL_CHECK(ncclReduce(send_buf,
                         recv_buf,
                         count,
@@ -874,6 +1007,7 @@ C_Status XcclReduce(void *send_buf,
                         root,
                         reinterpret_cast<ncclComm_t>(comm),
                         reinterpret_cast<cudaStream_t>(stream)));
+#endif
   return C_SUCCESS;
 }
 
@@ -883,12 +1017,23 @@ C_Status XcclAllGather(void *send_buf,
                        C_DataType data_type,
                        C_CCLComm comm,
                        C_Stream stream) {
+#if defined(PADDLE_WITH_FLAGCX)
+  cudaStream_t cudaStream = reinterpret_cast<cudaStream_t>(stream);
+  FLAGCX_CHECK(
+      flagcxAllGather(send_buf,
+                      recv_buf,
+                      count,
+                      phi::internal::PDDataTypeToFlagcxDataType(data_type),
+                      reinterpret_cast<flagcxComm_t>(comm),
+                      reinterpret_cast<flagcxStream_t>(&cudaStream)));
+#else
   NCCL_CHECK(ncclAllGather(send_buf,
                            recv_buf,
                            count,
                            phi::internal::PDDataTypeToNcclDataType(data_type),
                            reinterpret_cast<ncclComm_t>(comm),
                            reinterpret_cast<cudaStream_t>(stream)));
+#endif
   return C_SUCCESS;
 }
 
@@ -899,6 +1044,17 @@ C_Status XcclReduceScatter(void *send_buf,
                            C_CCLReduceOp op,
                            C_CCLComm comm,
                            C_Stream stream) {
+#if defined(PADDLE_WITH_FLAGCX)
+  cudaStream_t cudaStream = reinterpret_cast<cudaStream_t>(stream);
+  FLAGCX_CHECK(
+      flagcxReduceScatter(send_buf,
+                          recv_buf,
+                          count,
+                          phi::internal::PDDataTypeToFlagcxDataType(data_type),
+                          PDReduceOpToFlagcxReduceOp(op),
+                          reinterpret_cast<flagcxComm_t>(comm),
+                          reinterpret_cast<flagcxStream_t>(&cudaStream)));
+#else
   NCCL_CHECK(
       ncclReduceScatter(send_buf,
                         recv_buf,
@@ -907,16 +1063,25 @@ C_Status XcclReduceScatter(void *send_buf,
                         PDReduceOpToNcclReduceOp(op),
                         reinterpret_cast<ncclComm_t>(comm),
                         reinterpret_cast<cudaStream_t>(stream)));
+#endif
   return C_SUCCESS;
 }
 
 C_Status XcclGroupStart() {
+#if defined(PADDLE_WITH_FLAGCX)
+  FLAGCX_CHECK(flagcxGroupStart(reinterpret_cast<flagcxComm_t>(globalComm)));
+#else
   NCCL_CHECK(ncclGroupStart());
+#endif
   return C_SUCCESS;
 }
 
 C_Status XcclGroupEnd() {
+#if defined(PADDLE_WITH_FLAGCX)
+  FLAGCX_CHECK(flagcxGroupEnd(reinterpret_cast<flagcxComm_t>(globalComm)));
+#else
   NCCL_CHECK(ncclGroupEnd());
+#endif
   return C_SUCCESS;
 }
 
@@ -926,12 +1091,24 @@ C_Status XcclSend(void *send_buf,
                   size_t dest_rank,
                   C_CCLComm comm,
                   C_Stream stream) {
+#if defined(PADDLE_WITH_FLAGCX)
+  cudaStream_t cudaStream = reinterpret_cast<cudaStream_t>(stream);
+  FLAGCX_CHECK(flagcxSend(send_buf,
+                          count,
+                          phi::internal::PDDataTypeToFlagcxDataType(data_type),
+                          dest_rank,
+                          reinterpret_cast<flagcxComm_t>(comm),
+                          reinterpret_cast<flagcxStream_t>(&cudaStream)));
+  flagcx_handler->devHandle->streamSynchronize(
+      reinterpret_cast<flagcxStream_t>(&cudaStream));
+#else
   NCCL_CHECK(ncclSend(send_buf,
                       count,
                       phi::internal::PDDataTypeToNcclDataType(data_type),
                       dest_rank,
                       reinterpret_cast<ncclComm_t>(comm),
                       reinterpret_cast<cudaStream_t>(stream)));
+#endif
   return C_SUCCESS;
 }
 
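Usage sketch for the point-to-point entry points above (illustrative only; buffers, count, and the comm/stream handles are assumed to come from the other plugin APIs). With PADDLE_WITH_FLAGCX defined, each call also synchronizes the stream through flagcx_handler->devHandle->streamSynchronize, so the exchange below is blocking:

```cpp
// Not part of the patch: how a caller might pair XcclSend/XcclRecv across
// two ranks. The function name and rank layout are hypothetical.
void ExchangeOneBuffer(size_t rank,
                       void *send_buf,
                       void *recv_buf,
                       size_t count,
                       C_CCLComm comm,
                       C_Stream stream) {
  if (rank == 0) {
    XcclSend(send_buf, count, C_DataType::FLOAT32, /*dest_rank=*/1, comm, stream);
    XcclRecv(recv_buf, count, C_DataType::FLOAT32, /*src_rank=*/1, comm, stream);
  } else if (rank == 1) {
    // Opposite order on the peer so the blocking calls cannot deadlock.
    XcclRecv(recv_buf, count, C_DataType::FLOAT32, /*src_rank=*/0, comm, stream);
    XcclSend(send_buf, count, C_DataType::FLOAT32, /*dest_rank=*/0, comm, stream);
  }
}
```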
@@ -941,14 +1118,67 @@ C_Status XcclRecv(void *recv_buf,
                   size_t src_rank,
                   C_CCLComm comm,
                   C_Stream stream) {
+#if defined(PADDLE_WITH_FLAGCX)
+  cudaStream_t cudaStream = reinterpret_cast<cudaStream_t>(stream);
+  FLAGCX_CHECK(flagcxRecv(recv_buf,
+                          count,
+                          phi::internal::PDDataTypeToFlagcxDataType(data_type),
+                          src_rank,
+                          reinterpret_cast<flagcxComm_t>(comm),
+                          reinterpret_cast<flagcxStream_t>(&cudaStream)));
+  flagcx_handler->devHandle->streamSynchronize(
+      reinterpret_cast<flagcxStream_t>(&cudaStream));
+#else
   NCCL_CHECK(ncclRecv(recv_buf,
                       count,
                       phi::internal::PDDataTypeToNcclDataType(data_type),
                       src_rank,
                       reinterpret_cast<ncclComm_t>(comm),
                       reinterpret_cast<cudaStream_t>(stream)));
+#endif
+  return C_SUCCESS;
+}
+
+#if defined(PADDLE_WITH_FLAGCX)
+C_Status XcclAllToAll(const void **send_buf,
+                      const size_t *send_count,
+                      const C_DataType *send_dtype,
+                      void **recv_buf,
+                      const size_t *recv_count,
+                      const C_DataType *recv_dtype,
+                      size_t rank,
+                      size_t nranks,
+                      C_CCLComm comm,
+                      C_Stream stream) {
+  flagcxComm_t flagcxComm = reinterpret_cast<flagcxComm_t>(comm);
+  cudaStream_t cudaStream = reinterpret_cast<cudaStream_t>(stream);
+  FLAGCX_CHECK(flagcxGroupStart(flagcxComm));
+  for (size_t i = 0; i < nranks; i++) {
+    if (send_count[i] > 0) {
+      FLAGCX_CHECK(
+          flagcxSend(const_cast<void *>(send_buf[i]),
+                     send_count[i],
+                     phi::internal::PDDataTypeToFlagcxDataType(send_dtype[i]),
+                     i,
+                     flagcxComm,
+                     reinterpret_cast<flagcxStream_t>(&cudaStream)));
+    }
+    if (recv_count[i] > 0) {
+      FLAGCX_CHECK(
+          flagcxRecv(const_cast<void *>(recv_buf[i]),
+                     recv_count[i],
+                     phi::internal::PDDataTypeToFlagcxDataType(recv_dtype[i]),
+                     i,
+                     flagcxComm,
+                     reinterpret_cast<flagcxStream_t>(&cudaStream)));
+    }
+  }
+  FLAGCX_CHECK(flagcxGroupEnd(flagcxComm));
+  flagcx_handler->devHandle->streamSynchronize(
+      reinterpret_cast<flagcxStream_t>(&cudaStream));
   return C_SUCCESS;
 }
+#endif
 
 void InitPlugin(CustomRuntimeParams *params) {
   PADDLE_CUSTOM_RUNTIME_CHECK_VERSION(params);
@@ -959,6 +1189,9 @@ void InitPlugin(CustomRuntimeParams *params) {
          0,
          sizeof(C_DeviceInterface));
 
+#if defined(PADDLE_WITH_FLAGCX)
+  flagcxHandleInit(&flagcx_handler);
+#endif
   params->interface->get_compute_capability = GetComputeCapability;
   params->interface->get_runtime_version = GetRuntimeVersion;
   params->interface->get_driver_version = GetDriverVersion;
@@ -1022,6 +1255,9 @@ void InitPlugin(CustomRuntimeParams *params) {
   params->interface->xccl_reduce = XcclReduce;
   params->interface->xccl_reduce_scatter = XcclReduceScatter;
   params->interface->xccl_send = XcclSend;
+#if defined(PADDLE_WITH_FLAGCX)
+  params->interface->xccl_all_to_all = XcclAllToAll;
+#endif
   params->interface->profiler_collect_trace_data = nullptr;
   params->interface->profiler_initialize = nullptr;
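A rough host-side sketch (not part of the patch) of how the conditional registration could be verified after InitPlugin runs, assuming the loader has already allocated params->interface and filled the version fields checked by PADDLE_CUSTOM_RUNTIME_CHECK_VERSION:

```cpp
#include <cassert>

// Hypothetical check, not part of the patch: the xccl_all_to_all hook is only
// registered when the plugin is built with FlagCX; otherwise it stays null
// because InitPlugin zeroes the whole C_DeviceInterface first.
void CheckXcclAllToAllRegistration(CustomRuntimeParams *params) {
  InitPlugin(params);
#if defined(PADDLE_WITH_FLAGCX)
  assert(params->interface->xccl_all_to_all == XcclAllToAll);
#else
  assert(params->interface->xccl_all_to_all == nullptr);
#endif
}
```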