diff --git a/.github/workflows/_build_xpu.yml b/.github/workflows/_build_xpu.yml
new file mode 100644
index 00000000000..af6d63e70e2
--- /dev/null
+++ b/.github/workflows/_build_xpu.yml
@@ -0,0 +1,195 @@
+name: XPU-Build-Test
+
+on:
+  workflow_call:
+    inputs:
+      DOCKER_IMAGE:
+        description: "Build Images"
+        required: true
+        type: string
+        default: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:ci"
+      FASTDEPLOY_ARCHIVE_URL:
+        description: "URL of the compressed FastDeploy code archive."
+        required: true
+        type: string
+      WITH_NIGHTLY_BUILD:
+        description: "Enable nightly build mode (e.g. add date suffix to version)"
+        required: false
+        type: string
+        default: "OFF"
+      FD_VERSION:
+        description: "FastDeploy Package Version"
+        required: false
+        type: string
+        default: ""
+      PADDLEVERSION:
+        description: "Paddle Version Build Use"
+        required: false
+        type: string
+        default: ""
+      PADDLE_WHL_URL:
+        description: "Paddle Wheel Package URL"
+        required: false
+        type: string
+        default: ""
+    outputs:
+      wheel_path:
+        description: "Output path of the generated wheel"
+        value: ${{ jobs.xpu-build-test.outputs.wheel_path }}
+jobs:
+  xpu-build-test:
+    runs-on: [self-hosted, XPU-P800-8Card]
+    outputs:
+      wheel_path: ${{ steps.set_output.outputs.wheel_path }}
+    steps:
+      - name: Code Prepare
+        shell: bash
+        env:
+          docker_image: ${{ inputs.DOCKER_IMAGE }}
+          fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }}
+          IS_PR: ${{ github.event_name == 'pull_request' }}
+        run: |
+          set -x
+          REPO="https://github.com/${{ github.repository }}.git"
+          FULL_REPO="${{ github.repository }}"
+          REPO_NAME="${FULL_REPO##*/}"
+          BASE_BRANCH="${{ github.base_ref }}"
+          docker pull ${docker_image}
+          # Clean the repository directory before starting
+          docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
+            -e "REPO_NAME=${REPO_NAME}" \
+            ${docker_image} /bin/bash -c '
+            CLEAN_RETRIES=3
+            CLEAN_COUNT=0
+
+            while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
+              echo "Attempt $((CLEAN_COUNT+1)) to remove ${REPO_NAME}* ..."
+              rm -rf "${REPO_NAME}"* || true
+              sleep 2
+
+              # Check if anything matching ${REPO_NAME}* still exists
+              if ! ls "${REPO_NAME}"* >/dev/null 2>&1; then
+                echo "All ${REPO_NAME}* removed successfully"
+                break
+              fi
+
+              CLEAN_COUNT=$((CLEAN_COUNT + 1))
+            done
+
+            if ls "${REPO_NAME}"* >/dev/null 2>&1; then
+              echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
+              ls -ld "${REPO_NAME}"*
+              exit 1
+            fi
+            '
+
+          wget -q --no-proxy ${fd_archive_url}
+          tar -xf FastDeploy.tar.gz
+          rm -rf FastDeploy.tar.gz
+          cd FastDeploy
+          git config --global user.name "FastDeployCI"
+          git config --global user.email "fastdeploy_ci@example.com"
+          git log -n 3 --oneline
+      - name: FastDeploy Build
+        shell: bash
+        env:
+          docker_image: ${{ inputs.DOCKER_IMAGE }}
+          fd_version: ${{ inputs.FD_VERSION }}
+          BRANCH_REF: ${{ github.ref_name }}
+          PADDLEVERSION: ${{ inputs.PADDLEVERSION }}
+          PADDLE_WHL_URL: ${{ inputs.PADDLE_WHL_URL }}
+          WITH_NIGHTLY_BUILD: ${{ inputs.WITH_NIGHTLY_BUILD }}
+        run: |
+          set -x
+          runner_name="${{ runner.name }}"
+          CARD_ID=$(echo "${runner_name}" | awk -F'-' '{print $NF}')
+          gpu_id=$(echo "$CARD_ID" | fold -w1 | paste -sd,)
+
+          PARENT_DIR=$(dirname "$WORKSPACE")
+          echo "PARENT_DIR:$PARENT_DIR"
+          docker run --rm --net=host \
+            --cap-add=SYS_PTRACE --privileged --shm-size=64G \
+            -v $(pwd):/workspace -w /workspace \
+            -v "/ssd3:/ssd3" \
+            -e "MODEL_PATH=/ssd3/model" \
+            -e "http_proxy=$(git config --global --get http.proxy)" \
+            -e "https_proxy=$(git config --global --get https.proxy)" \
+            -e "no_proxy=bcebos.com,mirrors.tuna.tsinghua.edu.cn,127.0.0.1,localhost" \
+            -e TZ="Asia/Shanghai" \
+            -e "FD_VERSION=${fd_version}" \
+            -e "WITH_NIGHTLY_BUILD=${WITH_NIGHTLY_BUILD}" \
+            -e "PADDLEVERSION=${PADDLEVERSION}" \
+            -e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \
+            -e "BRANCH_REF=${BRANCH_REF}" \
+            ${docker_image} /bin/bash -c '
+            if [[ -n "${FD_VERSION}" ]]; then
+              export FASTDEPLOY_VERSION=${FD_VERSION}
+              echo "Custom FastDeploy version: ${FASTDEPLOY_VERSION}"
+            fi
+
+            git config --global --add safe.directory /workspace/FastDeploy
+            chown -R $(whoami) /workspace/FastDeploy
+            cd FastDeploy
+            if [[ "${WITH_NIGHTLY_BUILD}" == "ON" ]];then
+              GIT_COMMIT_TIME=$(git --no-pager show -s --format=%ci HEAD)
+              DATE_ONLY=$(echo $GIT_COMMIT_TIME | sed "s/ .*//;s/-//g")
+              echo "Git Commit Time: $GIT_COMMIT_TIME"
+              echo "Date Only: $DATE_ONLY"
+              export FASTDEPLOY_VERSION="${FASTDEPLOY_VERSION}.dev${DATE_ONLY}"
+            fi
+            python -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
+            # Use a different PaddlePaddle package depending on the branch or tag
+            if [[ "${PADDLE_WHL_URL}" != "" ]];then
+              python -m pip install ${PADDLE_WHL_URL}
+            elif [[ "${PADDLEVERSION}" != "" ]];then
+              python -m pip uninstall paddlepaddle-xpu fastdeploy-xpu -y
+              python -m pip install paddlepaddle-xpu==${PADDLEVERSION} -i https://www.paddlepaddle.org.cn/packages/stable/xpu-p800/
+            else
+              python -m pip uninstall paddlepaddle-xpu fastdeploy-xpu -y
+              # python -m pip install --pre paddlepaddle-xpu -i https://www.paddlepaddle.org.cn/packages/nightly/xpu-p800/
+              python -m pip install https://paddle-whl.bj.bcebos.com/nightly/xpu-p800/paddlepaddle-xpu/paddlepaddle_xpu-3.4.0.dev20260107-cp310-cp310-linux_x86_64.whl
+            fi
+
+
+            python -m pip install --upgrade pip
+            python -m pip install -r requirements.txt
+            bash custom_ops/xpu_ops/download_dependencies.sh develop
+            export CLANG_PATH=$(pwd)/custom_ops/xpu_ops/third_party/xtdk
+            export XVLLM_PATH=$(pwd)/custom_ops/xpu_ops/third_party/xvllm
+            bash build.sh
+            ls ./dist/*.whl
+            '
+      - name: Package Upload
+        id: set_output
+        run: |
+          set -x
+          if [[ "${{ github.event_name }}" == "pull_request" ]];then
+            commit_id=${{ github.event.pull_request.head.sha }}
+            pr_num=${{ github.event.pull_request.number }}
+            target_path=paddle-github-action/PR/FastDeploy/${pr_num}/${commit_id}/xpu
+          elif [[ "${{ github.ref_type }}" == "tag" ]]; then
+            commit_id=${{ github.sha }}
+            tag_name=${{ github.ref_name }}
+            target_path=paddle-github-action/TAG/FastDeploy/${tag_name}/${commit_id}/xpu
+          else
+            commit_id=${{ github.sha }}
+            branch_name=${{ github.ref_name }}
+            target_path=paddle-github-action/BRANCH/FastDeploy/${branch_name}/${commit_id}/xpu
+          fi
+          wget -q --no-proxy --no-check-certificate https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddlePaddle/PaddleTest/tools/bos_tools.py
+          push_file=$(realpath bos_tools.py)
+          python3 --version
+          python3 -m pip install bce-python-sdk==0.9.29
+          cd FastDeploy/dist/
+          matches=($(ls fastdeploy*.whl))
+          if [ ${#matches[@]} -ne 1 ]; then
+            echo "Error: Found ${#matches[@]} matching files, expected exactly 1"
+            exit 1
+          fi
+          fd_wheel_name=${matches[0]}
+          echo "Found: $fd_wheel_name"
+          # tree -L 3
+          python3 ${push_file} fastdeploy*.whl ${target_path}
+          target_path_stripped="${target_path#paddle-github-action/}"
+          WHEEL_PATH=https://paddle-github-action.bj.bcebos.com/${target_path_stripped}/${fd_wheel_name}
+          echo "wheel_path=${WHEEL_PATH}" >> $GITHUB_OUTPUT
\ No newline at end of file
diff --git a/.github/workflows/_xpu_4cards_case_test.yml b/.github/workflows/_xpu_4cards_case_test.yml
new file mode 100644
index 00000000000..262a56bcd40
--- /dev/null
+++ b/.github/workflows/_xpu_4cards_case_test.yml
@@ -0,0 +1,195 @@
+name: xpu_4cards_case_test
+
+on:
+  workflow_call:
+    inputs:
+      DOCKER_IMAGE:
+        description: "Build Images"
+        required: true
+        type: string
+        default: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:ci"
+      FASTDEPLOY_ARCHIVE_URL:
+        description: "URL of the compressed FastDeploy code archive."
+        required: true
+        type: string
+      FASTDEPLOY_WHEEL_URL:
+        description: "URL of the compressed FastDeploy whl."
+        required: true
+        type: string
+      FD_VERSION:
+        description: "FastDeploy Package Version"
+        required: false
+        type: string
+        default: ""
+      PADDLEVERSION:
+        description: "Paddle Version Build Use"
+        required: false
+        type: string
+        default: ""
+      PADDLE_WHL_URL:
+        description: "Paddle Wheel Package URL"
+        required: false
+        type: string
+        default: ""
+      MODEL_PATH:
+        description: "MODEL Dir Use"
+        required: true
+        type: string
+        default: ""
+
+jobs:
+  run_xpu_4cards_cases:
+    runs-on: [self-hosted, XPU-P800-4Cards]
+    timeout-minutes: 60
+    steps:
+      - name: Print current runner name
+        run: |
+          echo "Current runner name: ${{ runner.name }}"
+      - name: Code Prepare
+        shell: bash
+        env:
+          docker_image: ${{ inputs.DOCKER_IMAGE }}
+          fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }}
+          fd_wheel_url: ${{ inputs.FASTDEPLOY_WHEEL_URL }}
+          model_path: ${{ inputs.MODEL_PATH }}
+        run: |
+          set -x
+          REPO="https://github.com/${{ github.repository }}.git"
+          FULL_REPO="${{ github.repository }}"
+          REPO_NAME="${FULL_REPO##*/}"
+          BASE_BRANCH="${{ github.base_ref }}"
+          docker pull ${docker_image}
+          # Clean the repository directory before starting
+          docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
+            -e "REPO_NAME=${REPO_NAME}" \
+            ${docker_image} /bin/bash -c '
+            CLEAN_RETRIES=3
+            CLEAN_COUNT=0
+
+            while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
+              echo "Attempt $((CLEAN_COUNT+1)) to remove ${REPO_NAME}* ..."
+              rm -rf "${REPO_NAME}"* || true
+              sleep 2
+
+              # Check if anything matching ${REPO_NAME}* still exists
+              if ! ls "${REPO_NAME}"* >/dev/null 2>&1; then
+                echo "All ${REPO_NAME}* removed successfully"
+                break
+              fi
+
+              CLEAN_COUNT=$((CLEAN_COUNT + 1))
+            done
+
+            if ls "${REPO_NAME}"* >/dev/null 2>&1; then
+              echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
+              ls -ld "${REPO_NAME}"*
+              exit 1
+            fi
+            '
+
+          wget -q --no-proxy ${fd_archive_url}
+          tar -xf FastDeploy.tar.gz
+          rm -rf FastDeploy.tar.gz
+          cd FastDeploy
+          git config --global user.name "FastDeployCI"
+          git config --global user.email "fastdeploy_ci@example.com"
+          git log -n 3 --oneline
+
+      - name: Run CI unittest
+        env:
+          docker_image: ${{ inputs.DOCKER_IMAGE }}
+          fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }}
+          fd_wheel_url: ${{ inputs.FASTDEPLOY_WHEEL_URL }}
+          model_path: ${{ inputs.MODEL_PATH }}
+        run: |
+          runner_name="${{ runner.name }}"
+          last_char="${runner_name: -1}"
+
+          if [[ "$last_char" == "1" ]]; then
+            xpu_id="4"
+          else
+            xpu_id="0"
+          fi
+          PARENT_DIR=$(dirname "$WORKSPACE")
+          echo "PARENT_DIR:$PARENT_DIR"
+          docker run --rm --net=host --cap-add=SYS_PTRACE --privileged --shm-size=64G \
+            -v $(pwd):/workspace -w /workspace \
+            -v "/ssd3:/ssd3" \
+            -e "MODEL_PATH=${model_path}" \
+            -e "FASTDEPLOY_ARCHIVE_URL=${fd_archive_url}" \
+            -e "FASTDEPLOY_WHEEL_URL=${fd_wheel_url}" \
+            -e "PADDLEVERSION=${PADDLEVERSION}" \
+            -e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \
+            -e "http_proxy=$(git config --global --get http.proxy)" \
+            -e "https_proxy=$(git config --global --get https.proxy)" \
+            -e "no_proxy=bcebos.com,mirrors.tuna.tsinghua.edu.cn,127.0.0.1,localhost" \
+            -e "XPU_ID=${xpu_id}" \
+            ${docker_image} /bin/bash -c '
+            echo "Installing lsof..."
+            apt install -y lsof
+
+            # Set XPU_VISIBLE_DEVICES
+            if [[ "$XPU_ID" == "0" ]]; then
+              export XPU_VISIBLE_DEVICES="0,1,2,3"
+            else
+              export XPU_VISIBLE_DEVICES="4,5,6,7"
+            fi
+            echo "XPU_VISIBLE_DEVICES=$XPU_VISIBLE_DEVICES"
+
+            # Download and install xre
+            echo "Downloading and installing xre..."
+            mkdir -p /workspace/deps
+            cd /workspace/deps
+            if [ ! -d "xre" ]; then
+              wget -q https://klx-sdk-release-public.su.bcebos.com/xre/kl3-release/5.0.21.21/xre-Linux-x86_64-5.0.21.21.tar.gz
+              tar -zxf xre-Linux-x86_64-5.0.21.21.tar.gz && mv xre-Linux-x86_64-5.0.21.21 xre
+            fi
+            cd -
+            export PATH=/workspace/deps/xre/bin:$PATH
+
+            # Reset the XPU cards
+            echo "Resetting XPU cards..."
+            xpu-smi -r -i $XPU_VISIBLE_DEVICES
+            xpu-smi
+            set -e
+            git config --global --add safe.directory /workspace/FastDeploy
+            cd FastDeploy
+            python -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
+            python -m pip install -r requirements.txt
+            echo "Installing PaddlePaddle..."
+            # Use a different PaddlePaddle package depending on the branch or tag
+            if [[ "${PADDLE_WHL_URL}" != "" ]];then
+              python -m pip uninstall paddlepaddle-xpu fastdeploy-xpu -y
+              python -m pip install ${PADDLE_WHL_URL}
+            elif [[ "${PADDLEVERSION}" != "" ]];then
+              python -m pip uninstall paddlepaddle-xpu fastdeploy-xpu -y
+              python -m pip install paddlepaddle-xpu==${PADDLEVERSION} -i https://www.paddlepaddle.org.cn/packages/stable/xpu-p800/
+            else
+              python -m pip uninstall paddlepaddle-xpu fastdeploy-xpu -y
+              # python -m pip install --pre paddlepaddle-xpu -i https://www.paddlepaddle.org.cn/packages/nightly/xpu-p800/
+              python -m pip install https://paddle-whl.bj.bcebos.com/nightly/xpu-p800/paddlepaddle-xpu/paddlepaddle_xpu-3.4.0.dev20260107-cp310-cp310-linux_x86_64.whl
+            fi
+            echo "Installing the fastdeploy-xpu wheel built by the upstream job..."
ls "${REPO_NAME}"* >/dev/null 2>&1; then + echo "All ${REPO_NAME}* removed successfully" + break + fi + + CLEAN_COUNT=$((CLEAN_COUNT + 1)) + done + + if ls "${REPO_NAME}"* >/dev/null 2>&1; then + echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts" + ls -ld "${REPO_NAME}"* + exit 1 + fi + ' + + wget -q --no-proxy ${fd_archive_url} + tar -xf FastDeploy.tar.gz + rm -rf FastDeploy.tar.gz + cd FastDeploy + git config --global user.name "FastDeployCI" + git config --global user.email "fastdeploy_ci@example.com" + git log -n 3 --oneline + + - name: Run CI unittest + env: + docker_image: ${{ inputs.DOCKER_IMAGE }} + fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }} + fd_wheel_url: ${{ inputs.FASTDEPLOY_WHEEL_URL }} + model_path: ${{ inputs.MODEL_PATH }} + run: | + runner_name="${{ runner.name }}" + last_char="${runner_name: -1}" + + if [[ "$last_char" == "1" ]]; then + xpu_id="4" + else + xpu_id="0" + fi + PARENT_DIR=$(dirname "$WORKSPACE") + echo "PARENT_DIR:$PARENT_DIR" + docker run --rm --net=host --cap-add=SYS_PTRACE --privileged --shm-size=64G \ + -v $(pwd):/workspace -w /workspace \ + -v "/ssd3:/ssd3" \ + -e "MODEL_PATH=${model_path}" \ + -e "FASTDEPLOY_ARCHIVE_URL=${fd_archive_url}" \ + -e "FASTDEPLOY_WHEEL_URL=${fd_wheel_url}" \ + -e "PADDLEVERSION=${PADDLEVERSION}" \ + -e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \ + -e "http_proxy=$(git config --global --get http.proxy)" \ + -e "https_proxy=$(git config --global --get https.proxy)" \ + -e "no_proxy=bcebos.com,mirrors.tuna.tsinghua.edu.cn,127.0.0.1,localhost" \ + -e "XPU_ID=${xpu_id}" \ + ${docker_image} /bin/bash -c ' + echo "安装lsof工具..." + apt install -y lsof + + # 设置XPU_VISIBLE_DEVICES + if [[ "$XPU_ID" == "0" ]]; then + export XPU_VISIBLE_DEVICES="0,1,2,3" + else + export XPU_VISIBLE_DEVICES="4,5,6,7" + fi + echo "XPU_VISIBLE_DEVICES=$XPU_VISIBLE_DEVICES" + + # 下载和安装xre + echo "下载和安装xre..." + mkdir -p /workspace/deps + cd /workspace/deps + if [ ! -d "xre" ]; then + wget -q https://klx-sdk-release-public.su.bcebos.com/xre/kl3-release/5.0.21.21/xre-Linux-x86_64-5.0.21.21.tar.gz + tar -zxf xre-Linux-x86_64-5.0.21.21.tar.gz && mv xre-Linux-x86_64-5.0.21.21 xre + fi + cd - + export PATH=/workspace/deps/xre/bin:$PATH + + # 重启XPU卡 + echo "重启XPU卡..." + xpu-smi -r -i $XPU_VISIBLE_DEVICES + xpu-smi + set -e + git config --global --add safe.directory /workspace/FastDeploy + cd FastDeploy + python -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple + python -m pip install -r requirements.txt + echo "安装PaddlePaddle..." + # 针对不同分支和tag使用不同的PaddlePaddle安装包 + if [[ "${PADDLE_WHL_URL}" != "" ]];then + python -m pip uninstall paddlepaddle-xpu fastdeploy-xpu -y + python -m pip install ${PADDLE_WHL_URL} + elif [[ "${PADDLEVERSION}" != "" ]];then + python -m pip uninstall paddlepaddle-xpu fastdeploy-xpu -y + python -m pip install paddlepaddle-xpu==${PADDLEVERSION} -i https://www.paddlepaddle.org.cn/packages/stable/xpu-p800/ + else + python -m pip uninstall paddlepaddle-xpu fastdeploy-xpu -y + # python -m pip install --pre paddlepaddle-xpu -i https://www.paddlepaddle.org.cn/packages/nightly/xpu-p800/ + python -m pip install https://paddle-whl.bj.bcebos.com/nightly/xpu-p800/paddlepaddle-xpu/paddlepaddle_xpu-3.4.0.dev20260107-cp310-cp310-linux_x86_64.whl + fi + echo "安装上游任务编译的fastdeploy-xpu..." 
+ python -m pip install ${FASTDEPLOY_WHEEL_URL} + rm -rf fastdeploy + python -m pip install ${FASTDEPLOY_WHEEL_URL} --no-deps --target=/workspace/FastDeploy + echo "============================安装测试依赖============================" + python -m pip install openai -U + python -m pip uninstall -y triton + python -m pip install triton==3.3.0 + python -m pip install pytest + python -m pip install pytest-timeout + unset http_proxy + unset https_proxy + echo "============================开始运行pytest测试============================" + export PYTHONPATH=/workspace/FastDeploy/ + python -m pytest -v -s --tb=short tests/xpu_ci/4cards_cases/ + exit_code=$? + + if [ $exit_code -eq 0 ]; then + echo "============================4卡cases测试通过!============================" + else + echo "============================4卡cases测试失败,请检查日志!============================" + exit $exit_code + fi + ' \ No newline at end of file diff --git a/.github/workflows/_xpu_8cards_case_test.yml b/.github/workflows/_xpu_8cards_case_test.yml new file mode 100644 index 00000000000..f333519ccee --- /dev/null +++ b/.github/workflows/_xpu_8cards_case_test.yml @@ -0,0 +1,184 @@ +name: xpu_8cards_case_test + +on: + workflow_call: + inputs: + DOCKER_IMAGE: + description: "Build Images" + required: true + type: string + default: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:ci" + FASTDEPLOY_ARCHIVE_URL: + description: "URL of the compressed FastDeploy code archive." + required: true + type: string + FASTDEPLOY_WHEEL_URL: + description: "URL of the compressed FastDeploy whl ." + required: true + type: string + FD_VERSION: + description: "FastDeploy Package Version" + required: false + type: string + default: "" + PADDLEVERSION: + description: "Paddle Version Build Use" + required: false + type: string + default: "" + PADDLE_WHL_URL: + description: "Paddle Wheel Package URL" + required: false + type: string + default: "" + MODEL_PATH: + description: "MODEL Dir Use" + required: true + type: string + default: "" + +jobs: + run_xpu_8cards_cases: + runs-on: [self-hosted, XPU-P800-8Cards] + timeout-minutes: 60 + steps: + - name: Print current runner name + run: | + echo "Current runner name: ${{ runner.name }}" + - name: Code Prepare + shell: bash + env: + docker_image: ${{ inputs.DOCKER_IMAGE }} + fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }} + fd_wheel_url: ${{ inputs.FASTDEPLOY_WHEEL_URL }} + model_path: ${{ inputs.MODEL_PATH }} + run: | + set -x + REPO="https://github.com/${{ github.repository }}.git" + FULL_REPO="${{ github.repository }}" + REPO_NAME="${FULL_REPO##*/}" + BASE_BRANCH="${{ github.base_ref }}" + docker pull ${docker_image} + # Clean the repository directory before starting + docker run --rm --net=host -v $(pwd):/workspace -w /workspace \ + -e "REPO_NAME=${REPO_NAME}" \ + ${docker_image} /bin/bash -c ' + CLEAN_RETRIES=3 + CLEAN_COUNT=0 + + while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do + echo "Attempt $((CLEAN_COUNT+1)) to remove ${REPO_NAME}* ..." + rm -rf "${REPO_NAME}"* || true + sleep 2 + + # Check if anything matching ${REPO_NAME}* still exists + if ! 
ls "${REPO_NAME}"* >/dev/null 2>&1; then + echo "All ${REPO_NAME}* removed successfully" + break + fi + + CLEAN_COUNT=$((CLEAN_COUNT + 1)) + done + + if ls "${REPO_NAME}"* >/dev/null 2>&1; then + echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts" + ls -ld "${REPO_NAME}"* + exit 1 + fi + ' + + wget -q --no-proxy ${fd_archive_url} + tar -xf FastDeploy.tar.gz + rm -rf FastDeploy.tar.gz + cd FastDeploy + git config --global user.name "FastDeployCI" + git config --global user.email "fastdeploy_ci@example.com" + git log -n 3 --oneline + + - name: Run CI unittest + env: + docker_image: ${{ inputs.DOCKER_IMAGE }} + fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }} + fd_wheel_url: ${{ inputs.FASTDEPLOY_WHEEL_URL }} + model_path: ${{ inputs.MODEL_PATH }} + run: | + runner_name="${{ runner.name }}" + last_char="${runner_name: -1}" + + PARENT_DIR=$(dirname "$WORKSPACE") + echo "PARENT_DIR:$PARENT_DIR" + docker run --rm --net=host --cap-add=SYS_PTRACE --privileged --shm-size=64G \ + -v $(pwd):/workspace -w /workspace \ + -v "/ssd3:/ssd3" \ + -e "MODEL_PATH=${model_path}" \ + -e "FASTDEPLOY_ARCHIVE_URL=${fd_archive_url}" \ + -e "FASTDEPLOY_WHEEL_URL=${fd_wheel_url}" \ + -e "PADDLEVERSION=${PADDLEVERSION}" \ + -e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \ + -e "http_proxy=$(git config --global --get http.proxy)" \ + -e "https_proxy=$(git config --global --get https.proxy)" \ + -e "no_proxy=bcebos.com,mirrors.tuna.tsinghua.edu.cn,127.0.0.1,localhost" \ + ${docker_image} /bin/bash -c ' + echo "安装lsof工具..." + apt install -y lsof + + # 设置XPU_VISIBLE_DEVICES + export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" + echo "XPU_VISIBLE_DEVICES=$XPU_VISIBLE_DEVICES" + + # 下载和安装xre + echo "下载和安装xre..." + mkdir -p /workspace/deps + cd /workspace/deps + if [ ! -d "xre" ]; then + wget -q https://klx-sdk-release-public.su.bcebos.com/xre/kl3-release/5.0.21.21/xre-Linux-x86_64-5.0.21.21.tar.gz + tar -zxf xre-Linux-x86_64-5.0.21.21.tar.gz && mv xre-Linux-x86_64-5.0.21.21 xre + fi + cd - + export PATH=/workspace/deps/xre/bin:$PATH + + # 重启XPU卡 + echo "重启XPU卡..." + xpu-smi -r -i $XPU_VISIBLE_DEVICES + xpu-smi + set -e + git config --global --add safe.directory /workspace/FastDeploy + cd FastDeploy + python -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple + python -m pip install -r requirements.txt + echo "安装PaddlePaddle..." + # 针对不同分支和tag使用不同的PaddlePaddle安装包 + if [[ "${PADDLE_WHL_URL}" != "" ]];then + python -m pip install ${PADDLE_WHL_URL} + elif [[ "${PADDLEVERSION}" != "" ]];then + python -m pip uninstall paddlepaddle-xpu fastdeploy-xpu -y + python -m pip install paddlepaddle-xpu==${PADDLEVERSION} -i https://www.paddlepaddle.org.cn/packages/stable/xpu-p800/ + else + python -m pip uninstall paddlepaddle-xpu fastdeploy-xpu -y + # python -m pip install --pre paddlepaddle-xpu -i https://www.paddlepaddle.org.cn/packages/nightly/xpu-p800/ + python -m pip install https://paddle-whl.bj.bcebos.com/nightly/xpu-p800/paddlepaddle-xpu/paddlepaddle_xpu-3.4.0.dev20260107-cp310-cp310-linux_x86_64.whl + fi + echo "安装上游任务编译的fastdeploy-xpu..." 
+            python -m pip install ${FASTDEPLOY_WHEEL_URL}
+            rm -rf fastdeploy
+            python -m pip install ${FASTDEPLOY_WHEEL_URL} --no-deps --target=/workspace/FastDeploy
+            echo "============================Installing test dependencies============================"
+            python -m pip install openai -U
+            python -m pip uninstall -y triton
+            python -m pip install triton==3.3.0
+            python -m pip install pytest
+            python -m pip install pytest-timeout
+            unset http_proxy
+            unset https_proxy
+            echo "============================Running pytest cases============================"
+            export PYTHONPATH=/workspace/FastDeploy/
+            python -m pytest -v -s --tb=short tests/xpu_ci/8cards_cases/
+            exit_code=$?
+
+            if [ $exit_code -eq 0 ]; then
+              echo "============================8-card cases passed!============================"
+            else
+              echo "============================8-card cases failed, please check the logs!============================"
+              exit $exit_code
+            fi
+            '
\ No newline at end of file
diff --git a/.github/workflows/ci_xpu.yml b/.github/workflows/ci_xpu.yml
index 7cb88cc16e4..14e8511095d 100644
--- a/.github/workflows/ci_xpu.yml
+++ b/.github/workflows/ci_xpu.yml
@@ -2,82 +2,43 @@ name: CI_XPU
 
 on:
   pull_request:
-    branches:
-      - develop
-      - 'release/*'
-  workflow_dispatch:
+    types: [opened, synchronize]
+    branches: [develop, release/**]
 
+permissions: read-all
 
 concurrency:
-  group: ${{ github.event.pull_request.number }}-xpu-ci
+  group: ${{ github.event.pull_request.number }}-${{ github.workflow }}
   cancel-in-progress: true
 
 jobs:
-  CI_XPU:
-    timeout-minutes: 60
-    runs-on: [self-hosted, XPU-P800-8Card]
-    steps:
-      - name: Print current runner name
-        run: |
-          echo "Current runner name: ${{ runner.name }}"
-      # Because the system version is lower than 2.23, the checkout cannot be used.
-      # - name: Checkout code
-      #   uses: actions/checkout@v4
+  clone:
+    name: FD-Clone-Linux-XPU
+    uses: ./.github/workflows/_clone_linux.yml
 
-      - name: Code Checkout
-        env:
-          docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:ci
-        run: |
-          REPO="https://github.com/${{ github.repository }}.git"
-          FULL_REPO="${{ github.repository }}"
-          REPO_NAME="${FULL_REPO##*/}"
-          BASE_BRANCH="${{ github.base_ref }}"
-          # Clean the repository directory before starting
-          docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
-            -e "REPO_NAME=${REPO_NAME}" \
-            -e "BASE_BRANCH=${BASE_BRANCH}" \
-            ${docker_image} /bin/bash -c '
-            if [ -d ${REPO_NAME} ]; then
-              echo "Directory ${REPO_NAME} exists, removing it..."
-              rm -rf ${REPO_NAME}
-            fi
-            '
-          git config --global user.name "FastDeployCI"
-          git config --global user.email "fastdeploy_ci@example.com"
-          git clone ${REPO} ${REPO_NAME} -b ${BASE_BRANCH}
-          cd FastDeploy
-          if [ "${{ github.event_name }}" = "pull_request" ]; then
-            git fetch origin pull/${{ github.event.pull_request.number }}/head:pr/${{ github.event.pull_request.number }}
-            git merge pr/${{ github.event.pull_request.number }}
-            git log -n 3 --oneline
-          else
-            git checkout ${{ github.sha }}
-            git log -n 3 --oneline
-          fi
+  xpu_build_test:
+    name: xpu_build_test
+    needs: [clone]
+    uses: ./.github/workflows/_build_xpu.yml
+    with:
+      FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
+      DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:ci
 
-      - name: Run CI unittest
-        env:
-          docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:ci
-        run: |
-          runner_name="${{ runner.name }}"
-          last_char="${runner_name: -1}"
+  xpu_4cards_case_test:
+    name: xpu_4cards_case_test
+    needs: [clone, xpu_build_test]
+    uses: ./.github/workflows/_xpu_4cards_case_test.yml
+    with:
+      FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
+      DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:ci
+      FASTDEPLOY_WHEEL_URL: ${{ needs.xpu_build_test.outputs.wheel_path }}
+      MODEL_PATH: /ssd3/model
 
-          if [[ "$last_char" == "1" ]]; then
-            xpu_id="4"
-          else
-            xpu_id="0"
-          fi
-          PARENT_DIR=$(dirname "$WORKSPACE")
-          echo "PARENT_DIR:$PARENT_DIR"
-          docker run --rm --net=host --cap-add=SYS_PTRACE --privileged --shm-size=64G \
-            -v $(pwd):/workspace -w /workspace \
-            -v "/ssd3:/ssd3" \
-            -e "MODEL_PATH=/ssd3/model" \
-            -e "http_proxy=$(git config --global --get http.proxy)" \
-            -e "https_proxy=$(git config --global --get https.proxy)" \
-            -e "no_proxy=bcebos.com,mirrors.tuna.tsinghua.edu.cn,127.0.0.1,localhost" \
-            -e "XPU_ID=${xpu_id}" \
-            ${docker_image} /bin/bash -c "
-            git config --global --add safe.directory /workspace/FastDeploy
-            cd FastDeploy
-            bash scripts/run_xpu_ci_pytest.sh
-            "
+  xpu_8cards_case_test:
+    name: xpu_8cards_case_test
+    needs: [clone, xpu_build_test]
+    uses: ./.github/workflows/_xpu_8cards_case_test.yml
+    with:
+      FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
+      DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:ci
+      FASTDEPLOY_WHEEL_URL: ${{ needs.xpu_build_test.outputs.wheel_path }}
+      MODEL_PATH: /ssd3/model
diff --git a/tests/xpu_ci/conftest.py b/tests/xpu_ci/4cards_cases/conftest.py
similarity index 99%
rename from tests/xpu_ci/conftest.py
rename to tests/xpu_ci/4cards_cases/conftest.py
index 9df30e8f00c..b6918d33123 100644
--- a/tests/xpu_ci/conftest.py
+++ b/tests/xpu_ci/4cards_cases/conftest.py
@@ -364,7 +364,7 @@ def get_script_dir():
     """Return the path of the scripts directory."""
     # conftest.py lives under tests/xpu_ci/*/, scripts is under the project root
     current_dir = os.path.dirname(os.path.abspath(__file__))
-    project_root = os.path.dirname(os.path.dirname(current_dir))
+    project_root = os.path.dirname(os.path.dirname(os.path.dirname(current_dir)))
     return os.path.join(project_root, "scripts")
 
diff --git a/tests/xpu_ci/test_ep4tp1_online.py b/tests/xpu_ci/4cards_cases/test_ep4tp1_online.py
similarity index 100%
rename from tests/xpu_ci/test_ep4tp1_online.py
rename to tests/xpu_ci/4cards_cases/test_ep4tp1_online.py
diff --git a/tests/xpu_ci/test_ep4tp4_all2all.py b/tests/xpu_ci/4cards_cases/test_ep4tp4_all2all.py
similarity index 100%
rename from tests/xpu_ci/test_ep4tp4_all2all.py
rename to tests/xpu_ci/4cards_cases/test_ep4tp4_all2all.py
diff --git a/tests/xpu_ci/test_ep4tp4_online.py b/tests/xpu_ci/4cards_cases/test_ep4tp4_online.py
similarity index 100%
rename from tests/xpu_ci/test_ep4tp4_online.py
rename to tests/xpu_ci/4cards_cases/test_ep4tp4_online.py
diff --git a/tests/xpu_ci/test_logprobs_21b_tp4.py b/tests/xpu_ci/4cards_cases/test_logprobs_21b_tp4.py
similarity index 100%
rename from tests/xpu_ci/test_logprobs_21b_tp4.py
rename to tests/xpu_ci/4cards_cases/test_logprobs_21b_tp4.py
diff --git a/tests/xpu_ci/test_mtp.py b/tests/xpu_ci/4cards_cases/test_mtp.py
similarity index 100%
rename from tests/xpu_ci/test_mtp.py
rename to tests/xpu_ci/4cards_cases/test_mtp.py
diff --git a/tests/xpu_ci/test_pd_03b_tp1.py b/tests/xpu_ci/4cards_cases/test_pd_03b_tp1.py
similarity index 100%
rename from tests/xpu_ci/test_pd_03b_tp1.py
rename to tests/xpu_ci/4cards_cases/test_pd_03b_tp1.py
diff --git a/tests/xpu_ci/test_pd_21b_tp2.py b/tests/xpu_ci/4cards_cases/test_pd_21b_tp2.py
similarity index 100%
rename from tests/xpu_ci/test_pd_21b_tp2.py
rename to tests/xpu_ci/4cards_cases/test_pd_21b_tp2.py
diff --git a/tests/xpu_ci/test_v1_mode.py b/tests/xpu_ci/4cards_cases/test_v1_mode.py
similarity index 100%
rename from tests/xpu_ci/test_v1_mode.py
rename to tests/xpu_ci/4cards_cases/test_v1_mode.py
diff --git a/tests/xpu_ci/test_vl_model.py b/tests/xpu_ci/4cards_cases/test_vl_model.py
similarity index 100%
rename from tests/xpu_ci/test_vl_model.py
rename to tests/xpu_ci/4cards_cases/test_vl_model.py
diff --git a/tests/xpu_ci/test_w4a8.py b/tests/xpu_ci/4cards_cases/test_w4a8.py
similarity index 100%
rename from tests/xpu_ci/test_w4a8.py
rename to tests/xpu_ci/4cards_cases/test_w4a8.py
diff --git a/tests/xpu_ci/8cards_cases/conftest.py b/tests/xpu_ci/8cards_cases/conftest.py
new file mode 100644
index 00000000000..b6918d33123
--- /dev/null
+++ b/tests/xpu_ci/8cards_cases/conftest.py
@@ -0,0 +1,487 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+XPU CI test framework - shared configuration and helper functions.
+
+This file contains the functions and fixtures shared by all test cases.
+Main responsibilities:
+1. Process management - start and stop the API server
+2. Health check - wait for the service to come up
+3. Resource cleanup - clean logs, core files, message queues, etc.
+4. Environment setup - set XPU-related environment variables
+"""
+
+import json
+import os
+import shutil
+import subprocess
+import time
+
+import pytest
+
+
+def get_xpu_id():
+    """Return the XPU_ID environment variable."""
+    return int(os.getenv("XPU_ID", "0"))
+
+
+def get_port_num():
+    """Compute the port number from XPU_ID."""
+    xpu_id = get_xpu_id()
+    return 8188 + xpu_id * 100
+
+
+def stop_processes():
+    """
+    Stop all related processes (minimal-change version that avoids killing pytest).
+    """
+    xpu_id = get_xpu_id()  # noqa: F841
+    port_num = get_port_num()
+
+    # Get the PID(s) of the pytest main process
+    try:
+        pytest_pids = subprocess.check_output("pgrep -f pytest || true", shell=True).decode().strip().split()
+    except subprocess.CalledProcessError:
+        pytest_pids = []
+
+    def safe_kill_cmd(cmd):
+        """Run a kill command, excluding pytest processes."""
+        try:
+            # First list the candidate PIDs (replace kill -9 with cat)
+            list_cmd = cmd.replace("kill -9", "cat")
+            output = subprocess.check_output(list_cmd, shell=True, stderr=subprocess.DEVNULL).decode().strip().split()
+
+            # Filter: exclude pytest
+            safe_pids = [pid for pid in output if pid and pid not in pytest_pids]
+
+            # Actually kill
+            for pid in safe_pids:
+                subprocess.run(f"kill -9 {pid}", shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+        except Exception:
+            pass
+
+    commands = [
+        "ps -efww | grep -E 'cache_transfer_manager.py' | grep -v grep | awk '{print $2}' | xargs echo",
+        "ps -efww | grep -E 'api_server' | grep -v grep | awk '{print $2}' | xargs echo",
+        "ps -efww | grep -E 'multiprocessing' | grep -v grep | awk '{print $2}' | xargs echo",
+        "ps -efww | grep -E 'fastdeploy' | grep -v grep | awk '{print $2}' | xargs echo",
+        f"ps -efww | grep -E '{port_num}' | grep -v grep | awk '{{print $2}}' | xargs echo",
+        f"lsof -t -i :{port_num} | xargs echo",
+    ]
+
+    # Kill additional ports
+    for port in range(port_num + 10, port_num + 41):
+        commands.append(f"lsof -t -i :{port} | xargs echo")
+
+    # Kill processes using netstat
+    commands.extend(
+        [
+            f"netstat -tunlp 2>/dev/null | grep {port_num + 2} | awk '{{print $NF}}' | awk -F'/' '{{print $1}}' | xargs echo",
+            f"netstat -tunlp 2>/dev/null | grep {port_num + 2} | awk '{{print $(NF-1)}}' | cut -d/ -f1 | grep -E '^[0-9]+$' | xargs echo",
+        ]
+    )
+
+    for cmd in commands:
+        safe_kill_cmd(cmd)
+
+
+def cleanup_resources():
+    """
+    Clean up resources.
+
+    Includes:
+    1. removing the log directory
+    2. removing core files
+    3. clearing message queues
+    """
+    # Remove the log directory
+    if os.path.exists("log"):
+        shutil.rmtree("log")
+
+    # Remove core files
+    subprocess.run("rm -f core*", shell=True)
+
+    # Clear message queues
+    subprocess.run(
+        "ipcrm --all=msg 2>/dev/null || true", shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+    )
+
+
+def wait_for_health_check(timeout=900, interval=10):
+    """
+    Wait until the service health check passes.
+
+    Args:
+        timeout: timeout in seconds, default 15 minutes
+        interval: check interval in seconds, default 10 seconds
+
+    Returns:
+        bool: whether the service started successfully
+    """
+    port_num = get_port_num()
+    health_endpoint = f"http://0.0.0.0:{port_num}/health"
+    models_endpoint = f"http://0.0.0.0:{port_num}/v1/models"
+    start_time = time.time()
+
+    print(f"Starting service health check, waiting at most {timeout} seconds")
+
+    # Phase 1: wait until /health returns 200
+    while True:
+        elapsed = int(time.time() - start_time)
+
+        # Timeout check
+        if elapsed >= timeout:
+            print(f"\nService startup timed out: still not up after {timeout//60} minutes!")
+            return False
+
+        # Send the health check request
+        try:
+            result = subprocess.run(
+                f'curl -s -o /dev/null -w "%{{http_code}}" -m 2 {health_endpoint}',
+                shell=True,
+                capture_output=True,
+                text=True,
+            )
+            http_code = result.stdout.strip()
+        except Exception:
+            http_code = "000"
+
+        print(f"\rHealth check in progress... waited {elapsed}s, current status code: {http_code}", end="", flush=True)
+
+        if http_code == "200":
+            print(f"\nHealth check passed after {elapsed} seconds")
+            break
+
+        time.sleep(interval)
+
+    # Phase 2: wait until /v1/models returns a valid model list, so the model is fully ready
+    print("Verifying the model is ready...")
+    while True:
+        elapsed = int(time.time() - start_time)
+
+        # Timeout check
+        if elapsed >= timeout:
+            print(f"\nModel readiness timed out: still not ready after {timeout//60} minutes!")
+            return False
+
+        # Check the model list
+        try:
+            result = subprocess.run(f"curl -s -m 5 {models_endpoint}", shell=True, capture_output=True, text=True)
+            response = result.stdout.strip()
+            if response:
+                data = json.loads(response)
+                # Check whether any model data is present
+                if data.get("data") and len(data["data"]) > 0:
+                    model_id = data["data"][0].get("id", "unknown")
+                    print(f"\nModel ready! model id: {model_id}, total time {elapsed} seconds")
+                    return True
+        except (json.JSONDecodeError, Exception) as e:  # noqa: F841
+            pass
+
+        print(f"\rWaiting for the model to become ready... waited {elapsed}s", end="", flush=True)
+        time.sleep(interval)
+
+
+def print_logs_on_failure():
+    """Print logs on failure."""
+    print("\n========== server.log ==========")
+    if os.path.exists("server.log"):
+        with open("server.log", "r") as f:
+            print(f.read())
+
+    print("\n========== log/workerlog.0 ==========")
+    if os.path.exists("log/workerlog.0"):
+        with open("log/workerlog.0", "r") as f:
+            print(f.read())
+
+
+def start_server(server_args, wait_before_check=60):
+    """
+    Start the API server.
+
+    Args:
+        server_args: list of server launch arguments
+        wait_before_check: seconds to wait after launch before the health check, default 60
+
+    Returns:
+        bool: whether the service started successfully
+    """
+    # Stop old processes
+    stop_processes()
+
+    # Clean up resources
+    cleanup_resources()
+
+    # Build the launch command
+    cmd = ["python", "-m", "fastdeploy.entrypoints.openai.api_server"] + server_args
+
+    # Start the service in the background
+    with open("server.log", "w") as log_file:
+        subprocess.Popen(cmd, stdout=log_file, stderr=subprocess.STDOUT, start_new_session=True)
+
+    print(f"Server launch command: {' '.join(cmd)}")
+    print(f"Waiting {wait_before_check} seconds...")
+    time.sleep(wait_before_check)
+
+    # Health check
+    if not wait_for_health_check():
+        print_logs_on_failure()
+        stop_processes()
+        return False
+
+    return True
+
+
+@pytest.fixture(scope="function")
+def xpu_env():
+    """
+    Set the XPU environment variables.
+
+    This fixture sets XPU_VISIBLE_DEVICES at the start of every test
+    and cleans up automatically when the test ends.
+    """
+    xpu_id = get_xpu_id()
+
+    # Set XPU_VISIBLE_DEVICES
+    if xpu_id == 0:
+        os.environ["XPU_VISIBLE_DEVICES"] = "0,1,2,3"
+    else:
+        os.environ["XPU_VISIBLE_DEVICES"] = "4,5,6,7"
+
+    print(f"\nSet environment variable: XPU_VISIBLE_DEVICES={os.environ['XPU_VISIBLE_DEVICES']}")
+
+    yield
+
+    # Stop processes after the test finishes
+    print("\nTest finished, stopping the service...")
+    stop_processes()
+
+
+def get_model_path():
+    """Return the MODEL_PATH environment variable."""
+    model_path = os.getenv("MODEL_PATH")
+    if not model_path:
+        raise ValueError("MODEL_PATH environment variable is not set")
+    return model_path
+
+
+def setup_ep_env():
+    """
+    Set EP (Expert Parallel) related environment variables.
+
+    Returns:
+        dict: the original values, used to restore them later
+    """
+    env_vars = {
+        "BKCL_ENABLE_XDR": "1",
+        "BKCL_RDMA_NICS": "eth1,eth1,eth2,eth2",
+        "BKCL_TRACE_TOPO": "1",
+        "BKCL_PCIE_RING": "1",
+        "XSHMEM_MODE": "1",
+        "XSHMEM_QP_NUM_PER_RANK": "32",
+        "BKCL_RDMA_VERBS": "1",
+        "MOE_FFN_USE_DENSE_INPUT": "1",
+    }
+
+    # Save the original values
+    original_values = {}
+    for key in env_vars:
+        original_values[key] = os.environ.get(key)
+
+    # Set the new values
+    for key, value in env_vars.items():
+        os.environ[key] = value
+        print(f"Set environment variable: {key}={value}")
+
+    # Set BKCL_RDMA_NICS
+    rdma_nics = get_rdma_nics()
+    if rdma_nics:
+        os.environ["BKCL_RDMA_NICS"] = rdma_nics
+        print(f"Set environment variable: BKCL_RDMA_NICS={rdma_nics}")
+    return original_values
+
+
+def restore_env(original_values):
+    """
+    Restore environment variables.
+
+    Args:
+        original_values: the original values returned by setup_ep_env()
+    """
+    for key, value in original_values.items():
+        if value is None:
+            if key in os.environ:
+                del os.environ[key]
+                print(f"Removed environment variable: {key}")
+        else:
+            os.environ[key] = value
+            print(f"Restored environment variable: {key}={value}")
+
+
+def download_and_build_xdeepep():
+    """Download and build xDeepEP (used by the EP parallel tests)."""
+    if os.path.exists("xDeepEP"):
+        print("xDeepEP already exists, skipping download")
+        return True
+
+    print("Downloading xDeepEP...")
+    result = subprocess.run("wget -q https://paddle-qa.bj.bcebos.com/xpu_third_party/xDeepEP.tar.gz", shell=True)
+    if result.returncode != 0:
+        print("Failed to download xDeepEP")
+        return False
+
+    print("Extracting xDeepEP...")
+    result = subprocess.run("tar -xzf xDeepEP.tar.gz", shell=True)
+    if result.returncode != 0:
+        print("Failed to extract xDeepEP")
+        return False
+
+    print("Building xDeepEP...")
+    result = subprocess.run("cd xDeepEP && bash build.sh && cd -", shell=True)
+    if result.returncode != 0:
+        print("Failed to build xDeepEP")
+        return False
+
+    return True
+
+
+# ============ PD-disaggregation helpers ============
+
+
+def get_script_dir():
+    """Return the path of the scripts directory."""
+    # conftest.py lives under tests/xpu_ci/*/, scripts is under the project root
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    project_root = os.path.dirname(os.path.dirname(os.path.dirname(current_dir)))
+    return os.path.join(project_root, "scripts")
+
+
+def get_rdma_nics():
+    """
+    Get the RDMA NIC configuration.
+
+    Returns:
+        str: the value of KVCACHE_RDMA_NICS, or an empty string on failure
+    """
+    script_path = os.path.join(get_script_dir(), "get_rdma_nics.sh")
+
+    try:
+        result = subprocess.run(f"bash {script_path} xpu", shell=True, capture_output=True, text=True)
+        output = result.stdout.strip()
+        # Parse the KVCACHE_RDMA_NICS=xxx format
+        if output.startswith("KVCACHE_RDMA_NICS="):
+            return output.split("=", 1)[1]
+        return output
+    except Exception as e:
+        print(f"Failed to get the RDMA NICs: {e}")
+        return ""
+
+
+def setup_pd_env():
+    """
+    Set PD-disaggregation related environment variables.
+
+    Returns:
+        dict: the original values, used to restore them later
+    """
+    original_values = {}
+    env_keys = ["KVCACHE_GDRCOPY_FLUSH_ENABLE", "KVCACHE_RDMA_NICS", "CUDA_ENABLE_P2P_NO_UVA"]
+
+    # Save the original values
+    for key in env_keys:
+        original_values[key] = os.environ.get(key)
+
+    # Set the new values
+    os.environ["KVCACHE_GDRCOPY_FLUSH_ENABLE"] = "1"
+    os.environ["CUDA_ENABLE_P2P_NO_UVA"] = "1"  # enable peer mem
+    print("Set environment variable: KVCACHE_GDRCOPY_FLUSH_ENABLE=1")
+    print("Set environment variable: CUDA_ENABLE_P2P_NO_UVA=1")
+
+    # Get and set the RDMA NICs
+    rdma_nics = get_rdma_nics()
+    if rdma_nics:
+        os.environ["KVCACHE_RDMA_NICS"] = rdma_nics
+        print(f"Set environment variable: KVCACHE_RDMA_NICS={rdma_nics}")
+
+    return original_values
+
+
+def restore_pd_env(original_values):
+    """
+    Restore PD-disaggregation related environment variables.
+
+    Args:
+        original_values: the original values returned by setup_pd_env()
+    """
+    env_keys = ["KVCACHE_GDRCOPY_FLUSH_ENABLE", "KVCACHE_RDMA_NICS", "CUDA_ENABLE_P2P_NO_UVA"]
+
+    for key in env_keys:
+        if key in original_values:
+            if original_values[key] is None:
+                if key in os.environ:
+                    del os.environ[key]
+                    print(f"Removed environment variable: {key}")
+            else:
+                os.environ[key] = original_values[key]
+                print(f"Restored environment variable: {key}={original_values[key]}")
+
+
+def setup_pd_ep_env():
+    """
+    Set PD-disaggregation + EP related environment variables.
+
+    Returns:
+        dict: the original values, used to restore them later
+    """
+    original_values_pd = setup_pd_env()
+    original_values_ep = setup_ep_env()
+    original_values = {**original_values_pd, **original_values_ep}
+    return original_values
+
+
+def restore_pd_ep_env(original_values):
+    """
+    Restore PD-disaggregation + EP related environment variables.
+
+    Args:
+        original_values: the original values returned by setup_pd_ep_env()
+    """
+    restore_env(original_values)
+    restore_pd_env(original_values)
+
+
+def setup_logprobs_env():
+    """
+    Set logprobs-related environment variables.
+
+    Returns:
+        dict: the original values, used to restore them later
+    """
+    env_vars = {
+        "FD_USE_GET_SAVE_OUTPUT_V1": "1",
+    }
+    os.system("sysctl -w kernel.msgmax=131072")
+    os.system("sysctl -w kernel.msgmnb=33554432")
+
+    # Save the original values
+    original_values = {}
+    for key in env_vars:
+        original_values[key] = os.environ.get(key)
+
+    # Set the new values
+    for key, value in env_vars.items():
+        os.environ[key] = value
+        print(f"Set environment variable: {key}={value}")
+    return original_values
diff --git a/tests/xpu_ci/test_pd_21b_tp1ep4.py b/tests/xpu_ci/8cards_cases/test_pd_21b_tp1ep4.py
similarity index 98%
rename from tests/xpu_ci/test_pd_21b_tp1ep4.py
rename to tests/xpu_ci/8cards_cases/test_pd_21b_tp1ep4.py
index b4aad965cf8..250e6a8a4b8 100644
--- a/tests/xpu_ci/test_pd_21b_tp1ep4.py
+++ b/tests/xpu_ci/8cards_cases/test_pd_21b_tp1ep4.py
@@ -32,6 +32,7 @@
 import pytest
 from conftest import (
     cleanup_resources,
+    download_and_build_xdeepep,
     get_model_path,
     get_port_num,
     restore_pd_ep_env,
@@ -134,6 +135,9 @@ def start_pd_server(model_path, port_num, wait_before_check=60):
     # Clean up resources
     cleanup_resources()
 
+    if not download_and_build_xdeepep():
+        pytest.fail("Failed to download or build xDeepEP")
+
     # Clean and create the log directories
     for log_dir in ["log_router", "log_prefill", "log_decode"]:
diff --git a/tests/xpu_ci/test_pd_21b_tp4ep4.py b/tests/xpu_ci/8cards_cases/test_pd_21b_tp4ep4.py
similarity index 98%
rename from tests/xpu_ci/test_pd_21b_tp4ep4.py
rename to tests/xpu_ci/8cards_cases/test_pd_21b_tp4ep4.py
index 2b69a0ed962..6b2ec6ee126 100644
--- a/tests/xpu_ci/test_pd_21b_tp4ep4.py
+++ b/tests/xpu_ci/8cards_cases/test_pd_21b_tp4ep4.py
@@ -32,6 +32,7 @@
 import pytest
 from conftest import (
     cleanup_resources,
+    download_and_build_xdeepep,
     get_model_path,
     get_port_num,
     restore_pd_ep_env,
@@ -134,6 +135,9 @@ def start_pd_server(model_path, port_num, wait_before_check=60):
     # Clean up resources
     cleanup_resources()
+
+    if not download_and_build_xdeepep():
+        pytest.fail("Failed to download or build xDeepEP")
 
     # Clean and create the log directories
     for log_dir in ["log_router", "log_prefill", "log_decode"]: