Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions .github/workflows/scripts/install_and_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/bin/bash

set -euo pipefail
export WORKSPACE="/home/workspace"

check_npu_info() {
    # Print NPU device status and the installed CANN toolkit build info.
    npu-smi info
    local arch
    arch=$(uname -i)
    cat "/usr/local/Ascend/ascend-toolkit/latest/${arch}-linux/ascend_toolkit_install.info"
}

check_and_config() {
    # Point pip at the Tsinghua PyPI mirror and register the Huawei Cloud
    # Ascend repo as an extra index for NPU-specific wheels.
    pip config set global.index-url "https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
    export PIP_EXTRA_INDEX_URL="https://mirrors.huaweicloud.com/ascend/repos/pypi"
}

install_sys_dependencies() {
    # Install OS-level build/runtime dependencies needed to compile and run
    # vllm / vllm-ascend inside the CI container.
    echo "====> Install system dependencies"
    cd "$WORKSPACE"
    apt-get update -y
    # Package list baked into the CI image.
    # NOTE(review): path is /root/workspace, not $WORKSPACE (/home/workspace) —
    # confirm both mounts exist in the container, or unify on $WORKSPACE.
    # shellcheck disable=SC2046 -- word splitting intended: one package per word
    apt-get install -y $(cat /root/workspace/packages.txt)
    apt-get install -y gcc g++ cmake libnuma-dev iproute2
    # kimi-k2 dependency: weights are fetched with blobfile
    pip install blobfile
}

install_vllm() {
    # Editable-install vLLM (device-agnostic "empty" target) followed by
    # vllm-ascend, so the checked-out sources are used directly.
    cd "$WORKSPACE/vllm-empty"
    VLLM_TARGET_DEVICE=empty pip install -e .

    # install vllm-ascend from the workspace checkout
    cd "$WORKSPACE"
    pip install -e .
}

wait_for_server() {
    # Block until the vLLM HTTP server answers its health endpoint, or fail
    # after a timeout. The original stub only printed a message and returned
    # immediately without waiting at all.
    # $1 - base URL (default http://localhost:8004)
    # $2 - timeout in seconds (default 600)
    # Returns 0 when the server is ready, 1 on timeout.
    local url="${1:-http://localhost:8004}"
    local timeout="${2:-600}"
    local start now
    echo "====> Waiting for server to start"
    start=$(date +%s)
    until curl -sf "${url}/health" >/dev/null 2>&1; do
        now=$(date +%s)
        if [ $((now - start)) -ge "$timeout" ]; then
            echo "Error: server at ${url} not ready within ${timeout}s" >&2
            return 1
        fi
        sleep 5
    done
    echo "====> Server is ready"
}

main() {
    # Entry point.
    # $1 - node role: "header" or "worker" (required)
    # $2 - master node IP address (required for workers, exported as MASTER_ADDR)
    local node_type="${1:?Usage: $0 <header|worker> [master_addr]}"
    if [ -n "${2:-}" ]; then
        export MASTER_ADDR="$2"
    fi
    check_npu_info
    check_and_config
    install_sys_dependencies
    install_vllm
    echo "====> Installation completed successfully"
    echo "====> Starting multi node tests"
    # test data parallel on mp backend (sourced so it shares this environment)
    . "$WORKSPACE/examples/online_serving/multi_node_dp.sh" "$node_type"

    # test pipeline parallel on ray backend
    # NOTE(review): fixed sleep keeps this node's container alive while the
    # other node drives the test — replace with explicit cross-node sync.
    sleep 1000
}

main "$@"

64 changes: 64 additions & 0 deletions .github/workflows/scripts/start_container.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/bin/bash
# Pull the CANN base image and (re)start the Ascend A3 CI container with all
# 16 davinci NPU devices and the host driver/tooling paths mounted through.
set -euo pipefail

IMAGE_NAME="quay.nju.edu.cn/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11"
CONTAINER_NAME="ascend_ci_a3"

sudo docker pull "$IMAGE_NAME"

# Remove any leftover container from a previous run (stop it first if running).
if docker ps -a --format '{{.Names}}' | grep -qw "^${CONTAINER_NAME}$"; then
    echo "Container '$CONTAINER_NAME' exists. Removing it..."

    if docker ps --format '{{.Names}}' | grep -qw "^${CONTAINER_NAME}$"; then
        echo "Stopping container '$CONTAINER_NAME'..."
        docker stop "$CONTAINER_NAME"
    fi

    docker rm "$CONTAINER_NAME"
    echo "Container '$CONTAINER_NAME' has been removed."
fi

echo "starting ascend NPU-A3 container"

# All 16 NPU devices of the A3 node.
device_args=()
for i in {0..15}; do
    device_args+=(--device "/dev/davinci$i")
done

# BUG FIX: docker's env flag is `-e NAME=value`; the original used
# `-e NAME:$VALUE`, which never set the variables inside the container.
# NOTE(review): the workflow exports MASTER_IP (not MASTER_ADDR) — confirm
# which name the in-container scripts actually read.
if docker run -itd \
    --name "$CONTAINER_NAME" \
    --net=host \
    "${device_args[@]}" \
    --device /dev/davinci_manager \
    --device /dev/devmm_svm \
    --device /dev/hisi_hdc \
    -e CLUSTER_SIZE="${CLUSTER_SIZE:-}" \
    -e MASTER_ADDR="${MASTER_ADDR:-}" \
    -e WORKSPACE="/home/workspace" \
    -v "$GITHUB_WORKSPACE":/home/workspace \
    -v /usr/local/dcmi:/usr/local/dcmi \
    -v /usr/local/Ascend/driver/tools/hccn_tool:/usr/local/Ascend/driver/tools/hccn_tool \
    -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
    -v /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ \
    -v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \
    -v /etc/ascend_install.info:/etc/ascend_install.info \
    -v /root/actions-runner/.cache:/root/actions-runner/.cache \
    -v /mnt/sfs_turbo/ascend-ci-share-nv-action-vllm-benchmarks:/root/.cache \
    "$IMAGE_NAME" bash; then
    echo "Container $CONTAINER_NAME start successfully"
else
    echo "Container $CONTAINER_NAME start failed, please check if the images exist or permission"
    exit 1
fi
95 changes: 95 additions & 0 deletions .github/workflows/vllm_ascend_multi_node_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
# This file is a part of the vllm-ascend project.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

name: 'Multi-Node-Test'
# Manually-triggered multi-node (2 x A3 runners) data-parallel test for
# vllm-ascend against vllm main.

on:
  workflow_dispatch:
    # Allow manual triggering of the workflow


# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

jobs:
  get_header_node_ip:
    # This job is used to get the header node IP address, which workers use
    # to reach the data-parallel master.
    name: 'Get Header Node IP'
    # NOTE(review): linux-aarch64-a3-node* are custom self-hosted labels; add
    # them to actionlint.yaml so the pre-commit actionlint check passes.
    runs-on: linux-aarch64-a3-node0
    outputs:
      header_ip: ${{ steps.get_header_node_ip.outputs.MASTER_IP }}
    steps:
      - name: Get header node IP
        id: get_header_node_ip
        run: |
          echo "MASTER_IP=$(hostname -I | awk '{print $1}')" >> $GITHUB_OUTPUT

  test_multi_node:
    # Currently, we run multi-node tests only on: vllm==main, vllm-ascend==main.
    name: 'Multi-Node-Test / DP'
    needs: get_header_node_ip
    strategy:
      matrix:
        runner: [linux-aarch64-a3-node0, linux-aarch64-a3-node1]
    runs-on: ${{ matrix.runner }}
    env:
      CONTAINER_NAME: ascend_ci_a3
      WORKSPACE: /home/workspace
      CLUSTER_SIZE: 2
      MASTER_IP: ${{ needs.get_header_node_ip.outputs.header_ip }}
    steps:
      - name: Set config
        run: |
          git config --global --add safe.directory "$GITHUB_WORKSPACE"

      - name: Checkout vllm-ascend
        uses: actions/checkout@v4
        with:
          repository: Potabk/vllm-ascend
          ref: multi_node_ci
          path: ./

      - name: Checkout vllm
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          ref: main
          path: ./vllm-empty

      - name: Start container
        run: |
          bash .github/workflows/scripts/start_container.sh

      - name: Run multi-node test
        run: |
          SCRIPT_PATH="$WORKSPACE/.github/workflows/scripts/install_and_test.sh"
          if [ "${{ matrix.runner }}" == "linux-aarch64-a3-node0" ]; then
            docker exec -i $CONTAINER_NAME bash -lc "bash $SCRIPT_PATH header"
          else
            docker exec -i $CONTAINER_NAME bash -lc "bash $SCRIPT_PATH worker $MASTER_IP"
          fi

      - name: Docker post test cleanup
        if: always()
        run: |
          docker exec -i $CONTAINER_NAME bash -lc 'find "$WORKSPACE" -mindepth 1 -maxdepth 1 -xdev -exec rm -rf {} +'
          docker rm -f ascend_ci_a3 2>/dev/null || true
69 changes: 69 additions & 0 deletions examples/online_serving/multi_node_dp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/bin/bash
# Launch one node of a 2-node vLLM data-parallel deployment on Ascend NPUs.
# Sourced by install_and_test.sh with the node role as $1.

set -euo pipefail

run_node() {
    # $1 - node role: "header" (rank 0, runs the API servers) or "worker" (headless)
    # $2 - (optional) model path; defaults to the CI weight cache location,
    #      so existing single-argument callers keep working
    # Env: MASTER_ADDR - header node IP; required when role is "worker".
    local node_type="${1:?node role (header|worker) is required}"
    local model_path="${2:-/root/.cache/weights/Kimi-K2-Instruct-W8A8}"
    echo "====> Running $node_type"

    # Determine this node's primary IP and the interface carrying it; both are
    # needed so HCCL/GLOO sockets bind to the right NIC.
    local local_ip
    local_ip=$(hostname -I | awk '{print $1}')
    if [ -z "$local_ip" ]; then
        echo "Error: Could not determine local IP address." >&2
        exit 1
    fi
    local iface
    iface=$(ip -o -4 addr show | awk -v ip="$local_ip" '$4 ~ ip"/" {print $2}')

    export HCCL_IF_IP=$local_ip
    export GLOO_SOCKET_IFNAME=$iface
    export TP_SOCKET_IFNAME=$iface
    export HCCL_SOCKET_IFNAME=$iface
    export OMP_PROC_BIND=false
    export OMP_NUM_THREADS=100
    export VLLM_USE_V1=1
    export HCCL_BUFFSIZE=1024

    if [ "$node_type" == "header" ]; then
        echo "====> Running header node"
        vllm serve "$model_path" \
            --host 0.0.0.0 \
            --port 8004 \
            --data-parallel-size 4 \
            --api-server-count 2 \
            --data-parallel-size-local 2 \
            --data-parallel-address "$local_ip" \
            --data-parallel-rpc-port 13389 \
            --seed 1024 \
            --served-model-name kimi \
            --quantization ascend \
            --tensor-parallel-size 8 \
            --enable-expert-parallel \
            --max-num-seqs 16 \
            --max-model-len 32768 \
            --max-num-batched-tokens 4096 \
            --trust-remote-code \
            --no-enable-prefix-caching \
            --gpu-memory-utilization 0.9 \
            --additional-config '{"ascend_scheduler_config":{"enabled":true},"torchair_graph_config":{"enabled":true}}'
    else
        # BUG FIX: the worker branch dereferenced $MASTER_ADDR without
        # checking it; under `set -u` that aborts with an opaque error.
        if [ -z "${MASTER_ADDR:-}" ]; then
            echo "Error: MASTER_ADDR environment variable must be set for worker node." >&2
            exit 1
        fi
        echo "====> Running worker node"
        vllm serve "$model_path" \
            --host 0.0.0.0 \
            --port 8004 \
            --headless \
            --data-parallel-size 4 \
            --data-parallel-size-local 2 \
            --data-parallel-start-rank 2 \
            --data-parallel-address "$MASTER_ADDR" \
            --data-parallel-rpc-port 13389 \
            --seed 1024 \
            --tensor-parallel-size 8 \
            --served-model-name kimi \
            --max-num-seqs 16 \
            --max-model-len 32768 \
            --quantization ascend \
            --max-num-batched-tokens 4096 \
            --enable-expert-parallel \
            --trust-remote-code \
            --no-enable-prefix-caching \
            --gpu-memory-utilization 0.92 \
            --additional-config '{"ascend_scheduler_config":{"enabled":true},"torchair_graph_config":{"enabled":true}}'
    fi
}

run_node "$@"
Loading
Loading