vllm-project
diff --git a/‎.github/workflows/_e2e_nightly_multi_node.yaml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/_e2e_nightly_multi_node.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/_e2e_nightly_single_node.yaml‎
Lines changed: 5 additions & 3 deletions b/‎.github/workflows/_e2e_nightly_single_node.yaml‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎.github/workflows/_e2e_nightly_single_node_models.yaml‎
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/_e2e_nightly_single_node_models.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/_e2e_test.yaml‎
Lines changed: 121 additions & 13 deletions b/‎.github/workflows/_e2e_test.yaml‎
Lines changed: 121 additions & 13 deletions
diff --git a/‎.github/workflows/_schedule_image_build.yaml‎
Lines changed: 3 additions & 3 deletions b/‎.github/workflows/_schedule_image_build.yaml‎
Lines changed: 3 additions & 3 deletions
@@ -32,7 +32,7 @@ on:
         description: how many pods will be pulled up via lws.yaml, indicates number of nodes we need
       vllm_version:
         required: false
-        default: "v0.17.0"
+        default: "v0.18.0"
         type: string
         description: vllm version to use
       vllm_ascend_remote_url:
 
@@ -39,7 +39,7 @@ on:
       vllm_version:
         required: false
         type: string
-        default: "v0.17.0"
+        default: "v0.18.0"
       is_pr_test:
         required: true
         type: boolean
@@ -88,6 +88,7 @@ jobs:
         if: ${{ inputs.is_pr_test }}
         run: |
           pip uninstall -y vllm vllm-ascend || true
+          cp -r /vllm-workspace/vllm-ascend/benchmark /tmp/aisbench-backup || true
           rm -rf /vllm-workspace/vllm /vllm-workspace/vllm-ascend
 
       - name: Checkout vllm-project/vllm repo
@@ -125,15 +126,16 @@ jobs:
         env:
           PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
         run: |
+          git config --global --add safe.directory /vllm-workspace/vllm-ascend
           pip install uc-manager
           uv pip install -r requirements-dev.txt
           uv pip install -v -e .
 
       - name: Install aisbench
         if: ${{ inputs.is_pr_test }}
-        shell: bash -l {0} 
+        shell: bash -l {0}
         run: |
-          git clone -b v3.0-20250930-master --depth 1 https://gitee.com/aisbench/benchmark.git /vllm-workspace/vllm-ascend/benchmark
+          cp -r /tmp/aisbench-backup /vllm-workspace/vllm-ascend/benchmark
           cd /vllm-workspace/vllm-ascend/benchmark
           pip install pytest asyncio pytest-asyncio
           pip install -e . -r requirements/api.txt -r requirements/extra.txt
 
@@ -84,6 +84,7 @@ jobs:
           pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
           apt-get update -y
           apt install git -y
+          git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend
           git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
 
       - name: Checkout vllm-project/vllm-ascend repo
 
@@ -56,6 +56,7 @@ jobs:
           pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
           apt-get update -y
           apt install git -y
+          git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend
 
       - name: Install system dependencies
         run: |
@@ -92,20 +93,33 @@ jobs:
         env:
           PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
           VLLM_WORKER_MULTIPROC_METHOD: spawn
+        shell: bash
         run: |
+          set -o pipefail
           if [ "${{ inputs.continue_on_error }}" = "true" ]; then
             python3 .github/workflows/scripts/run_suite.py \
               --suite e2e-singlecard-light \
               --auto-partition-id "${{ matrix.part }}" \
               --auto-partition-size 1 \
               --auto-upgrade-estimated-times \
-              --continue-on-error
+              --continue-on-error \
+              2>&1 | tee /tmp/e2e-singlecard-light-part${{ matrix.part }}.log
           else
             python3 .github/workflows/scripts/run_suite.py \
               --suite e2e-singlecard-light \
               --auto-partition-id "${{ matrix.part }}" \
-              --auto-partition-size 1
+              --auto-partition-size 1 \
+              2>&1 | tee /tmp/e2e-singlecard-light-part${{ matrix.part }}.log
           fi
+          exit ${PIPESTATUS[0]}
+
+      - name: Summarize singlecard-light failure
+        if: ${{ always() }}
+        run: |
+          python3 .github/workflows/scripts/ci_log_summary.py \
+            --step-name "Run singlecard-light test" \
+            --log-file /tmp/e2e-singlecard-light-part${{ matrix.part }}.log \
+            --output "$GITHUB_STEP_SUMMARY"
 
 
       - name: Upload timing data
@@ -148,6 +162,7 @@ jobs:
           pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
           apt-get update -y
           apt install git -y
+          git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend
 
       - name: Install system dependencies
         run: |
@@ -183,20 +198,33 @@ jobs:
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
           PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
+        shell: bash
         run: |
+          set -o pipefail
           if [ "${{ inputs.continue_on_error }}" = "true" ]; then
             python3 .github/workflows/scripts/run_suite.py \
               --suite e2e-singlecard \
               --auto-partition-id "${{ matrix.part }}" \
               --auto-partition-size 2 \
               --auto-upgrade-estimated-times \
-              --continue-on-error
+              --continue-on-error \
+              2>&1 | tee /tmp/e2e-singlecard-full-part${{ matrix.part }}.log
           else
             python3 .github/workflows/scripts/run_suite.py \
               --suite e2e-singlecard \
               --auto-partition-id "${{ matrix.part }}" \
-              --auto-partition-size 2
+              --auto-partition-size 2 \
+              2>&1 | tee /tmp/e2e-singlecard-full-part${{ matrix.part }}.log
           fi
+          exit ${PIPESTATUS[0]}
+
+      - name: Summarize singlecard-full failure
+        if: ${{ always() }}
+        run: |
+          python3 .github/workflows/scripts/ci_log_summary.py \
+            --step-name "Run singlecard-full test" \
+            --log-file /tmp/e2e-singlecard-full-part${{ matrix.part }}.log \
+            --output "$GITHUB_STEP_SUMMARY"
 
       - name: Upload timing data
         uses: actions/upload-artifact@v4
@@ -237,6 +265,7 @@ jobs:
           pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
           apt-get update -y
           apt install git -y
+          git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend
 
       - name: Install system dependencies
         run: |
@@ -271,20 +300,33 @@ jobs:
       - name: Run vllm-project/vllm-ascend test (light)
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
+        shell: bash
         run: |
+          set -o pipefail
           if [ "${{ inputs.continue_on_error }}" = "true" ]; then
             python3 .github/workflows/scripts/run_suite.py \
               --suite e2e-2card-light \
               --auto-partition-id "${{ matrix.part }}" \
               --auto-partition-size 1 \
               --auto-upgrade-estimated-times \
-              --continue-on-error
+              --continue-on-error \
+              2>&1 | tee /tmp/e2e-2card-light-part${{ matrix.part }}.log
           else
             python3 .github/workflows/scripts/run_suite.py \
               --suite e2e-2card-light \
               --auto-partition-id "${{ matrix.part }}" \
-              --auto-partition-size 1
+              --auto-partition-size 1 \
+              2>&1 | tee /tmp/e2e-2card-light-part${{ matrix.part }}.log
           fi
+          exit ${PIPESTATUS[0]}
+
+      - name: Summarize multicard-2-light failure
+        if: ${{ always() }}
+        run: |
+          python3 .github/workflows/scripts/ci_log_summary.py \
+            --step-name "Run multicard-2-light test" \
+            --log-file /tmp/e2e-2card-light-part${{ matrix.part }}.log \
+            --output "$GITHUB_STEP_SUMMARY"
 
 
       - name: Upload timing data
@@ -326,6 +368,7 @@ jobs:
           pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
           apt-get update -y
           apt install git -y
+          git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend
 
       - name: Install system dependencies
         run: |
@@ -360,20 +403,33 @@ jobs:
       - name: Run vllm-project/vllm-ascend test (full)
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
+        shell: bash
         run: |
+          set -o pipefail
           if [ "${{ inputs.continue_on_error }}" = "true" ]; then
             python3 .github/workflows/scripts/run_suite.py \
               --suite e2e-multicard-2-cards \
               --auto-partition-id "${{ matrix.part }}" \
               --auto-partition-size 1 \
               --auto-upgrade-estimated-times \
-              --continue-on-error
+              --continue-on-error \
+              2>&1 | tee /tmp/e2e-2card-full-part${{ matrix.part }}.log
           else
             python3 .github/workflows/scripts/run_suite.py \
               --suite e2e-multicard-2-cards \
               --auto-partition-id "${{ matrix.part }}" \
-              --auto-partition-size 1
+              --auto-partition-size 1 \
+              2>&1 | tee /tmp/e2e-2card-full-part${{ matrix.part }}.log
           fi
+          exit ${PIPESTATUS[0]}
+
+      - name: Summarize multicard-2-full failure
+        if: ${{ always() }}
+        run: |
+          python3 .github/workflows/scripts/ci_log_summary.py \
+            --step-name "Run multicard-2-full test " \
+            --log-file /tmp/e2e-2card-full-part${{ matrix.part }}.log \
+            --output "$GITHUB_STEP_SUMMARY"
 
 
       - name: Upload timing data
@@ -389,9 +445,21 @@ jobs:
         if: ${{ inputs.type == 'full' && matrix.part == 0 }}
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
+        shell: bash
         run: |
+          set -o pipefail
           python3 -m pip uninstall -y triton-ascend
-          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py \
+            2>&1 | tee /tmp/e2e-non-triton.log
+          exit ${PIPESTATUS[0]}
+
+      - name: Summarize non-triton failure
+        if: ${{ always() && inputs.type == 'full' && matrix.part == 0 }}
+        run: |
+          python3 .github/workflows/scripts/ci_log_summary.py \
+            --step-name "Run multicard-2-full test (non triton)" \
+            --log-file /tmp/e2e-non-triton.log \
+            --output "$GITHUB_STEP_SUMMARY"
 
   e2e-4-cards-full:
     name: multicard-4-full
@@ -422,6 +490,7 @@ jobs:
           pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
           apt-get update -y
           apt install git -y
+          git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend
 
       - name: Install system dependencies
         run: |
@@ -457,20 +526,33 @@ jobs:
       - name: Run vllm-project/vllm-ascend test for V1 Engine
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
+        shell: bash
         run: |
+          set -o pipefail
           if [ "${{ inputs.continue_on_error }}" = "true" ]; then
             python3 .github/workflows/scripts/run_suite.py \
               --suite e2e-multicard-4-cards \
               --auto-partition-id "${{ matrix.part }}" \
               --auto-partition-size 1 \
               --auto-upgrade-estimated-times \
-              --continue-on-error
+              --continue-on-error \
+              2>&1 | tee /tmp/e2e-4card-full-part${{ matrix.part }}.log
           else
             python3 .github/workflows/scripts/run_suite.py \
               --suite e2e-multicard-4-cards \
               --auto-partition-id "${{ matrix.part }}" \
-              --auto-partition-size 1
+              --auto-partition-size 1 \
+              2>&1 | tee /tmp/e2e-4card-full-part${{ matrix.part }}.log
           fi
+          exit ${PIPESTATUS[0]}
+
+      - name: Summarize multicard-4-full failure
+        if: ${{ always() }}
+        run: |
+          python3 .github/workflows/scripts/ci_log_summary.py \
+            --step-name "Run vllm-project/vllm-ascend test for V1 Engine" \
+            --log-file /tmp/e2e-4card-full-part${{ matrix.part }}.log \
+            --output "$GITHUB_STEP_SUMMARY"
 
 
       - name: Upload timing data
@@ -504,6 +586,7 @@ jobs:
           pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
           apt-get update -y
           apt install git -y
+          git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend
 
       - name: Checkout vllm-project/vllm-ascend repo
         uses: actions/checkout@v6
@@ -540,9 +623,21 @@ jobs:
         env:
           PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
           VLLM_WORKER_MULTIPROC_METHOD: spawn
+        shell: bash
         run: |
+          set -o pipefail
           pytest -sv --durations=0 tests/e2e/310p/singlecard/test_dense_model_singlecard.py \
-          tests/e2e/310p/singlecard/test_vl_model_singlecard.py
+          tests/e2e/310p/singlecard/test_vl_model_singlecard.py \
+          2>&1 | tee /tmp/e2e-310p-singlecard.log
+          exit ${PIPESTATUS[0]}
+
+      - name: Summarize 310p singlecard failure
+        if: ${{ always() && inputs.contains_310 }}
+        run: |
+          python3 .github/workflows/scripts/ci_log_summary.py \
+            --step-name "Run vllm-project/vllm-ascend test" \
+            --log-file /tmp/e2e-310p-singlecard.log \
+            --output "$GITHUB_STEP_SUMMARY"
 
   e2e_310p-4cards:
     name: 310p multicards 4cards
@@ -566,6 +661,7 @@ jobs:
           pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
           apt-get update -y
           apt install git -y
+          git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend
 
       - name: Checkout vllm-project/vllm-ascend repo
         uses: actions/checkout@v6
@@ -602,8 +698,20 @@ jobs:
         env:
           PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
           VLLM_WORKER_MULTIPROC_METHOD: spawn
+        shell: bash
         run: |
+          set -o pipefail
           pytest -sv --durations=0 \
           tests/e2e/310p/multicard/test_dense_model_multicard.py \
           tests/e2e/310p/multicard/test_moe_model_multicard.py \
-          tests/e2e/310p/multicard/test_vl_model_multicard.py
+          tests/e2e/310p/multicard/test_vl_model_multicard.py \
+          2>&1 | tee /tmp/e2e-310p-4cards.log
+          exit ${PIPESTATUS[0]}
+
+      - name: Summarize 310p multicards failure
+        if: ${{ always() && inputs.contains_310 }}
+        run: |
+          python3 .github/workflows/scripts/ci_log_summary.py \
+            --step-name "Run vllm-project/vllm-ascend test" \
+            --log-file /tmp/e2e-310p-4cards.log \
+            --output "$GITHUB_STEP_SUMMARY"
@@ -70,14 +70,14 @@ jobs:
         password: ${{ secrets.QUAY_PASSWORD }}
 
     - name: Set up Docker Buildx
-      uses: docker/setup-buildx-action@v3
+      uses: docker/setup-buildx-action@v4
       with:
         install: true
         driver: docker-container
         use: true
 
     - name: Build and push
-      uses: docker/build-push-action@v6
+      uses: docker/build-push-action@v7
       id: build
       with:
         platforms: ${{ matrix.arch }}
@@ -168,7 +168,7 @@ jobs:
           password: ${{ secrets.QUAY_PASSWORD }}
 
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@v4
 
       - name: Merge and push multi-arch image
         env: