Skip to content

Commit 97ea83c

Browse files
authored
Merge branch 'vllm-project:main' into feat_conprefill
2 parents 55cafcb + f0be3ee commit 97ea83c

File tree

151 files changed

+15607
-4183
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

151 files changed

+15607
-4183
lines changed

.gemini/config.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# https://developers.google.com/gemini-code-assist/docs/customize-gemini-behavior-github
2+
have_fun: false # Just review the code
3+
code_review:
4+
comment_severity_threshold: HIGH # Reduce quantity of comments
5+
pull_request_opened:
6+
summary: false # Don't summarize the PR in a separate comment

.github/ISSUE_TEMPLATE/750-RFC.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ body:
4040
attributes:
4141
label: Any Other Things.
4242
description: >
43-
Any other things you would like to mention.
43+
Any other things you would like to mention, such as a feature branch request.
4444
validations:
4545
required: false
4646
- type: markdown

.github/workflows/accuracy_test.yaml

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ jobs:
7070
runner: linux-aarch64-a2-1
7171
- model_name: Qwen3-30B-A3B
7272
runner: linux-aarch64-a2-2
73+
- model_name: DeepSeek-V2-Lite
74+
runner: linux-aarch64-a2-2
7375
fail-fast: false
7476

7577
name: ${{ matrix.model_name }} accuracy
@@ -200,9 +202,8 @@ jobs:
200202
markdown_name="${model_base_name}"
201203
echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
202204
mkdir -p ./benchmarks/accuracy
203-
pytest -sv ./tests/e2e/singlecard/models/test_lm_eval_correctness.py \
204-
--config ./tests/e2e/singlecard/models/configs/${{ matrix.model_name }}.yaml \
205-
--report_output ./benchmarks/accuracy/${model_base_name}.md
205+
pytest -sv ./tests/e2e/models/test_lm_eval_correctness.py \
206+
--config ./tests/e2e/models/configs/${{ matrix.model_name }}.yaml
206207
207208
- name: Generate step summary
208209
if: ${{ always() }}
@@ -225,14 +226,14 @@ jobs:
225226

226227
outputs:
227228
model_name: ${{ steps.set_output.outputs.model_name }}
228-
229+
vllm_ascend_version: ${{ env.GHA_VLLM_ASCEND_VERSION }}
230+
229231
create_pr:
230232
runs-on: ubuntu-latest
231233
needs: accuracy_tests
232234
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}
233235
env:
234236
UPSTREAM_REPO: vllm-project/vllm-ascend
235-
236237
steps:
237238
- name: Checkout repository
238239
uses: actions/checkout@v4
@@ -257,10 +258,10 @@ jobs:
257258
TIMESTAMP=$(date +%Y%m%d%H%M%S)
258259
BRANCH_NAME="auto-pr/accuracy-report-${TIMESTAMP}"
259260
echo "BRANCH_NAME=${BRANCH_NAME}" >> $GITHUB_ENV
260-
git checkout -B "${BRANCH_NAME}" upstream/${{ github.event.inputs.vllm-ascend-version }}
261+
git checkout -B "${BRANCH_NAME}" upstream/main
261262
262263
- name: Download only current run reports
263-
uses: actions/download-artifact@v4
264+
uses: actions/download-artifact@v5
264265
with:
265266
path: ./docs/source/developer_guide/evaluation/accuracy_report
266267
pattern: report-*
@@ -298,7 +299,7 @@ jobs:
298299
GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
299300
run: |
300301
git add ./docs/source/developer_guide/evaluation/accuracy_report/*.md
301-
git commit -s -m "[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}"
302+
git commit -s -m "[Doc] Update accuracy reports for ${{ needs.accuracy_tests.outputs.vllm_ascend_version }}"
302303
git push -f origin "${{ env.BRANCH_NAME }}"
303304
304305
- name: Create PR in upstream via API
@@ -310,9 +311,9 @@ jobs:
310311
owner: 'vllm-project',
311312
repo: 'vllm-ascend',
312313
head: `vllm-ascend-ci:${{ env.BRANCH_NAME }}`,
313-
base: '${{ github.event.inputs.vllm-ascend-version }}',
314-
title: `[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}`,
315-
body: `The accuracy results running on NPU Atlas A2 have changed, updating reports for: All models (Qwen/Qwen3-30B-A3B, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base)
314+
base: 'main',
315+
title: `[Doc] Update accuracy reports for ${{ needs.accuracy_tests.outputs.vllm_ascend_version }}`,
316+
body: `The accuracy results running on NPU Atlas A2 have changed, updating reports for: All models (Qwen3-30B-A3B, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base, DeepSeek-V2-Lite)
316317
317318
- [Workflow run][1]
318319

.github/workflows/format_pr_body.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ jobs:
4646
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
4747
4848
- name: Checkout repository
49-
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
49+
uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
5050

5151
- name: Set up Python
5252
uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0

.github/workflows/release_code.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ jobs:
4343
matrix:
4444
python-version: ["3.11"]
4545
steps:
46-
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
46+
- uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
4747

4848
- name: Print
4949
run: |

.github/workflows/release_whl.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ jobs:
5252
) }}
5353
runs-on: ${{ matrix.os }}
5454
steps:
55-
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
55+
- uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
5656

5757
- name: Print
5858
run: |

.github/workflows/vllm_ascend_test.yaml

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ jobs:
8181
VLLM_USE_MODELSCOPE: True
8282
strategy:
8383
matrix:
84-
vllm_version: [main, v0.10.0]
84+
vllm_version: [v0.10.1.1, main]
8585
steps:
8686
- name: Install packages
8787
run: |
@@ -137,7 +137,7 @@ jobs:
137137
max-parallel: 2
138138
matrix:
139139
os: [linux-aarch64-a2-1]
140-
vllm_version: [main, v0.10.0]
140+
vllm_version: [v0.10.1.1, main]
141141
name: singlecard e2e test
142142
runs-on: ${{ matrix.os }}
143143
container:
@@ -192,7 +192,7 @@ jobs:
192192
VLLM_USE_MODELSCOPE: True
193193
run: |
194194
pytest -sv tests/e2e/singlecard/test_offline_inference.py
195-
pytest -sv tests/e2e/singlecard/test_ilama_lora.py
195+
# pytest -sv tests/e2e/singlecard/test_ilama_lora.py
196196
pytest -sv tests/e2e/singlecard/test_guided_decoding.py
197197
pytest -sv tests/e2e/singlecard/test_camem.py
198198
pytest -sv tests/e2e/singlecard/test_embedding.py
@@ -211,16 +211,15 @@ jobs:
211211
--ignore=tests/e2e/singlecard/test_embedding.py \
212212
--ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py \
213213
--ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py \
214-
--ignore=tests/e2e/singlecard/test_offline_inference_310p.py \
215-
--ignore=tests/e2e/singlecard/models/test_lm_eval_correctness.py
214+
--ignore=tests/e2e/singlecard/test_offline_inference_310p.py
216215
e2e-2-cards:
217216
needs: [e2e]
218217
if: ${{ needs.e2e.result == 'success' }}
219218
strategy:
220219
max-parallel: 2
221220
matrix:
222221
os: [linux-aarch64-a2-2]
223-
vllm_version: [main, v0.10.0]
222+
vllm_version: [v0.10.1.1, main]
224223
name: multicard e2e test
225224
runs-on: ${{ matrix.os }}
226225
container:
@@ -274,12 +273,11 @@ jobs:
274273
VLLM_WORKER_MULTIPROC_METHOD: spawn
275274
VLLM_USE_MODELSCOPE: True
276275
run: |
277-
pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
276+
# pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
278277
# Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
279278
# To avoid oom, we need to run the test in a single process.
280279
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
281280
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
282-
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_dbo
283281
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeekV3_dbo
284282
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_alltoallv
285283
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC

.github/workflows/vllm_ascend_test_310p.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ jobs:
5353
max-parallel: 2
5454
matrix:
5555
os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
56-
vllm_version: [main, v0.10.0]
56+
vllm_version: [v0.10.1.1, main]
5757
name: 310p e2e test
5858
runs-on: ${{ matrix.os }}
5959
container:

.github/workflows/vllm_ascend_test_long_term.yaml

Lines changed: 0 additions & 102 deletions
This file was deleted.

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
3737

3838
# Install vLLM
3939
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
40-
ARG VLLM_TAG=v0.10.0
40+
ARG VLLM_TAG=v0.10.1.1
4141
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
4242
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
4343
RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \

0 commit comments

Comments
 (0)