vllm-project · wangxiyuan · Mar 26, 2026 · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026
@@ -36,6 +36,10 @@ on:
       model_list:
         required: true
         type: string
+      model_filter:  # entries are matched as ",<model>," against model_list
+        required: false
+        type: string
+        default: ''  # empty, like "all", keeps every model in model_list
       upload:
         required: false
         type: boolean
@@ -182,6 +186,43 @@ jobs:
             pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION="$2}' | sed 's/+.*//'
           } >> "$GITHUB_ENV"
 
+      - name: Compute effective model list
+        # Narrows model_list to the entries named in model_filter and exports
+        # the result as EFFECTIVE_MODELS (one model per line) via GITHUB_ENV.
+        env:
+          # Inputs reach the script as environment variables instead of being
+          # interpolated into it, so their contents are never parsed as shell.
+          MODEL_FILTER: ${{ inputs.model_filter }}
+          MODEL_LIST: ${{ inputs.model_list }}
+        run: |
+          all_models=$(jq -r '.[]' <<< "$MODEL_LIST")
+
+          # A model is selected when the filter contains ",<model>,", so filter
+          # entries must be comma-wrapped. "all" or an empty filter skips matching.
+          matched_models=""
+          if [[ -n "$MODEL_FILTER" && "$MODEL_FILTER" != "all" ]]; then
+            while IFS= read -r model; do
+              if [[ -n "$model" && "$MODEL_FILTER" == *",${model},"* ]]; then
+                matched_models+="$model"$'\n'
+              fi
+            done <<< "$all_models"
+            # Drop the newline appended after the last match.
+            matched_models="${matched_models%$'\n'}"
+          fi
+
+          # With no match (or no filtering) every model in the list is kept.
+          effective_models="${matched_models:-$all_models}"
+
+          echo "Effective models to test:"
+          echo "$effective_models"
+          # Multiline values are written to GITHUB_ENV in NAME<<DELIMITER form;
+          # later steps in this job read the result as $EFFECTIVE_MODELS.
+          {
+            echo 'EFFECTIVE_MODELS<<EOF'
+            echo "$effective_models"
+            echo 'EOF'
+          } >> "$GITHUB_ENV"
+
       - name: Run vllm-project/vllm-ascend accuracy test
         id: report
         env:
@@ -200,7 +241,7 @@ jobs:
           mkdir -p ./benchmarks/accuracy
           echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib" >> ~/.bashrc
           echo "Received model_list: ${{ inputs.model_list }}"
-          models=$(echo '${{ inputs.model_list }}' | jq -r '.[]')
+          models="$EFFECTIVE_MODELS"
           any_failure=0
           for model in $models; do
             echo "Running test for model: $model"
@@ -218,7 +259,7 @@ jobs:
       - name: Generate step summary
         if: ${{ always() }}
         run: |
-          models=$(echo '${{ inputs.model_list }}' | jq -r '.[]')
+          models="$EFFECTIVE_MODELS"
           for model in $models; do
             echo "Processing model: $model"
             model_base_name=$(basename "$model")

@@ -146,46 +146,48 @@ jobs:
     secrets:
       KUBECONFIG_B64: ${{ secrets.KUBECONFIG_HK_001_INTERNAL_B64 }}
 
-  single-node-accuracy-tests:
+  generate-accuracy-matrix:  # reads the accuracy group config and exposes it as job outputs
+    name: Generate accuracy test matrix
     needs: [parse-trigger]
-    if: always() && needs.parse-trigger.outputs.run == 'true'
+    if: >-
+      always() &&
+      needs.parse-trigger.outputs.run == 'true' && (
+        needs.parse-trigger.outputs.filter == 'all' ||
+        contains(needs.parse-trigger.outputs.filter, 'accuracy-group')
+      )
+    runs-on: linux-aarch64-a2b3-0
+    outputs:  # JSON strings; the accuracy test jobs decode them with fromJson()
+      nightly_matrix: ${{ steps.set-matrix.outputs.nightly_matrix }}
+      pr_only_matrix: ${{ steps.set-matrix.outputs.pr_only_matrix }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+
+      - name: Read accuracy group config  # copies .nightly and .pr_only into step outputs
+        id: set-matrix  # referenced by the job-level outputs above
+        run: |
+          CONFIG_FILE="tests/e2e/models/configs/accuracy_groups_a2.json"
+          NIGHTLY=$(jq -c '.nightly' "$CONFIG_FILE")
+          PR_ONLY=$(jq -c '.pr_only' "$CONFIG_FILE")
+          echo "nightly_matrix=${NIGHTLY}" >> "$GITHUB_OUTPUT"
+          echo "pr_only_matrix=${PR_ONLY}" >> "$GITHUB_OUTPUT"
+
+  single-node-accuracy-tests:  # skipped unless the matrix job succeeded: fromJson() needs its output
+    needs: [parse-trigger, generate-accuracy-matrix]  # the matrix comes from the latter's outputs
+    if: >-
+      always() &&
+      needs.parse-trigger.outputs.run == 'true' &&
+      needs.generate-accuracy-matrix.result == 'success'
     strategy:
       fail-fast: false
       matrix:
-        test_config:
-          - name: accuracy-group-1
-            os: linux-aarch64-a2b3-1
-            model_list:
-              - Qwen3-VL-8B-Instruct-W8A8
-              - Qwen3-8B
-              - Qwen2-Audio-7B-Instruct
-              - Qwen3-8B-W8A8
-              - Qwen3-VL-8B-Instruct
-              - Qwen2.5-Omni-7B
-          - name: accuracy-group-2
-            os: linux-aarch64-a2b3-1
-            model_list:
-              - ERNIE-4.5-21B-A3B-PT
-              - InternVL3_5-8B-hf
-              - Molmo-7B-D-0924
-              - Llama-3.2-3B-Instruct
-              - llava-onevision-qwen2-0.5b-ov-hf
-          - name: accuracy-group-3
-            os: linux-aarch64-a2b3-2
-            model_list:
-              - Qwen3-30B-A3B
-              - Qwen3-VL-30B-A3B-Instruct
-              - Qwen3-30B-A3B-W8A8
-          - name: accuracy-group-4
-            os: linux-aarch64-a2b3-4
-            model_list:
-              - Qwen3-Next-80B-A3B-Instruct
-              - Qwen3-Omni-30B-A3B-Instruct
+        test_config: ${{ fromJson(needs.generate-accuracy-matrix.outputs.nightly_matrix) }}
     uses: ./.github/workflows/_e2e_nightly_single_node_models.yaml
     with:
       vllm: v0.18.0
       runner: ${{ matrix.test_config.os }}
       model_list: ${{ toJson(matrix.test_config.model_list) }}
+      model_filter: ${{ needs.parse-trigger.outputs.filter }}  # forwarded unchanged to the called workflow
       image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.1-910b-ubuntu22.04-py3.11'
       is_run: >-
         ${{
@@ -196,6 +198,32 @@ jobs:
         }}
       upload: false
 
+  single-node-accuracy-tests-pr-only:  # skipped unless the matrix job succeeded: fromJson() needs its output
+    needs: [parse-trigger, generate-accuracy-matrix]  # the matrix comes from the latter's outputs
+    if: >-
+      always() &&
+      needs.parse-trigger.outputs.run == 'true' &&
+      needs.parse-trigger.outputs.filter != 'all' &&
+      needs.generate-accuracy-matrix.result == 'success'
+    strategy:
+      fail-fast: false
+      matrix:
+        test_config: ${{ fromJson(needs.generate-accuracy-matrix.outputs.pr_only_matrix) }}
+    uses: ./.github/workflows/_e2e_nightly_single_node_models.yaml
+    with:
+      vllm: v0.18.0
+      runner: ${{ matrix.test_config.os }}
+      model_list: ${{ toJson(matrix.test_config.model_list) }}
+      model_filter: ${{ needs.parse-trigger.outputs.filter }}  # forwarded unchanged to the called workflow
+      image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.1-910b-ubuntu22.04-py3.11'
+      is_run: >-
+        ${{
+          needs.parse-trigger.outputs.run == 'true' &&
+          needs.parse-trigger.outputs.filter != 'all' &&
+          contains(needs.parse-trigger.outputs.filter, format(',{0},', matrix.test_config.name))
+        }}
+      upload: false
+
   doc-test:
     name: doc-test
     needs: [parse-trigger]

@@ -0,0 +1,54 @@
+{
+  "nightly": [
+    {
+      "name": "accuracy-group-1",
+      "os": "linux-aarch64-a2b3-1",
+      "model_list": [
+        "Qwen3-VL-8B-Instruct-W8A8",
+        "Qwen3-8B",
+        "Qwen2-Audio-7B-Instruct",
+        "Qwen3-8B-W8A8",
+        "Qwen3-VL-8B-Instruct",
+        "Qwen2.5-Omni-7B"
+      ]
+    },
+    {
+      "name": "accuracy-group-2",
+      "os": "linux-aarch64-a2b3-1",
+      "model_list": [
+        "ERNIE-4.5-21B-A3B-PT",
+        "InternVL3_5-8B-hf",
+        "Molmo-7B-D-0924",
+        "Llama-3.2-3B-Instruct",
+        "llava-onevision-qwen2-0.5b-ov-hf"
+      ]
+    },
+    {
+      "name": "accuracy-group-3",
+      "os": "linux-aarch64-a2b3-2",
+      "model_list": [
+        "Qwen3-30B-A3B",
+        "Qwen3-VL-30B-A3B-Instruct",
+        "Qwen3-30B-A3B-W8A8"
+      ]
+    },
+    {
+      "name": "accuracy-group-4",
+      "os": "linux-aarch64-a2b3-4",
+      "model_list": [
+        "Qwen3-Next-80B-A3B-Instruct",
+        "Qwen3-Omni-30B-A3B-Instruct"
+      ]
+    }
+  ],
+  "pr_only": [
+    {
+      "name": "pr-accuracy-group-1",
+      "os": "linux-aarch64-a2b3-1",
+      "model_list": [
+        "gemma-3-4b-it",
+        "internlm3-8b-instruct"
+      ]
+    }
+  ]
+}