vllm-project · Yikun · Sep 28, 2025
diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml
@@ -85,108 +85,7 @@ jobs:
         run: |
           # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
           # the test separately.
-
-          pytest -sv tests/e2e/singlecard/test_aclgraph.py
-          pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
-          pytest -sv tests/e2e/singlecard/test_camem.py
-          pytest -sv tests/e2e/singlecard/test_chunked.py
-          pytest -sv tests/e2e/singlecard/test_embedding.py
-          pytest -sv tests/e2e/singlecard/test_guided_decoding.py
-          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
-          pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
-          pytest -sv tests/e2e/singlecard/test_quantization.py
-          pytest -sv tests/e2e/singlecard/test_sampler.py
-          pytest -sv tests/e2e/singlecard/test_vlm.py
-
-          # ------------------------------------ v1 spec decode test ------------------------------------ #
+          npu-smi info
           pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
-          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
-          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
-
-          pytest -sv tests/e2e/singlecard/ops/
-
-  e2e-2-cards:
-    name: multicard
-    runs-on: ${{ inputs.runner }}-2
-    container:
-      image: ${{ inputs.image }}
-      env:
-        VLLM_LOGGING_LEVEL: ERROR
-        VLLM_USE_MODELSCOPE: True
-    steps:
-      - name: Check npu and CANN info
-        run: |
           npu-smi info
-          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
-
-      - name: Config mirrors
-        run: |
-          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
-          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
-          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
-          apt-get update -y
-          apt install git -y
-
-      - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v4
-
-      - name: Install system dependencies
-        run: |
-          apt-get -y install `cat packages.txt`
-          apt-get -y install gcc g++ cmake libnuma-dev
-
-      - name: Checkout vllm-project/vllm repo
-        uses: actions/checkout@v4
-        with:
-          repository: vllm-project/vllm
-          ref: ${{ inputs.vllm }}
-          path: ./vllm-empty
-          fetch-depth: 1
-
-      - name: Install vllm-project/vllm from source
-        working-directory: ./vllm-empty
-        run: |
-          VLLM_TARGET_DEVICE=empty pip install -e .
-
-      - name: Install vllm-project/vllm-ascend
-        env:
-          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
-        run: |
-          pip install -r requirements-dev.txt
-          pip install -v -e .
-
-      - name: Run vllm-project/vllm-ascend test (light)
-        env:
-          VLLM_WORKER_MULTIPROC_METHOD: spawn
-          VLLM_USE_MODELSCOPE: True
-        if: ${{ inputs.type == 'light' }}
-        run: |
-          pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
-
-      - name: Run vllm-project/vllm-ascend test (full)
-        env:
-          VLLM_WORKER_MULTIPROC_METHOD: spawn
-          VLLM_USE_MODELSCOPE: True
-        if: ${{ inputs.type == 'full' }}
-        run: |
-          pytest -sv tests/e2e/multicard/test_data_parallel.py
-          pytest -sv tests/e2e/multicard/test_expert_parallel.py
-          # external_launcher test is not stable enough. Fix it later
-          # pytest -sv tests/e2e/multicard/test_external_launcher.py
-          pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
-          pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
-
-          # To avoid oom, we need to run the test in a single process.
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight
-
-          #pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
-          pytest -sv tests/e2e/multicard/test_prefix_caching.py
-          pytest -sv tests/e2e/multicard/test_qwen3_moe.py
-          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
+          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
diff --git a/.github/workflows/vllm_ascend_test_full.yaml b/.github/workflows/vllm_ascend_test_full.yaml
@@ -37,40 +37,11 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  changes:
-    runs-on: ubuntu-latest
-    if: ${{ contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') }}
-    outputs:
-      e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
-      ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
-    steps:
-      - uses: actions/checkout@v4
-      - uses: dorny/paths-filter@v3
-        id: filter
-        with:
-          filters: |
-            e2e_tracker:
-              - '.github/workflows/vllm_ascend_test.yaml'
-              - 'vllm_ascend/**'
-              - 'csrc/**'
-              - 'cmake/**'
-              - 'tests/e2e/**'
-              - 'CMakeLists.txt'
-              - 'setup.py'
-              - 'requirements.txt'
-              - 'requirements-dev.txt'
-              - 'requirements-lint.txt'
-              - 'packages.txt'
-            ut_tracker:
-              - 'tests/ut/**'
-
   e2e-test:
     name: e2e-full
     strategy:
       matrix:
         vllm_version: [releases/v0.11.0, v0.10.2]
-    needs: [changes]
-    if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
     uses: ./.github/workflows/_e2e_test.yaml
     with:
       vllm: ${{ matrix.vllm_version }}