diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml index 3a4f3dfab4..8799d61a42 100644 --- a/.github/workflows/_e2e_test.yaml +++ b/.github/workflows/_e2e_test.yaml @@ -85,108 +85,7 @@ jobs: run: | # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run # the test separately. - - pytest -sv tests/e2e/singlecard/test_aclgraph.py - pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py - pytest -sv tests/e2e/singlecard/test_camem.py - pytest -sv tests/e2e/singlecard/test_chunked.py - pytest -sv tests/e2e/singlecard/test_embedding.py - pytest -sv tests/e2e/singlecard/test_guided_decoding.py - pytest -sv tests/e2e/singlecard/test_ilama_lora.py - pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py - pytest -sv tests/e2e/singlecard/test_quantization.py - pytest -sv tests/e2e/singlecard/test_sampler.py - pytest -sv tests/e2e/singlecard/test_vlm.py - - # ------------------------------------ v1 spec decode test ------------------------------------ # + npu-smi info pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py - pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py - pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py - - pytest -sv tests/e2e/singlecard/ops/ - - e2e-2-cards: - name: multicard - runs-on: ${{ inputs.runner }}-2 - container: - image: ${{ inputs.image }} - env: - VLLM_LOGGING_LEVEL: ERROR - VLLM_USE_MODELSCOPE: True - steps: - - name: Check npu and CANN info - run: | npu-smi info - cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - - - name: Config mirrors - run: | - sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list - pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple - pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local - apt-get update -y - apt install git -y - - - name: Checkout vllm-project/vllm-ascend repo - uses: actions/checkout@v4 - - - name: Install system dependencies - run: | - apt-get -y install `cat packages.txt` - apt-get -y install gcc g++ cmake libnuma-dev - - - name: Checkout vllm-project/vllm repo - uses: actions/checkout@v4 - with: - repository: vllm-project/vllm - ref: ${{ inputs.vllm }} - path: ./vllm-empty - fetch-depth: 1 - - - name: Install vllm-project/vllm from source - working-directory: ./vllm-empty - run: | - VLLM_TARGET_DEVICE=empty pip install -e . - - - name: Install vllm-project/vllm-ascend - env: - PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi - run: | - pip install -r requirements-dev.txt - pip install -v -e . - - - name: Run vllm-project/vllm-ascend test (light) - env: - VLLM_WORKER_MULTIPROC_METHOD: spawn - VLLM_USE_MODELSCOPE: True - if: ${{ inputs.type == 'light' }} - run: | - pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP - - - name: Run vllm-project/vllm-ascend test (full) - env: - VLLM_WORKER_MULTIPROC_METHOD: spawn - VLLM_USE_MODELSCOPE: True - if: ${{ inputs.type == 'full' }} - run: | - pytest -sv tests/e2e/multicard/test_data_parallel.py - pytest -sv tests/e2e/multicard/test_expert_parallel.py - # external_launcher test is not stable enough. Fix it later - # pytest -sv tests/e2e/multicard/test_external_launcher.py - pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py - pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py - - # To avoid oom, we need to run the test in a single process. - pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ - pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe - pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8 - pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC - pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC - pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe - pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1 - pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight - - #pytest -sv tests/e2e/multicard/test_pipeline_parallel.py - pytest -sv tests/e2e/multicard/test_prefix_caching.py - pytest -sv tests/e2e/multicard/test_qwen3_moe.py - pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py + pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py diff --git a/.github/workflows/vllm_ascend_test_full.yaml b/.github/workflows/vllm_ascend_test_full.yaml index 5f5089dc65..1e862c824d 100644 --- a/.github/workflows/vllm_ascend_test_full.yaml +++ b/.github/workflows/vllm_ascend_test_full.yaml @@ -37,40 +37,11 @@ concurrency: cancel-in-progress: true jobs: - changes: - runs-on: ubuntu-latest - if: ${{ contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') }} - outputs: - e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }} - ut_tracker: ${{ steps.filter.outputs.ut_tracker }} - steps: - - uses: actions/checkout@v4 - - uses: dorny/paths-filter@v3 - id: filter - with: - filters: | - e2e_tracker: - - '.github/workflows/vllm_ascend_test.yaml' - - 'vllm_ascend/**' - - 'csrc/**' - - 'cmake/**' - - 'tests/e2e/**' - - 'CMakeLists.txt' - - 'setup.py' - - 'requirements.txt' - - 'requirements-dev.txt' - - 'requirements-lint.txt' - - 'packages.txt' - ut_tracker: - - 'tests/ut/**' - e2e-test: name: e2e-full strategy: matrix: vllm_version: [releases/v0.11.0, v0.10.2] - needs: [changes] - if: ${{ needs.changes.outputs.e2e_tracker == 'true' }} uses: ./.github/workflows/_e2e_test.yaml with: vllm: ${{ matrix.vllm_version }}