diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml index 1254f3a2ff..62ac6f20ac 100644 --- a/.github/workflows/_e2e_test.yaml +++ b/.github/workflows/_e2e_test.yaml @@ -82,7 +82,9 @@ jobs: env: VLLM_WORKER_MULTIPROC_METHOD: spawn VLLM_USE_MODELSCOPE: True - PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256 + PYTORCH_NPU_ALLOC_CONF: expandable_segments:True + NPU_MEMORY_FRACTION: 0.96 + OMP_NUM_THREADS: 1 if: ${{ inputs.type == 'full' }} run: | # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run @@ -103,8 +105,7 @@ jobs: # ------------------------------------ v1 spec decode test ------------------------------------ # pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py - # Fix me: OOM error - #pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py + pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py pytest -sv tests/e2e/singlecard/ops/ diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index 50527d1945..3077c2c477 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -155,4 +155,4 @@ jobs: vllm: ${{ matrix.vllm_version }} runner: linux-aarch64-a2 image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11 - type: light + type: light \ No newline at end of file diff --git a/.github/workflows/vllm_ascend_test_full.yaml b/.github/workflows/vllm_ascend_test_full.yaml index 1c9f4d123d..c6a3260ed3 100644 --- a/.github/workflows/vllm_ascend_test_full.yaml +++ b/.github/workflows/vllm_ascend_test_full.yaml @@ -37,43 +37,15 @@ concurrency: cancel-in-progress: true jobs: - changes: - runs-on: ubuntu-latest - if: ${{ contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') }} - outputs: - e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }} - ut_tracker: ${{ steps.filter.outputs.ut_tracker }} - steps: - - uses: actions/checkout@v4 - - uses: dorny/paths-filter@v3 - id: filter - with: - filters: | - e2e_tracker: - - '.github/workflows/vllm_ascend_test.yaml' - - 'vllm_ascend/**' - - 'csrc/**' - - 'cmake/**' - - 'tests/e2e/**' - - 'CMakeLists.txt' - - 'setup.py' - - 'requirements.txt' - - 'requirements-dev.txt' - - 'requirements-lint.txt' - - 'packages.txt' - ut_tracker: - - 'tests/ut/**' - e2e-test: name: e2e-full strategy: matrix: vllm_version: [releases/v0.11.0, v0.11.0rc3] - needs: [changes] - if: ${{ needs.changes.outputs.e2e_tracker == 'true' }} uses: ./.github/workflows/_e2e_test.yaml with: vllm: ${{ matrix.vllm_version }} runner: linux-aarch64-a2 image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11 type: full +