Skip to content

Commit 97ea83c

Browse files
authored
Merge branch 'vllm-project:main' into feat_conprefill
2 parents 55cafcb + f0be3ee commit 97ea83c

File tree

151 files changed

+15607
-4183
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

151 files changed

+15607
-4183
lines changed

.gemini/config.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# https://developers.google.com/gemini-code-assist/docs/customize-gemini-behavior-github
2+
have_fun: false # Just review the code
3+
code_review:
4+
comment_severity_threshold: HIGH # Reduce quantity of comments
5+
pull_request_opened:
6+
summary: false # Don't summarize the PR in a separate comment

.github/ISSUE_TEMPLATE/750-RFC.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ body:
4040
attributes:
4141
label: Any Other Things.
4242
description: >
43-
Any other things you would like to mention.
43+
Any other things you would like to mention, such as a feature branch request.
4444
validations:
4545
required: false
4646
- type: markdown

.github/workflows/accuracy_test.yaml

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ jobs:
7070
runner: linux-aarch64-a2-1
7171
- model_name: Qwen3-30B-A3B
7272
runner: linux-aarch64-a2-2
73+
- model_name: DeepSeek-V2-Lite
74+
runner: linux-aarch64-a2-2
7375
fail-fast: false
7476

7577
name: ${{ matrix.model_name }} accuracy
@@ -200,9 +202,8 @@ jobs:
200202
markdown_name="${model_base_name}"
201203
echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
202204
mkdir -p ./benchmarks/accuracy
203-
pytest -sv ./tests/e2e/singlecard/models/test_lm_eval_correctness.py \
204-
--config ./tests/e2e/singlecard/models/configs/${{ matrix.model_name }}.yaml \
205-
--report_output ./benchmarks/accuracy/${model_base_name}.md
205+
pytest -sv ./tests/e2e/models/test_lm_eval_correctness.py \
206+
--config ./tests/e2e/models/configs/${{ matrix.model_name }}.yaml
206207
207208
- name: Generate step summary
208209
if: ${{ always() }}
@@ -225,14 +226,14 @@ jobs:
225226

226227
outputs:
227228
model_name: ${{ steps.set_output.outputs.model_name }}
228-
229+
vllm_ascend_version: ${{ env.GHA_VLLM_ASCEND_VERSION }}
230+
229231
create_pr:
230232
runs-on: ubuntu-latest
231233
needs: accuracy_tests
232234
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}
233235
env:
234236
UPSTREAM_REPO: vllm-project/vllm-ascend
235-
236237
steps:
237238
- name: Checkout repository
238239
uses: actions/checkout@v4
@@ -257,10 +258,10 @@ jobs:
257258
TIMESTAMP=$(date +%Y%m%d%H%M%S)
258259
BRANCH_NAME="auto-pr/accuracy-report-${TIMESTAMP}"
259260
echo "BRANCH_NAME=${BRANCH_NAME}" >> $GITHUB_ENV
260-
git checkout -B "${BRANCH_NAME}" upstream/${{ github.event.inputs.vllm-ascend-version }}
261+
git checkout -B "${BRANCH_NAME}" upstream/main
261262
262263
- name: Download only current run reports
263-
uses: actions/download-artifact@v4
264+
uses: actions/download-artifact@v5
264265
with:
265266
path: ./docs/source/developer_guide/evaluation/accuracy_report
266267
pattern: report-*
@@ -298,7 +299,7 @@ jobs:
298299
GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
299300
run: |
300301
git add ./docs/source/developer_guide/evaluation/accuracy_report/*.md
301-
git commit -s -m "[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}"
302+
git commit -s -m "[Doc] Update accuracy reports for ${{ needs.accuracy_tests.outputs.vllm_ascend_version }}"
302303
git push -f origin "${{ env.BRANCH_NAME }}"
303304
304305
- name: Create PR in upstream via API
@@ -310,9 +311,9 @@ jobs:
310311
owner: 'vllm-project',
311312
repo: 'vllm-ascend',
312313
head: `vllm-ascend-ci:${{ env.BRANCH_NAME }}`,
313-
base: '${{ github.event.inputs.vllm-ascend-version }}',
314-
title: `[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}`,
315-
body: `The accuracy results running on NPU Atlas A2 have changed, updating reports for: All models (Qwen/Qwen3-30B-A3B, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base)
314+
base: 'main',
315+
title: `[Doc] Update accuracy reports for ${{ needs.accuracy_tests.outputs.vllm_ascend_version }}`,
316+
body: `The accuracy results running on NPU Atlas A2 have changed, updating reports for: All models (Qwen3-30B-A3B, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base, DeepSeek-V2-Lite)
316317
317318
- [Workflow run][1]
318319

.github/workflows/format_pr_body.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ jobs:
4646
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
4747
4848
- name: Checkout repository
49-
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
49+
uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
5050

5151
- name: Set up Python
5252
uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0

.github/workflows/release_code.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ jobs:
4343
matrix:
4444
python-version: ["3.11"]
4545
steps:
46-
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
46+
- uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
4747

4848
- name: Print
4949
run: |

.github/workflows/release_whl.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ jobs:
5252
) }}
5353
runs-on: ${{ matrix.os }}
5454
steps:
55-
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
55+
- uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
5656

5757
- name: Print
5858
run: |

.github/workflows/vllm_ascend_test.yaml

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ jobs:
8181
VLLM_USE_MODELSCOPE: True
8282
strategy:
8383
matrix:
84-
vllm_version: [main, v0.10.0]
84+
vllm_version: [v0.10.1.1, main]
8585
steps:
8686
- name: Install packages
8787
run: |
@@ -137,7 +137,7 @@ jobs:
137137
max-parallel: 2
138138
matrix:
139139
os: [linux-aarch64-a2-1]
140-
vllm_version: [main, v0.10.0]
140+
vllm_version: [v0.10.1.1, main]
141141
name: singlecard e2e test
142142
runs-on: ${{ matrix.os }}
143143
container:
@@ -192,7 +192,7 @@ jobs:
192192
VLLM_USE_MODELSCOPE: True
193193
run: |
194194
pytest -sv tests/e2e/singlecard/test_offline_inference.py
195-
pytest -sv tests/e2e/singlecard/test_ilama_lora.py
195+
# pytest -sv tests/e2e/singlecard/test_ilama_lora.py
196196
pytest -sv tests/e2e/singlecard/test_guided_decoding.py
197197
pytest -sv tests/e2e/singlecard/test_camem.py
198198
pytest -sv tests/e2e/singlecard/test_embedding.py
@@ -211,16 +211,15 @@ jobs:
211211
--ignore=tests/e2e/singlecard/test_embedding.py \
212212
--ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py \
213213
--ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py \
214-
--ignore=tests/e2e/singlecard/test_offline_inference_310p.py \
215-
--ignore=tests/e2e/singlecard/models/test_lm_eval_correctness.py
214+
--ignore=tests/e2e/singlecard/test_offline_inference_310p.py
216215
e2e-2-cards:
217216
needs: [e2e]
218217
if: ${{ needs.e2e.result == 'success' }}
219218
strategy:
220219
max-parallel: 2
221220
matrix:
222221
os: [linux-aarch64-a2-2]
223-
vllm_version: [main, v0.10.0]
222+
vllm_version: [v0.10.1.1, main]
224223
name: multicard e2e test
225224
runs-on: ${{ matrix.os }}
226225
container:
@@ -274,12 +273,11 @@ jobs:
274273
VLLM_WORKER_MULTIPROC_METHOD: spawn
275274
VLLM_USE_MODELSCOPE: True
276275
run: |
277-
pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
276+
# pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
278277
# Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
279278
# To avoid oom, we need to run the test in a single process.
280279
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
281280
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
282-
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_dbo
283281
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeekV3_dbo
284282
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_alltoallv
285283
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC

.github/workflows/vllm_ascend_test_310p.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ jobs:
5353
max-parallel: 2
5454
matrix:
5555
os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
56-
vllm_version: [main, v0.10.0]
56+
vllm_version: [v0.10.1.1, main]
5757
name: 310p e2e test
5858
runs-on: ${{ matrix.os }}
5959
container:

.github/workflows/vllm_ascend_test_long_term.yaml

Lines changed: 0 additions & 102 deletions
This file was deleted.

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
3737

3838
# Install vLLM
3939
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
40-
ARG VLLM_TAG=v0.10.0
40+
ARG VLLM_TAG=v0.10.1.1
4141
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
4242
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
4343
RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \

0 commit comments

Comments
 (0)