Skip to content

Commit fa1162e

Browse files
committed
Adaptations to vllm-project#6484 and Merge remote-tracking branch 'github/main' into continous_batching_mamba_from_scratch
2 parents 906379d + f519902 commit fa1162e

File tree

113 files changed

+4134
-895
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

113 files changed

+4134
-895
lines changed

.buildkite/run-cpu-test-ppc64le.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,13 @@ docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/hugg
1818
# Run basic model test
1919
docker exec cpu-test bash -c "
2020
pip install pytest matplotlib einops transformers_stream_generator
21-
pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_oot_registration.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py --ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B on CPU is not supported
21+
pytest -v -s tests/models -m \"not vlm\" \
22+
--ignore=tests/models/test_embedding.py \
23+
--ignore=tests/models/test_oot_registration.py \
24+
--ignore=tests/models/test_registry.py \
25+
--ignore=tests/models/test_jamba.py \
26+
--ignore=tests/models/test_mamba.py \
27+
--ignore=tests/models/test_danube3_4b.py" # Mamba kernels and Danube3-4B on CPU are not supported
2228

2329
# online inference
2430
docker exec cpu-test bash -c "

.buildkite/run-cpu-test.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ docker exec cpu-test bash -c "
2727
pytest -v -s tests/models/decoder_only/language \
2828
--ignore=tests/models/test_fp8.py \
2929
--ignore=tests/models/decoder_only/language/test_jamba.py \
30+
--ignore=tests/models/decoder_only/language/test_mamba.py \
3031
--ignore=tests/models/decoder_only/language/test_granitemoe.py \
3132
--ignore=tests/models/decoder_only/language/test_danube3_4b.py" # Mamba and Danube3-4B on CPU is not supported
3233

.buildkite/test-pipeline.yaml

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,9 @@ steps:
121121
- vllm/core/
122122
- tests/distributed
123123
- tests/spec_decode/e2e/test_integration_dist_tp4
124+
- tests/compile
124125
commands:
126+
- pytest -v -s compile/test_basic_correctness.py
125127
- pytest -v -s distributed/test_pynccl.py
126128
- pytest -v -s spec_decode/e2e/test_integration_dist_tp4.py
127129

@@ -231,14 +233,16 @@ steps:
231233
- vllm/
232234
- tests/compile
233235
commands:
234-
- pytest -v -s compile/test_full_graph_smoke.py
236+
- pytest -v -s compile/test_basic_correctness.py
235237

236-
- label: "PyTorch Fullgraph Test" # 18min
237-
source_file_dependencies:
238-
- vllm/
239-
- tests/compile
240-
commands:
241-
- pytest -v -s compile/test_full_graph.py
238+
# TODO: re-write in comparison tests, and fix symbolic shape
239+
# for quantization ops.
240+
# - label: "PyTorch Fullgraph Test" # 18min
241+
# source_file_dependencies:
242+
# - vllm/
243+
# - tests/compile
244+
# commands:
245+
# - pytest -v -s compile/test_full_graph.py
242246

243247
- label: Kernels Test %N # 1h each
244248
mirror_hardwares: [amd]
@@ -343,10 +347,11 @@ steps:
343347
- pytest -v -s models/encoder_decoder/language
344348
- pytest -v -s models/encoder_decoder/vision_language
345349

350+
# This test is used only in PR development phase to test individual models and should never run on main
346351
- label: Custom Models Test
347-
#mirror_hardwares: [amd]
348352
optional: true
349353
commands:
354+
- echo 'Testing custom models...'
350355
# PR authors can temporarily add commands below to test individual models
351356
# e.g. pytest -v -s models/encoder_decoder/vision_language/test_mllama.py
352357
# *To avoid merge conflicts, remember to REMOVE (not just comment out) them before merging the PR*
@@ -394,7 +399,7 @@ steps:
394399
- tests/distributed/
395400
- vllm/compilation
396401
commands:
397-
- pytest -v -s ./compile/test_full_graph_multi_gpu.py
402+
- pytest -v -s ./compile/test_basic_correctness.py
398403
- pytest -v -s ./compile/test_wrapper.py
399404
- VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep -q 'Same node test passed'
400405
- TARGET_TEST_SUITE=L4 VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest basic_correctness/ -v -s -m distributed_2_gpus

.github/CODEOWNERS

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,30 @@
11
# See https://help.github.com/articles/about-codeowners/
22
# for more info about CODEOWNERS file
33

4+
# This list covers the "core" components of vLLM that require careful review
5+
/vllm/attention/backends/abstract.py @WoosukKwon @zhuohan123 @youkaichao @alexm-neuralmagic @comaniac @njhill
6+
/vllm/core @WoosukKwon @zhuohan123 @youkaichao @alexm-neuralmagic @comaniac @njhill
7+
/vllm/engine/llm_engine.py @WoosukKwon @zhuohan123 @youkaichao @alexm-neuralmagic @comaniac @njhill
8+
/vllm/executor/executor_base.py @WoosukKwon @zhuohan123 @youkaichao @alexm-neuralmagic @comaniac @njhill
9+
/vllm/worker/worker_base.py @WoosukKwon @zhuohan123 @youkaichao @alexm-neuralmagic @comaniac @njhill
10+
/vllm/worker/worker.py @WoosukKwon @zhuohan123 @youkaichao @alexm-neuralmagic @comaniac @njhill
11+
/vllm/model_executor/layers/sampler.py @WoosukKwon @zhuohan123 @youkaichao @alexm-neuralmagic @comaniac @njhill
12+
CMakeLists.txt @tlrmchlsmth @WoosukKwon
13+
14+
# Test ownership
415
/tests/async_engine @njhill @robertgshaw2-neuralmagic @simon-mo
516
/tests/test_inputs.py @DarkLight1337 @ywang96
6-
/tests/entrypoints @DarkLight1337 @robertgshaw2-neuralmagic @simon-mo
17+
/tests/entrypoints @DarkLight1337 @robertgshaw2-neuralmagic @simon-mo
718
/tests/models @DarkLight1337 @ywang96
819
/tests/multimodal @DarkLight1337 @ywang96
9-
/tests/prefix_caching @comaniac @KuntaiDu
20+
/tests/prefix_caching @comaniac @KuntaiDu
1021
/tests/spec_decode @njhill @LiuXiaoxuanPKU
11-
/tests/kernels @tlrmchlsmth @WoosukKwon
22+
/tests/kernels @tlrmchlsmth @WoosukKwon
1223
/tests/quantization @mgoin @robertgshaw2-neuralmagic
13-
/.buildkite/lm-eval-harness @mgoin @simon-mo
24+
/.buildkite/lm-eval-harness @mgoin @simon-mo
1425
/tests/distributed/test_multi_node_assignment.py @youkaichao
1526
/tests/distributed/test_pipeline_parallel.py @youkaichao
1627
/tests/distributed/test_same_node.py @youkaichao
17-
/tests/multi_step @alexm-neuralmagic @SolitaryThinker @comaniac
28+
/tests/multi_step @alexm-neuralmagic @comaniac
1829
/tests/weight_loading @mgoin @youkaichao
1930
/tests/basic_correctness/test_chunked_prefill @rkooo567 @comaniac

.github/workflows/actionlint.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ jobs:
2828
runs-on: ubuntu-latest
2929
steps:
3030
- name: "Checkout"
31-
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
31+
uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
3232
with:
3333
fetch-depth: 0
3434

.github/workflows/add_label_automerge.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ jobs:
88
runs-on: ubuntu-latest
99
steps:
1010
- name: Add label
11-
uses: actions/github-script@v6
11+
uses: actions/github-script@v7
1212
with:
1313
script: |
1414
github.rest.issues.addLabels({

.github/workflows/clang-format.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@ jobs:
1717
matrix:
1818
python-version: ["3.11"]
1919
steps:
20-
- uses: actions/checkout@v3
20+
- uses: actions/checkout@v4
2121
- name: Set up Python ${{ matrix.python-version }}
22-
uses: actions/setup-python@v3
22+
uses: actions/setup-python@v5
2323
with:
2424
python-version: ${{ matrix.python-version }}
2525
- name: Install dependencies

.github/workflows/mypy.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@ jobs:
1717
matrix:
1818
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
1919
steps:
20-
- uses: actions/checkout@v3
20+
- uses: actions/checkout@v4
2121
- name: Set up Python ${{ matrix.python-version }}
22-
uses: actions/setup-python@v3
22+
uses: actions/setup-python@v5
2323
with:
2424
python-version: ${{ matrix.python-version }}
2525
- name: Install dependencies

.github/workflows/publish.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121
upload_url: ${{ steps.create_release.outputs.upload_url }}
2222
steps:
2323
- name: Checkout
24-
uses: actions/checkout@v3
24+
uses: actions/checkout@v4
2525

2626
- name: Extract branch info
2727
shell: bash
@@ -30,7 +30,7 @@ jobs:
3030
3131
- name: Create Release
3232
id: create_release
33-
uses: "actions/github-script@v6"
33+
uses: "actions/github-script@v7"
3434
env:
3535
RELEASE_TAG: ${{ env.release_tag }}
3636
with:
@@ -54,7 +54,7 @@ jobs:
5454

5555
steps:
5656
- name: Checkout
57-
uses: actions/checkout@v3
57+
uses: actions/checkout@v4
5858

5959
- name: Setup ccache
6060
uses: hendrikmuhs/[email protected]
@@ -68,7 +68,7 @@ jobs:
6868
bash -x .github/workflows/scripts/env.sh
6969
7070
- name: Set up Python
71-
uses: actions/setup-python@v4
71+
uses: actions/setup-python@v5
7272
with:
7373
python-version: ${{ matrix.python-version }}
7474

.github/workflows/reminder_comment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ jobs:
88
runs-on: ubuntu-latest
99
steps:
1010
- name: Remind to run full CI on PR
11-
uses: actions/github-script@v6
11+
uses: actions/github-script@v7
1212
with:
1313
script: |
1414
github.rest.issues.createComment({

0 commit comments

Comments
 (0)