Commit 12447b9

Merge branch 'main' into amd/gfx950_skinny_gemm

2 parents: d9da93f + 643622b

931 files changed (+32241 / -19077 lines)


.buildkite/pyproject.toml

Lines changed: 0 additions & 5 deletions
@@ -6,11 +6,6 @@
 
 [tool.ruff]
 line-length = 88
-exclude = [
-    # External file, leaving license intact
-    "examples/other/fp8/quantizer/quantize.py",
-    "vllm/vllm_flash_attn/flash_attn_interface.pyi"
-]
 
 [tool.ruff.lint.per-file-ignores]
 "vllm/third_party/**" = ["ALL"]

.buildkite/release-pipeline.yaml

Lines changed: 3 additions & 3 deletions
@@ -14,7 +14,7 @@ steps:
     agents:
       queue: cpu_queue_postmerge
     commands:
-      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.6.3 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
+      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.6.3 --build-arg torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0+PTX' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
       - "mkdir artifacts"
       - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
       - "bash .buildkite/scripts/upload-wheels.sh"
@@ -31,7 +31,7 @@ steps:
     agents:
       queue: cpu_queue_postmerge
     commands:
-      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=11.8.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
+      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=11.8.0 --build-arg torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0+PTX' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
       - "mkdir artifacts"
       - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
       - "bash .buildkite/scripts/upload-wheels.sh"
@@ -64,7 +64,7 @@ steps:
       - "docker push vllm/vllm-tpu:$BUILDKITE_COMMIT"
     plugins:
       - docker-login#v3.0.0:
-          username: vllm
+          username: vllmbot
           password-env: DOCKERHUB_TOKEN
     env:
       DOCKER_BUILDKIT: "1"
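
The only functional change in the two wheel-build steps is the extra torch_cuda_arch_list build-arg, which pins the GPU architectures (SM 7.0 through 9.0, plus PTX for forward compatibility) that the release wheels are compiled for. A minimal sketch of an equivalent local invocation, with only the build-args shown in the diff taken from this commit (the image tag below is a placeholder):

# Hypothetical local reproduction of the CUDA 12.6 build step.
DOCKER_BUILDKIT=1 docker build \
    --build-arg max_jobs=16 \
    --build-arg CUDA_VERSION=12.6.3 \
    --build-arg torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0+PTX' \
    --tag vllm-local:build-image --target build \
    -f docker/Dockerfile .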

.buildkite/scripts/hardware_ci/run-amd-test.sh

Lines changed: 8 additions & 0 deletions
@@ -82,6 +82,14 @@ if [[ $commands == *"pytest -v -s basic_correctness/test_basic_correctness.py"*
     commands=${commands//"pytest -v -s basic_correctness/test_basic_correctness.py"/"VLLM_USE_TRITON_FLASH_ATTN=0 pytest -v -s basic_correctness/test_basic_correctness.py"}
 fi
 
+if [[ $commands == *"pytest -v -s models/test_registry.py"* ]]; then
+    commands=${commands//"pytest -v -s models/test_registry.py"/"pytest -v -s models/test_registry.py -k 'not BambaForCausalLM and not GritLM and not Mamba2ForCausalLM and not Zamba2ForCausalLM'"}
+fi
+
+if [[ $commands == *"VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2'"* ]]; then
+    commands=${commands//"VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2'"/"VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2 and not BambaForCausalLM and not Gemma2ForCausalLM and not Grok1ModelForCausalLM and not Zamba2ForCausalLM and not Gemma2Model and not GritLM'"}
+fi
+
 if [[ $commands == *"pytest -v -s compile/test_basic_correctness.py"* ]]; then
     commands=${commands//"pytest -v -s compile/test_basic_correctness.py"/"VLLM_USE_TRITON_FLASH_ATTN=0 pytest -v -s compile/test_basic_correctness.py"}
 fi
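
The new blocks reuse bash's global pattern replacement, ${var//old/new}, to append a pytest -k filter that deselects model classes not supported on this hardware. A minimal standalone illustration of the idiom (the command string here is a made-up example, not the real CI pipeline contents):

commands="pytest -v -s models/test_registry.py && echo done"
# Replace every occurrence of the literal invocation with one that adds a -k filter.
commands=${commands//"pytest -v -s models/test_registry.py"/"pytest -v -s models/test_registry.py -k 'not GritLM'"}
echo "$commands"
# -> pytest -v -s models/test_registry.py -k 'not GritLM' && echo done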

.buildkite/scripts/hardware_ci/run-hpu-test.sh

Lines changed: 7 additions & 5 deletions
@@ -10,15 +10,17 @@ docker build -t hpu-test-env -f docker/Dockerfile.hpu .
 # Setup cleanup
 # certain versions of HPU software stack have a bug that can
 # override the exit code of the script, so we need to use
-# separate remove_docker_container and remove_docker_container_and_exit
+# separate remove_docker_containers and remove_docker_containers_and_exit
 # functions, while other platforms only need one remove_docker_container
 # function.
 EXITCODE=1
-remove_docker_container() { docker rm -f hpu-test || true; }
-remove_docker_container_and_exit() { remove_docker_container; exit $EXITCODE; }
-trap remove_docker_container_and_exit EXIT
-remove_docker_container
+remove_docker_containers() { docker rm -f hpu-test || true; docker rm -f hpu-test-tp2 || true; }
+remove_docker_containers_and_exit() { remove_docker_containers; exit $EXITCODE; }
+trap remove_docker_containers_and_exit EXIT
+remove_docker_containers
 
 # Run the image and launch offline inference
 docker run --runtime=habana --name=hpu-test --network=host -e HABANA_VISIBLE_DEVICES=all -e VLLM_SKIP_WARMUP=true --entrypoint="" hpu-test-env python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m
+docker run --runtime=habana --name=hpu-test-tp2 --network=host -e HABANA_VISIBLE_DEVICES=all -e VLLM_SKIP_WARMUP=true --entrypoint="" hpu-test-env python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --tensor-parallel-size 2
+
 EXITCODE=$?
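
The cleanup comment above is why the script keeps EXITCODE in a variable: the EXIT trap both removes the containers and re-exits with the saved status, so a later successful docker rm cannot mask a test failure. A toy sketch of the same pattern (container and image names are placeholders, not taken from this commit):

EXITCODE=1
cleanup() { docker rm -f demo-container || true; exit $EXITCODE; }
trap cleanup EXIT
docker run --name=demo-container demo-image python3 -c "print('hello')"
EXITCODE=$?   # saved before the trap fires; the trap re-exits with this value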

.buildkite/scripts/hardware_ci/run-neuron-test.sh

Lines changed: 11 additions & 2 deletions
@@ -11,13 +11,14 @@ container_name="neuron_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)"
 HF_CACHE="$(realpath ~)/huggingface"
 mkdir -p "${HF_CACHE}"
 HF_MOUNT="/root/.cache/huggingface"
+HF_TOKEN=$(aws secretsmanager get-secret-value --secret-id "ci/vllm-neuron/hf-token" --region us-west-2 --query 'SecretString' --output text | jq -r .VLLM_NEURON_CI_HF_TOKEN)
 
 NEURON_COMPILE_CACHE_URL="$(realpath ~)/neuron_compile_cache"
 mkdir -p "${NEURON_COMPILE_CACHE_URL}"
 NEURON_COMPILE_CACHE_MOUNT="/root/.cache/neuron_compile_cache"
 
 # Try building the docker image
-aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 763104351884.dkr.ecr.us-west-2.amazonaws.com
+aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws
 
 # prune old image and containers to save disk space, and only once a day
 # by using a timestamp file in tmp.
@@ -47,8 +48,16 @@ trap remove_docker_container EXIT
 docker run --rm -it --device=/dev/neuron0 --network bridge \
     -v "${HF_CACHE}:${HF_MOUNT}" \
     -e "HF_HOME=${HF_MOUNT}" \
+    -e "HF_TOKEN=${HF_TOKEN}" \
    -v "${NEURON_COMPILE_CACHE_URL}:${NEURON_COMPILE_CACHE_MOUNT}" \
     -e "NEURON_COMPILE_CACHE_URL=${NEURON_COMPILE_CACHE_MOUNT}" \
     --name "${container_name}" \
     ${image_name} \
-    /bin/bash -c "python3 /workspace/vllm/examples/offline_inference/neuron.py && python3 -m pytest /workspace/vllm/tests/neuron/1_core/ -v --capture=tee-sys && python3 -m pytest /workspace/vllm/tests/neuron/2_core/ -v --capture=tee-sys"
+    /bin/bash -c "
+        python3 /workspace/vllm/examples/offline_inference/neuron.py;
+        python3 -m pytest /workspace/vllm/tests/neuron/1_core/ -v --capture=tee-sys;
+        for f in /workspace/vllm/tests/neuron/2_core/*.py; do
+            echo 'Running test file: '$f;
+            python3 -m pytest \$f -v --capture=tee-sys;
+        done
+        "

.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh

Lines changed: 131 additions & 84 deletions
@@ -13,91 +13,138 @@ remove_docker_container
 
 # For HF_TOKEN.
 source /etc/environment
-# Run a simple end-to-end example.
+
 docker run --privileged --net host --shm-size=16G -it \
     -e "HF_TOKEN=$HF_TOKEN" --name tpu-test \
-    vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git \
-    && python3 -m pip install pytest pytest-asyncio tpu-info \
-    && python3 -m pip install lm_eval[api]==0.4.4 \
-    && export VLLM_XLA_CACHE_PATH= \
-    && export VLLM_USE_V1=1 \
-    && export VLLM_XLA_CHECK_RECOMPILATION=1 \
-    && echo HARDWARE \
-    && tpu-info \
-    && { \
-        echo TEST_0: Running test_perf.py; \
-        python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_perf.py; \
-        echo TEST_0_EXIT_CODE: \$?; \
-    } & \
-    { \
-        echo TEST_1: Running test_compilation.py; \
-        python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_compilation.py; \
-        echo TEST_1_EXIT_CODE: \$?; \
-    } & \
-    { \
-        echo TEST_2: Running test_basic.py; \
-        python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_basic.py; \
-        echo TEST_2_EXIT_CODE: \$?; \
-    } & \
-    { \
-        echo TEST_3: Running test_accuracy.py::test_lm_eval_accuracy_v1_engine; \
-        python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine; \
-        echo TEST_3_EXIT_CODE: \$?; \
-    } & \
-    { \
-        echo TEST_4: Running test_quantization_accuracy.py; \
-        python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py; \
-        echo TEST_4_EXIT_CODE: \$?; \
-    } & \
-    { \
-        echo TEST_5: Running examples/offline_inference/tpu.py; \
-        python3 /workspace/vllm/examples/offline_inference/tpu.py; \
-        echo TEST_5_EXIT_CODE: \$?; \
-    } & \
-    { \
-        echo TEST_6: Running test_tpu_model_runner.py; \
-        python3 -m pytest -s -v /workspace/vllm/tests/tpu/worker/test_tpu_model_runner.py; \
-        echo TEST_6_EXIT_CODE: \$?; \
-    } & \
-    { \
-        echo TEST_7: Running test_sampler.py; \
-        python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_sampler.py; \
-        echo TEST_7_EXIT_CODE: \$?; \
-    } & \
-    { \
-        echo TEST_8: Running test_topk_topp_sampler.py; \
-        python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_topk_topp_sampler.py; \
-        echo TEST_8_EXIT_CODE: \$?; \
-    } & \
-    { \
-        echo TEST_9: Running test_multimodal.py; \
-        python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_multimodal.py; \
-        echo TEST_9_EXIT_CODE: \$?; \
-    } & \
-    { \
-        echo TEST_10: Running test_pallas.py; \
-        python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py; \
-        echo TEST_10_EXIT_CODE: \$?; \
-    } & \
-    { \
-        echo TEST_11: Running test_struct_output_generate.py; \
-        python3 -m pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py; \
-        echo TEST_11_EXIT_CODE: \$?; \
-    } & \
-    { \
-        echo TEST_12: Running test_moe_pallas.py; \
-        python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py; \
-        echo TEST_12_EXIT_CODE: \$?; \
-    } & \
-    # Disable the TPU LoRA tests until the feature is activated
-    # & { \
-    #     echo TEST_13: Running test_moe_pallas.py; \
-    #     python3 -m pytest -s -v /workspace/vllm/tests/tpu/lora/; \
-    #     echo TEST_13_EXIT_CODE: \$?; \
-    # } & \
-    wait \
-    && echo 'All tests have attempted to run. Check logs for individual test statuses and exit codes.' \
-    "
+    vllm-tpu /bin/bash -c '
+set -e  # Exit immediately if a command exits with a non-zero status.
+set -u  # Treat unset variables as an error.
+
+echo "--- Starting script inside Docker container ---"
+
+# Create results directory
+RESULTS_DIR=$(mktemp -d)
+# If mktemp fails, set -e will cause the script to exit.
+echo "Results will be stored in: $RESULTS_DIR"
+
+# Install dependencies
+echo "--- Installing Python dependencies ---"
+python3 -m pip install --progress-bar off git+https://github.com/thuml/depyf.git \
+    && python3 -m pip install --progress-bar off pytest pytest-asyncio tpu-info \
+    && python3 -m pip install --progress-bar off lm_eval[api]==0.4.4
+echo "--- Python dependencies installed ---"
+export VLLM_USE_V1=1
+export VLLM_XLA_CHECK_RECOMPILATION=1
+export VLLM_XLA_CACHE_PATH=
+echo "Using VLLM V1"
+
+echo "--- Hardware Information ---"
+tpu-info
+echo "--- Starting Tests ---"
+set +e
+overall_script_exit_code=0
+
+# --- Test Definitions ---
+# If a test fails, this function will print logs and will not cause the main script to exit.
+run_test() {
+    local test_num=$1
+    local test_name=$2
+    local test_command=$3
+    local log_file="$RESULTS_DIR/test_${test_num}.log"
+    local actual_exit_code
+
+    echo "--- TEST_$test_num: Running $test_name ---"
+
+    # Execute the test command.
+    eval "$test_command" > >(tee -a "$log_file") 2> >(tee -a "$log_file" >&2)
+    actual_exit_code=$?
+
+    echo "TEST_${test_num}_COMMAND_EXIT_CODE: $actual_exit_code"  # This goes to main log
+    echo "TEST_${test_num}_COMMAND_EXIT_CODE: $actual_exit_code" >> "$log_file"  # Also to per-test log
+
+    if [ "$actual_exit_code" -ne 0 ]; then
+        echo "TEST_$test_num ($test_name) FAILED with exit code $actual_exit_code." >&2
+        echo "--- Log for failed TEST_$test_num ($test_name) ---" >&2
+        if [ -f "$log_file" ]; then
+            cat "$log_file" >&2
+        else
+            echo "Log file $log_file not found for TEST_$test_num ($test_name)." >&2
+        fi
+        echo "--- End of log for TEST_$test_num ($test_name) ---" >&2
+        return "$actual_exit_code"  # Return the failure code
+    else
+        echo "TEST_$test_num ($test_name) PASSED."
+        return 0  # Return success
+    fi
+}
+
+# Helper function to call run_test and update the overall script exit code
+run_and_track_test() {
+    local test_num_arg="$1"
+    local test_name_arg="$2"
+    local test_command_arg="$3"
+
+    # Run the test
+    run_test "$test_num_arg" "$test_name_arg" "$test_command_arg"
+    local test_specific_exit_code=$?
+
+    # If the test failed, set the overall script exit code to 1
+    if [ "$test_specific_exit_code" -ne 0 ]; then
+        # No need for extra echo here, run_test already logged the failure.
+        overall_script_exit_code=1
+    fi
+}
+
+# --- Actual Test Execution ---
+run_and_track_test 0 "test_perf.py" \
+    "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_perf.py"
+run_and_track_test 1 "test_compilation.py" \
+    "python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_compilation.py"
+run_and_track_test 2 "test_basic.py" \
+    "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_basic.py"
+run_and_track_test 3 "test_accuracy.py::test_lm_eval_accuracy_v1_engine" \
+    "python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine"
+run_and_track_test 4 "test_quantization_accuracy.py" \
+    "python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py"
+run_and_track_test 5 "examples/offline_inference/tpu.py" \
+    "python3 /workspace/vllm/examples/offline_inference/tpu.py"
+run_and_track_test 6 "test_tpu_model_runner.py" \
+    "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/worker/test_tpu_model_runner.py"
+run_and_track_test 7 "test_sampler.py" \
+    "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_sampler.py"
+run_and_track_test 8 "test_topk_topp_sampler.py" \
+    "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_topk_topp_sampler.py"
+run_and_track_test 9 "test_multimodal.py" \
+    "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_multimodal.py"
+run_and_track_test 10 "test_pallas.py" \
+    "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py"
+run_and_track_test 11 "test_struct_output_generate.py" \
+    "python3 -m pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py"
+run_and_track_test 12 "test_moe_pallas.py" \
+    "python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py"
+run_and_track_test 13 "test_lora.py" \
+    "VLLM_XLA_CHECK_RECOMPILATION=0 python3 -m pytest -s -v /workspace/vllm/tests/tpu/lora/test_lora.py"
+
+# After all tests have been attempted, exit with the overall status.
+if [ "$overall_script_exit_code" -ne 0 ]; then
+    echo "--- One or more tests FAILED. Overall script exiting with failure code 1. ---"
+else
+    echo "--- All tests have completed and PASSED. Overall script exiting with success code 0. ---"
+fi
+exit "$overall_script_exit_code"
+'  # IMPORTANT: This is the closing single quote for the bash -c "..." command. Ensure it is present and correct.
+
+# Capture the exit code of the docker run command
+DOCKER_RUN_EXIT_CODE=$?
 
+# The trap will run for cleanup.
+# Exit the main script with the Docker run command's exit code.
+if [ "$DOCKER_RUN_EXIT_CODE" -ne 0 ]; then
+    echo "Docker run command failed with exit code $DOCKER_RUN_EXIT_CODE."
+    exit "$DOCKER_RUN_EXIT_CODE"
+else
+    echo "Docker run command completed successfully."
+    exit 0
+fi
 # TODO: This test fails because it uses RANDOM_SEED sampling
-# && VLLM_USE_V1=1 pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \
+# pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \
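
The rewritten TPU script replaces the old backgrounded { ...; } & blocks with two helpers: run_test executes one command, tees its output to a per-test log, and prints that log on failure, while run_and_track_test folds each result into a single overall exit code so the step fails if any test failed. A condensed sketch of the same pattern (test names and paths here are examples, not the CI list; for brevity this sketch uses a plain pipe plus PIPESTATUS where the real script uses process substitution to preserve the command's exit code):

set +e
overall=0
run_and_track() {
    local name="$1" cmd="$2" log
    log="$(mktemp)"
    echo "--- Running $name ---"
    # Run the command, mirroring output to a per-test log file.
    eval "$cmd" 2>&1 | tee "$log"
    local rc=${PIPESTATUS[0]}
    if [ "$rc" -ne 0 ]; then
        echo "$name FAILED (exit $rc); see $log" >&2
        overall=1
    fi
}
run_and_track "unit tests" "python3 -m pytest -s -v tests/some_dir"
run_and_track "example script" "python3 examples/some_example.py"
exit "$overall"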

.buildkite/scripts/upload-wheels.sh

Lines changed: 1 addition & 0 deletions
@@ -75,3 +75,4 @@ else
 fi
 
 aws s3 cp "$wheel" "s3://vllm-wheels/$version/"
+aws s3 cp index.html "s3://vllm-wheels/$version/vllm/index.html"
