Skip to content

Commit 3712649

Browse files
authored
Merge pull request ROCm#565 from ROCm/upstream_merge_2025_06_02
Upstream merge 2025 06 02
2 parents 7bb0618 + 9d4c238 commit 3712649

File tree

252 files changed

+8487
-4863
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

252 files changed

+8487
-4863
lines changed

.buildkite/nightly-benchmarks/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ WARNING: The benchmarking script will save json results by itself, so please do
113113

114114
### Visualizing the results
115115

116-
The `convert-results-json-to-markdown.py` helps you put the benchmarking results inside a markdown table, by formatting [descriptions.md](tests/descriptions.md) with real benchmarking results.
116+
The `convert-results-json-to-markdown.py` helps you put the benchmarking results inside a markdown table, by formatting [descriptions.md](performance-benchmarks-descriptions.md) with real benchmarking results.
117117
You can find the result presented as a table inside the `buildkite/performance-benchmark` job page.
118118
If you do not see the table, please wait until the benchmark finishes running.
119119
The json version of the table (together with the json version of the benchmark) will be also attached to the markdown file.

.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh

Lines changed: 166 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -2,102 +2,180 @@
22

33
set -xu
44

5-
# Build the docker image.
6-
docker build -f docker/Dockerfile.tpu -t vllm-tpu .
75

8-
# Set up cleanup.
9-
remove_docker_container() { docker rm -f tpu-test || true; }
6+
remove_docker_container() {
7+
docker rm -f tpu-test || true;
8+
docker rm -f vllm-tpu || true;
9+
}
10+
1011
trap remove_docker_container EXIT
12+
1113
# Remove the container that might not be cleaned up in the previous run.
1214
remove_docker_container
1315

16+
# Build the docker image.
17+
docker build -f docker/Dockerfile.tpu -t vllm-tpu .
18+
19+
# Set up cleanup.
20+
cleanup_docker() {
21+
# Get Docker's root directory
22+
docker_root=$(docker info -f '{{.DockerRootDir}}')
23+
if [ -z "$docker_root" ]; then
24+
echo "Failed to determine Docker root directory."
25+
exit 1
26+
fi
27+
echo "Docker root directory: $docker_root"
28+
# Check disk usage of the filesystem where Docker's root directory is located
29+
disk_usage=$(df "$docker_root" | tail -1 | awk '{print $5}' | sed 's/%//')
30+
# Define the threshold
31+
threshold=70
32+
if [ "$disk_usage" -gt "$threshold" ]; then
33+
echo "Disk usage is above $threshold%. Cleaning up Docker images and volumes..."
34+
# Remove dangling images (those that are not tagged and not used by any container)
35+
docker image prune -f
36+
# Remove unused volumes / force the system prune for old images as well.
37+
docker volume prune -f && docker system prune --force --filter "until=72h" --all
38+
echo "Docker images and volumes cleanup completed."
39+
else
40+
echo "Disk usage is below $threshold%. No cleanup needed."
41+
fi
42+
}
43+
cleanup_docker
44+
1445
# For HF_TOKEN.
1546
source /etc/environment
16-
# Run a simple end-to-end example.
47+
1748
docker run --privileged --net host --shm-size=16G -it \
1849
-e "HF_TOKEN=$HF_TOKEN" --name tpu-test \
19-
vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git \
20-
&& python3 -m pip install pytest pytest-asyncio tpu-info \
21-
&& python3 -m pip install lm_eval[api]==0.4.4 \
22-
&& export VLLM_XLA_CACHE_PATH= \
23-
&& export VLLM_USE_V1=1 \
24-
&& export VLLM_XLA_CHECK_RECOMPILATION=1 \
25-
&& echo HARDWARE \
26-
&& tpu-info \
27-
&& { \
28-
echo TEST_0: Running test_perf.py; \
29-
python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_perf.py; \
30-
echo TEST_0_EXIT_CODE: \$?; \
31-
} & \
32-
{ \
33-
echo TEST_1: Running test_compilation.py; \
34-
python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_compilation.py; \
35-
echo TEST_1_EXIT_CODE: \$?; \
36-
} & \
37-
{ \
38-
echo TEST_2: Running test_basic.py; \
39-
python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_basic.py; \
40-
echo TEST_2_EXIT_CODE: \$?; \
41-
} & \
42-
{ \
43-
echo TEST_3: Running test_accuracy.py::test_lm_eval_accuracy_v1_engine; \
44-
python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine; \
45-
echo TEST_3_EXIT_CODE: \$?; \
46-
} & \
47-
{ \
48-
echo TEST_4: Running test_quantization_accuracy.py; \
49-
python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py; \
50-
echo TEST_4_EXIT_CODE: \$?; \
51-
} & \
52-
{ \
53-
echo TEST_5: Running examples/offline_inference/tpu.py; \
54-
python3 /workspace/vllm/examples/offline_inference/tpu.py; \
55-
echo TEST_5_EXIT_CODE: \$?; \
56-
} & \
57-
{ \
58-
echo TEST_6: Running test_tpu_model_runner.py; \
59-
python3 -m pytest -s -v /workspace/vllm/tests/tpu/worker/test_tpu_model_runner.py; \
60-
echo TEST_6_EXIT_CODE: \$?; \
61-
} & \
62-
{ \
63-
echo TEST_7: Running test_sampler.py; \
64-
python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_sampler.py; \
65-
echo TEST_7_EXIT_CODE: \$?; \
66-
} & \
67-
{ \
68-
echo TEST_8: Running test_topk_topp_sampler.py; \
69-
python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_topk_topp_sampler.py; \
70-
echo TEST_8_EXIT_CODE: \$?; \
71-
} & \
72-
{ \
73-
echo TEST_9: Running test_multimodal.py; \
74-
python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_multimodal.py; \
75-
echo TEST_9_EXIT_CODE: \$?; \
76-
} & \
77-
{ \
78-
echo TEST_10: Running test_pallas.py; \
79-
python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py; \
80-
echo TEST_10_EXIT_CODE: \$?; \
81-
} & \
82-
{ \
83-
echo TEST_11: Running test_struct_output_generate.py; \
84-
python3 -m pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py; \
85-
echo TEST_11_EXIT_CODE: \$?; \
86-
} & \
87-
{ \
88-
echo TEST_12: Running test_moe_pallas.py; \
89-
python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py; \
90-
echo TEST_12_EXIT_CODE: \$?; \
91-
} & \
92-
# Disable the TPU LoRA tests until the feature is activated
93-
# & { \
94-
# echo TEST_13: Running test_moe_pallas.py; \
95-
# python3 -m pytest -s -v /workspace/vllm/tests/tpu/lora/; \
96-
# echo TEST_13_EXIT_CODE: \$?; \
97-
# } & \
98-
wait \
99-
&& echo 'All tests have attempted to run. Check logs for individual test statuses and exit codes.' \
100-
"
50+
vllm-tpu /bin/bash -c '
51+
set -e # Exit immediately if a command exits with a non-zero status.
52+
set -u # Treat unset variables as an error.
53+
54+
echo "--- Starting script inside Docker container ---"
55+
56+
# Create results directory
57+
RESULTS_DIR=$(mktemp -d)
58+
# If mktemp fails, set -e will cause the script to exit.
59+
echo "Results will be stored in: $RESULTS_DIR"
60+
61+
# Install dependencies
62+
echo "--- Installing Python dependencies ---"
63+
python3 -m pip install --progress-bar off git+https://github.com/thuml/depyf.git \
64+
&& python3 -m pip install --progress-bar off pytest pytest-asyncio tpu-info \
65+
&& python3 -m pip install --progress-bar off lm_eval[api]==0.4.4
66+
echo "--- Python dependencies installed ---"
67+
export VLLM_USE_V1=1
68+
export VLLM_XLA_CHECK_RECOMPILATION=1
69+
export VLLM_XLA_CACHE_PATH=
70+
echo "Using VLLM V1"
71+
72+
echo "--- Hardware Information ---"
73+
tpu-info
74+
echo "--- Starting Tests ---"
75+
set +e
76+
overall_script_exit_code=0
77+
78+
# --- Test Definitions ---
79+
# If a test fails, this function will print logs and will not cause the main script to exit.
80+
run_test() {
81+
local test_num=$1
82+
local test_name=$2
83+
local test_command=$3
84+
local log_file="$RESULTS_DIR/test_${test_num}.log"
85+
local actual_exit_code
86+
87+
echo "--- TEST_$test_num: Running $test_name ---"
88+
89+
# Execute the test command.
90+
eval "$test_command" > >(tee -a "$log_file") 2> >(tee -a "$log_file" >&2)
91+
actual_exit_code=$?
92+
93+
echo "TEST_${test_num}_COMMAND_EXIT_CODE: $actual_exit_code" # This goes to main log
94+
echo "TEST_${test_num}_COMMAND_EXIT_CODE: $actual_exit_code" >> "$log_file" # Also to per-test log
95+
96+
if [ "$actual_exit_code" -ne 0 ]; then
97+
echo "TEST_$test_num ($test_name) FAILED with exit code $actual_exit_code." >&2
98+
echo "--- Log for failed TEST_$test_num ($test_name) ---" >&2
99+
if [ -f "$log_file" ]; then
100+
cat "$log_file" >&2
101+
else
102+
echo "Log file $log_file not found for TEST_$test_num ($test_name)." >&2
103+
fi
104+
echo "--- End of log for TEST_$test_num ($test_name) ---" >&2
105+
return "$actual_exit_code" # Return the failure code
106+
else
107+
echo "TEST_$test_num ($test_name) PASSED."
108+
return 0 # Return success
109+
fi
110+
}
111+
112+
# Helper function to call run_test and update the overall script exit code
113+
run_and_track_test() {
114+
local test_num_arg="$1"
115+
local test_name_arg="$2"
116+
local test_command_arg="$3"
117+
118+
# Run the test
119+
run_test "$test_num_arg" "$test_name_arg" "$test_command_arg"
120+
local test_specific_exit_code=$?
121+
122+
# If the test failed, set the overall script exit code to 1
123+
if [ "$test_specific_exit_code" -ne 0 ]; then
124+
# No need for extra echo here, run_test already logged the failure.
125+
overall_script_exit_code=1
126+
fi
127+
}
128+
129+
# --- Actual Test Execution ---
130+
run_and_track_test 0 "test_perf.py" \
131+
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_perf.py"
132+
run_and_track_test 1 "test_compilation.py" \
133+
"python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_compilation.py"
134+
run_and_track_test 2 "test_basic.py" \
135+
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_basic.py"
136+
run_and_track_test 3 "test_accuracy.py::test_lm_eval_accuracy_v1_engine" \
137+
"python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine"
138+
run_and_track_test 4 "test_quantization_accuracy.py" \
139+
"python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py"
140+
run_and_track_test 5 "examples/offline_inference/tpu.py" \
141+
"python3 /workspace/vllm/examples/offline_inference/tpu.py"
142+
run_and_track_test 6 "test_tpu_model_runner.py" \
143+
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/worker/test_tpu_model_runner.py"
144+
run_and_track_test 7 "test_sampler.py" \
145+
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_sampler.py"
146+
run_and_track_test 8 "test_topk_topp_sampler.py" \
147+
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_topk_topp_sampler.py"
148+
run_and_track_test 9 "test_multimodal.py" \
149+
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_multimodal.py"
150+
run_and_track_test 10 "test_pallas.py" \
151+
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py"
152+
run_and_track_test 11 "test_struct_output_generate.py" \
153+
"python3 -m pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py"
154+
run_and_track_test 12 "test_moe_pallas.py" \
155+
"python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py"
156+
run_and_track_test 13 "test_lora.py" \
157+
"VLLM_XLA_CHECK_RECOMPILATION=0 python3 -m pytest -s -v /workspace/vllm/tests/tpu/lora/test_lora.py"
158+
159+
# After all tests have been attempted, exit with the overall status.
160+
if [ "$overall_script_exit_code" -ne 0 ]; then
161+
echo "--- One or more tests FAILED. Overall script exiting with failure code 1. ---"
162+
else
163+
echo "--- All tests have completed and PASSED. Overall script exiting with success code 0. ---"
164+
fi
165+
exit "$overall_script_exit_code"
166+
' # IMPORTANT: This is the closing single quote for the bash -c "..." command. Ensure it is present and correct.
167+
168+
# Capture the exit code of the docker run command
169+
DOCKER_RUN_EXIT_CODE=$?
101170

171+
# The trap will run for cleanup.
172+
# Exit the main script with the Docker run command's exit code.
173+
if [ "$DOCKER_RUN_EXIT_CODE" -ne 0 ]; then
174+
echo "Docker run command failed with exit code $DOCKER_RUN_EXIT_CODE."
175+
exit "$DOCKER_RUN_EXIT_CODE"
176+
else
177+
echo "Docker run command completed successfully."
178+
exit 0
179+
fi
102180
# TODO: This test fails because it uses RANDOM_SEED sampling
103-
# && VLLM_USE_V1=1 pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \
181+
# pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \

.buildkite/test-pipeline.yaml

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -202,8 +202,9 @@ steps:
202202
- tests/test_sequence
203203
- tests/test_config
204204
- tests/test_logger
205+
- tests/test_vllm_port
205206
commands:
206-
- pytest -v -s engine test_sequence.py test_config.py test_logger.py
207+
- pytest -v -s engine test_sequence.py test_config.py test_logger.py test_vllm_port.py
207208
# OOM in the CI unless we run this separately
208209
- pytest -v -s tokenization
209210
working_dir: "/vllm-workspace/tests" # optional
@@ -279,18 +280,6 @@ steps:
279280
- pytest -v -s samplers
280281
- VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers
281282

282-
- label: LogitsProcessor Test # 5min
283-
working_dir: "/vllm-workspace/tests"
284-
mirror_hardwares: [amdexperimental, amdproduction]
285-
source_file_dependencies:
286-
- vllm/model_executor/layers
287-
- vllm/model_executor/guided_decoding
288-
- tests/test_logits_processor
289-
- tests/model_executor/test_guided_processors
290-
commands:
291-
- pytest -v -s test_logits_processor.py
292-
- pytest -v -s model_executor/test_guided_processors.py
293-
294283
- label: Speculative decoding tests # 40min
295284
mirror_hardwares: [amdexperimental]
296285
source_file_dependencies:
@@ -408,6 +397,17 @@ steps:
408397
- pytest -v -s tensorizer_loader
409398
- pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py
410399

400+
- label: Model Executor Test
401+
mirror_hardwares: [amdexperimental, amdproduction]
402+
soft_fail: true
403+
source_file_dependencies:
404+
- vllm/model_executor
405+
- tests/model_executor
406+
commands:
407+
- apt-get update && apt-get install -y curl libsodium23
408+
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
409+
- pytest -v -s model_executor
410+
411411
- label: Benchmarks # 9min
412412
mirror_hardwares: [amdexperimental, amdproduction]
413413
working_dir: "/vllm-workspace/.buildkite"
@@ -629,9 +629,11 @@ steps:
629629
- vllm/worker/model_runner.py
630630
- entrypoints/llm/test_collective_rpc.py
631631
- tests/v1/test_async_llm_dp.py
632+
- tests/v1/entrypoints/openai/test_multi_api_servers.py
632633
- vllm/v1/engine/
633634
commands:
634635
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/test_async_llm_dp.py
636+
- DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py
635637
- pytest -v -s entrypoints/llm/test_collective_rpc.py
636638
- pytest -v -s ./compile/test_basic_correctness.py
637639
- pytest -v -s ./compile/test_wrapper.py

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ repos:
5858
entry: tools/mypy.sh 0 "local"
5959
language: python
6060
types: [python]
61-
additional_dependencies: &mypy_deps [mypy==1.11.1, types-cachetools, types-setuptools<80.7, types-PyYAML, types-requests]
61+
additional_dependencies: &mypy_deps [mypy==1.11.1, types-cachetools, types-setuptools, types-PyYAML, types-requests, pydantic]
6262
stages: [pre-commit] # Don't run in CI
6363
- id: mypy-3.9 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
6464
name: Run mypy for Python 3.9

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
2323
# Suppress potential warnings about unused manually-specified variables
2424
set(ignoreMe "${VLLM_PYTHON_PATH}")
2525

26+
# Prevent installation of dependencies (cutlass) by default.
27+
install(CODE "set(CMAKE_INSTALL_LOCAL_ONLY TRUE)" ALL_COMPONENTS)
28+
2629
#
2730
# Supported python versions. These versions will be searched in order, the
2831
# first match will be selected. These should be kept in sync with setup.py.
@@ -788,5 +791,7 @@ endif()
788791
# For CUDA we also build and ship some external projects.
789792
if (VLLM_GPU_LANG STREQUAL "CUDA")
790793
include(cmake/external_projects/flashmla.cmake)
794+
795+
# vllm-flash-attn should be last as it overwrites some CMake functions
791796
include(cmake/external_projects/vllm_flash_attn.cmake)
792797
endif ()

SECURITY.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,6 @@ Please report security issues privately using [the vulnerability submission form
88

99
---
1010

11+
Please see the [Security Guide in the vLLM documentation](https://docs.vllm.ai/en/latest/usage/security.html) for more information on vLLM's security assumptions and recommendations.
12+
1113
Please see [PyTorch's Security Policy](https://github.com/pytorch/pytorch/blob/main/SECURITY.md) for more information and recommendations on how to securely interact with models.

0 commit comments

Comments (0)