@@ -13,91 +13,138 @@ remove_docker_container
1313
1414# For HF_TOKEN.
1515source /etc/environment
16- # Run a simple end-to-end example.
16+
1717docker run --privileged --net host --shm-size=16G -it \
1818 -e " HF_TOKEN=$HF_TOKEN " --name tpu-test \
19- vllm-tpu /bin/bash -c " python3 -m pip install git+https://github.com/thuml/depyf.git \
20- && python3 -m pip install pytest pytest-asyncio tpu-info \
21- && python3 -m pip install lm_eval[api]==0.4.4 \
22- && export VLLM_XLA_CACHE_PATH= \
23- && export VLLM_USE_V1=1 \
24- && export VLLM_XLA_CHECK_RECOMPILATION=1 \
25- && echo HARDWARE \
26- && tpu-info \
27- && { \
28- echo TEST_0: Running test_perf.py; \
29- python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_perf.py; \
30- echo TEST_0_EXIT_CODE: \$ ?; \
31- } & \
32- { \
33- echo TEST_1: Running test_compilation.py; \
34- python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_compilation.py; \
35- echo TEST_1_EXIT_CODE: \$ ?; \
36- } & \
37- { \
38- echo TEST_2: Running test_basic.py; \
39- python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_basic.py; \
40- echo TEST_2_EXIT_CODE: \$ ?; \
41- } & \
42- { \
43- echo TEST_3: Running test_accuracy.py::test_lm_eval_accuracy_v1_engine; \
44- python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine; \
45- echo TEST_3_EXIT_CODE: \$ ?; \
46- } & \
47- { \
48- echo TEST_4: Running test_quantization_accuracy.py; \
49- python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py; \
50- echo TEST_4_EXIT_CODE: \$ ?; \
51- } & \
52- { \
53- echo TEST_5: Running examples/offline_inference/tpu.py; \
54- python3 /workspace/vllm/examples/offline_inference/tpu.py; \
55- echo TEST_5_EXIT_CODE: \$ ?; \
56- } & \
57- { \
58- echo TEST_6: Running test_tpu_model_runner.py; \
59- python3 -m pytest -s -v /workspace/vllm/tests/tpu/worker/test_tpu_model_runner.py; \
60- echo TEST_6_EXIT_CODE: \$ ?; \
61- } & \
62- { \
63- echo TEST_7: Running test_sampler.py; \
64- python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_sampler.py; \
65- echo TEST_7_EXIT_CODE: \$ ?; \
66- } & \
67- { \
68- echo TEST_8: Running test_topk_topp_sampler.py; \
69- python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_topk_topp_sampler.py; \
70- echo TEST_8_EXIT_CODE: \$ ?; \
71- } & \
72- { \
73- echo TEST_9: Running test_multimodal.py; \
74- python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_multimodal.py; \
75- echo TEST_9_EXIT_CODE: \$ ?; \
76- } & \
77- { \
78- echo TEST_10: Running test_pallas.py; \
79- python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py; \
80- echo TEST_10_EXIT_CODE: \$ ?; \
81- } & \
82- { \
83- echo TEST_11: Running test_struct_output_generate.py; \
84- python3 -m pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py; \
85- echo TEST_11_EXIT_CODE: \$ ?; \
86- } & \
87- { \
88- echo TEST_12: Running test_moe_pallas.py; \
89- python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py; \
90- echo TEST_12_EXIT_CODE: \$ ?; \
91- } & \
92- # Disable the TPU LoRA tests until the feature is activated
93- # & { \
94- # echo TEST_13: Running test_moe_pallas.py; \
95- # python3 -m pytest -s -v /workspace/vllm/tests/tpu/lora/; \
96- # echo TEST_13_EXIT_CODE: \$ ?; \
97- # } & \
98- wait \
99- && echo 'All tests have attempted to run. Check logs for individual test statuses and exit codes.' \
100- "
19+ vllm-tpu /bin/bash -c '
20+ set -e # Exit immediately if a command exits with a non-zero status.
21+ set -u # Treat unset variables as an error.
22+
23+ echo "--- Starting script inside Docker container ---"
24+
25+ # Create results directory
26+ RESULTS_DIR=$(mktemp -d)
27+ # If mktemp fails, set -e will cause the script to exit.
28+ echo "Results will be stored in: $RESULTS_DIR"
29+
30+ # Install dependencies
31+ echo "--- Installing Python dependencies ---"
32+ python3 -m pip install --progress-bar off git+https://github.com/thuml/depyf.git \
33+ && python3 -m pip install --progress-bar off pytest pytest-asyncio tpu-info \
34+ && python3 -m pip install --progress-bar off lm_eval[api]==0.4.4
35+ echo "--- Python dependencies installed ---"
36+ export VLLM_USE_V1=1
37+ export VLLM_XLA_CHECK_RECOMPILATION=1
38+ export VLLM_XLA_CACHE_PATH=
39+ echo "Using VLLM V1"
40+
41+ echo "--- Hardware Information ---"
42+ tpu-info
43+ echo "--- Starting Tests ---"
44+ set +e
45+ overall_script_exit_code=0
46+
47+ # --- Test Definitions ---
48+ # If a test fails, this function will print logs and will not cause the main script to exit.
49+ run_test() {
50+ local test_num=$1
51+ local test_name=$2
52+ local test_command=$3
53+ local log_file="$RESULTS_DIR/test_${test_num}.log"
54+ local actual_exit_code
55+
56+ echo "--- TEST_$test_num: Running $test_name ---"
57+
58+ # Execute the test command.
59+ eval "$test_command" > >(tee -a "$log_file") 2> >(tee -a "$log_file" >&2)
60+ actual_exit_code=$?
61+
62+ echo "TEST_${test_num}_COMMAND_EXIT_CODE: $actual_exit_code" # This goes to main log
63+ echo "TEST_${test_num}_COMMAND_EXIT_CODE: $actual_exit_code" >> "$log_file" # Also to per-test log
64+
65+ if [ "$actual_exit_code" -ne 0 ]; then
66+ echo "TEST_$test_num ($test_name) FAILED with exit code $actual_exit_code." >&2
67+ echo "--- Log for failed TEST_$test_num ($test_name) ---" >&2
68+ if [ -f "$log_file" ]; then
69+ cat "$log_file" >&2
70+ else
71+ echo "Log file $log_file not found for TEST_$test_num ($test_name)." >&2
72+ fi
73+ echo "--- End of log for TEST_$test_num ($test_name) ---" >&2
74+ return "$actual_exit_code" # Return the failure code
75+ else
76+ echo "TEST_$test_num ($test_name) PASSED."
77+ return 0 # Return success
78+ fi
79+ }
80+
81+ # Helper function to call run_test and update the overall script exit code
82+ run_and_track_test() {
83+ local test_num_arg="$1"
84+ local test_name_arg="$2"
85+ local test_command_arg="$3"
86+
87+ # Run the test
88+ run_test "$test_num_arg" "$test_name_arg" "$test_command_arg"
89+ local test_specific_exit_code=$?
90+
91+ # If the test failed, set the overall script exit code to 1
92+ if [ "$test_specific_exit_code" -ne 0 ]; then
93+ # No need for extra echo here, run_test already logged the failure.
94+ overall_script_exit_code=1
95+ fi
96+ }
97+
98+ # --- Actual Test Execution ---
99+ run_and_track_test 0 "test_perf.py" \
100+ "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_perf.py"
101+ run_and_track_test 1 "test_compilation.py" \
102+ "python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_compilation.py"
103+ run_and_track_test 2 "test_basic.py" \
104+ "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_basic.py"
105+ run_and_track_test 3 "test_accuracy.py::test_lm_eval_accuracy_v1_engine" \
106+ "python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine"
107+ run_and_track_test 4 "test_quantization_accuracy.py" \
108+ "python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py"
109+ run_and_track_test 5 "examples/offline_inference/tpu.py" \
110+ "python3 /workspace/vllm/examples/offline_inference/tpu.py"
111+ run_and_track_test 6 "test_tpu_model_runner.py" \
112+ "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/worker/test_tpu_model_runner.py"
113+ run_and_track_test 7 "test_sampler.py" \
114+ "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_sampler.py"
115+ run_and_track_test 8 "test_topk_topp_sampler.py" \
116+ "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_topk_topp_sampler.py"
117+ run_and_track_test 9 "test_multimodal.py" \
118+ "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_multimodal.py"
119+ run_and_track_test 10 "test_pallas.py" \
120+ "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py"
121+ run_and_track_test 11 "test_struct_output_generate.py" \
122+ "python3 -m pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py"
123+ run_and_track_test 12 "test_moe_pallas.py" \
124+ "python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py"
125+ run_and_track_test 13 "test_lora.py" \
126+ "VLLM_XLA_CHECK_RECOMPILATION=0 python3 -m pytest -s -v /workspace/vllm/tests/tpu/lora/test_lora.py"
127+
128+ # After all tests have been attempted, exit with the overall status.
129+ if [ "$overall_script_exit_code" -ne 0 ]; then
130+ echo "--- One or more tests FAILED. Overall script exiting with failure code 1. ---"
131+ else
132+ echo "--- All tests have completed and PASSED. Overall script exiting with success code 0. ---"
133+ fi
134+ exit "$overall_script_exit_code"
135+ ' # IMPORTANT: This is the closing single quote for the bash -c "..." command. Ensure it is present and correct.
136+
137+ # Capture the exit code of the docker run command
138+ DOCKER_RUN_EXIT_CODE=$?
101139
140+ # The trap will run for cleanup.
141+ # Exit the main script with the Docker run command's exit code.
142+ if [ " $DOCKER_RUN_EXIT_CODE " -ne 0 ]; then
143+ echo " Docker run command failed with exit code $DOCKER_RUN_EXIT_CODE ."
144+ exit " $DOCKER_RUN_EXIT_CODE "
145+ else
146+ echo " Docker run command completed successfully."
147+ exit 0
148+ fi
102149# TODO: This test fails because it uses RANDOM_SEED sampling
103- # && VLLM_USE_V1=1 pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \
150+ # pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \
0 commit comments