@@ -45,15 +45,13 @@ sed -i 's/"gpu_memory_utilization": 0.5/"gpu_memory_utilization": 0.3/' models/v
 [ -f vllm_baseline_output.pkl ] && rm vllm_baseline_output.pkl
 RET=0
 
-export VLLM_USE_V1=0
-
 set +e
 # Need to generate baseline first, since running 2 vLLM engines causes
 # memory issues: https://github.com/vllm-project/vllm/issues/2248
-python3 $CLIENT_PY --generate-baseline >> $VLLM_ENGINE_LOG 2>&1 & BASELINE_PID=$!
+VLLM_USE_V1=0 python3 $CLIENT_PY --generate-baseline >> $VLLM_ENGINE_LOG 2>&1 & BASELINE_PID=$!
 wait $BASELINE_PID
 
-python3 $CLIENT_PY --generate-guided-baseline > $VLLM_ENGINE_LOG 2>&1 & BASELINE_PID=$!
+VLLM_USE_V1=0 python3 $CLIENT_PY --generate-guided-baseline > $VLLM_ENGINE_LOG 2>&1 & BASELINE_PID=$!
 wait $BASELINE_PID
 
 set -e
@@ -85,48 +83,11 @@ set -e
 kill $SERVER_PID
 wait $SERVER_PID
 
-# Remove old baseline files if they exist
-[ -f vllm_baseline_output.pkl ] && rm vllm_baseline_output.pkl
-[ -f vllm_guided_baseline_output.pkl ] && rm vllm_guided_baseline_output.pkl
-
-# Run tests for VLLM v1, but omit guided decoding, as it's development in progress as of 0.8.1
-export VLLM_USE_V1=1
-EXPECTED_NUM_TESTS=1
-
-set +e
-# Need to generate baseline first, since running 2 vLLM engines causes
-# memory issues: https://github.com/vllm-project/vllm/issues/2248
-python3 $CLIENT_PY --generate-baseline >> $VLLM_ENGINE_LOG 2>&1 & BASELINE_PID=$!
-wait $BASELINE_PID
-
-set -e
-
-run_server
-if [ " $SERVER_PID " == " 0" ]; then
-    cat $SERVER_LOG
-    echo -e " \n***\n*** Failed to start $SERVER \n***"
-    exit 1
-fi
-
-set +e
-python3 -m unittest accuracy_test.VLLMTritonAccuracyTest.test_vllm_model > $CLIENT_LOG 2>&1
-
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e " \n***\n*** Running $CLIENT_PY FAILED. \n***"
+# Check that warning about V1 Engine appears in log - this warning is expected
+if ! grep -q " Engine in background thread is experimental on VLLM_USE_V1=1. Falling back to V0 Engine." $SERVER_LOG ; then
+    echo -e " \n***\n*** ERROR: Expected warning about vLLM falling back to V0 Engine not found in logs.\n***"
     RET=1
-else
-    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e " \n***\n*** Test Result Verification FAILED.\n***"
-        RET=1
-    fi
 fi
-set -e
-
-kill $SERVER_PID
-wait $SERVER_PID
 
 rm -rf models/
 
@@ -138,8 +99,6 @@
     echo -e " \n***\n*** Accuracy test PASSED. \n***"
 fi
 
-unset VLLM_USE_V1
-
 collect_artifacts_from_subdir
 
 exit $RET
0 commit comments