Skip to content

Commit 8db5554

Browse files
committed
ip
1 parent 0757124 commit 8db5554

File tree

1 file changed

+5
-46
lines changed
  • ci/L0_backend_vllm/accuracy_test

1 file changed

+5
-46
lines changed

ci/L0_backend_vllm/accuracy_test/test.sh

Lines changed: 5 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,13 @@ sed -i 's/"gpu_memory_utilization": 0.5/"gpu_memory_utilization": 0.3/' models/v
4545
[ -f vllm_baseline_output.pkl ] && rm vllm_baseline_output.pkl
4646
RET=0
4747

48-
export VLLM_USE_V1=0
49-
5048
set +e
5149
# Need to generate baseline first, since running 2 vLLM engines causes
5250
# memory issues: https://github.com/vllm-project/vllm/issues/2248
53-
python3 $CLIENT_PY --generate-baseline >> $VLLM_ENGINE_LOG 2>&1 & BASELINE_PID=$!
51+
VLLM_USE_V1=0 python3 $CLIENT_PY --generate-baseline >> $VLLM_ENGINE_LOG 2>&1 & BASELINE_PID=$!
5452
wait $BASELINE_PID
5553

56-
python3 $CLIENT_PY --generate-guided-baseline > $VLLM_ENGINE_LOG 2>&1 & BASELINE_PID=$!
54+
VLLM_USE_V1=0 python3 $CLIENT_PY --generate-guided-baseline > $VLLM_ENGINE_LOG 2>&1 & BASELINE_PID=$!
5755
wait $BASELINE_PID
5856

5957
set -e
@@ -85,48 +83,11 @@ set -e
8583
kill $SERVER_PID
8684
wait $SERVER_PID
8785

88-
# Remove old baseline files if they exist
89-
[ -f vllm_baseline_output.pkl ] && rm vllm_baseline_output.pkl
90-
[ -f vllm_guided_baseline_output.pkl ] && rm vllm_guided_baseline_output.pkl
91-
92-
# Run tests for vLLM V1, but omit guided decoding, as it is still under development as of 0.8.1
93-
export VLLM_USE_V1=1
94-
EXPECTED_NUM_TESTS=1
95-
96-
set +e
97-
# Need to generate baseline first, since running 2 vLLM engines causes
98-
# memory issues: https://github.com/vllm-project/vllm/issues/2248
99-
python3 $CLIENT_PY --generate-baseline >> $VLLM_ENGINE_LOG 2>&1 & BASELINE_PID=$!
100-
wait $BASELINE_PID
101-
102-
set -e
103-
104-
run_server
105-
if [ "$SERVER_PID" == "0" ]; then
106-
cat $SERVER_LOG
107-
echo -e "\n***\n*** Failed to start $SERVER\n***"
108-
exit 1
109-
fi
110-
111-
set +e
112-
python3 -m unittest accuracy_test.VLLMTritonAccuracyTest.test_vllm_model > $CLIENT_LOG 2>&1
113-
114-
if [ $? -ne 0 ]; then
115-
cat $CLIENT_LOG
116-
echo -e "\n***\n*** Running $CLIENT_PY FAILED. \n***"
86+
# Check that warning about V1 Engine appears in log - this warning is expected
87+
if ! grep -q "Engine in background thread is experimental on VLLM_USE_V1=1. Falling back to V0 Engine." $SERVER_LOG; then
88+
echo -e "\n***\n*** ERROR: Expected warning about vLLM falling back to V0 Engine not found in logs.\n***"
11789
RET=1
118-
else
119-
check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
120-
if [ $? -ne 0 ]; then
121-
cat $CLIENT_LOG
122-
echo -e "\n***\n*** Test Result Verification FAILED.\n***"
123-
RET=1
124-
fi
12590
fi
126-
set -e
127-
128-
kill $SERVER_PID
129-
wait $SERVER_PID
13091

13192
rm -rf models/
13293

@@ -138,8 +99,6 @@ else
13899
echo -e "\n***\n*** Accuracy test PASSED. \n***"
139100
fi
140101

141-
unset VLLM_USE_V1
142-
143102
collect_artifacts_from_subdir
144103

145104
exit $RET

0 commit comments

Comments
 (0)