Skip to content

Commit ed62ba4

Browse files
committed
[WIP] Add L0_check_health_vllm
1 parent ca13f02 commit ed62ba4

File tree

3 files changed

+305
-0
lines changed

3 files changed

+305
-0
lines changed
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# Redistribution and use in source and binary forms, with or without
4+
# modification, are permitted provided that the following conditions
5+
# are met:
6+
# * Redistributions of source code must retain the above copyright
7+
# notice, this list of conditions and the following disclaimer.
8+
# * Redistributions in binary form must reproduce the above copyright
9+
# notice, this list of conditions and the following disclaimer in the
10+
# documentation and/or other materials provided with the distribution.
11+
# * Neither the name of NVIDIA CORPORATION nor the names of its
12+
# contributors may be used to endorse or promote products derived
13+
# from this software without specific prior written permission.
14+
#
15+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
27+
import json
28+
import os
29+
30+
import numpy as np
31+
import pytest
32+
import tritonclient.grpc as grpcclient
33+
34+
35+
class TestCheckHealth:
    """End-to-end checks of the vLLM backend health check over Triton gRPC.

    Every test talks to a Triton server at ``_grpc_url`` and targets the model
    named ``_model_name``. Which test is run against which server/model
    configuration is decided by the caller that launches pytest.
    """

    _grpc_url = "localhost:8001"
    _model_name = "vllm_opt"
    _sampling_parameters = {"temperature": "0", "top_p": "1"}
    _prompt = "In this example,"

    def _get_inputs(self, prompt, stream=True, sampling_parameters=None):
        """Build the list of ``grpcclient.InferInput`` objects for one request.

        Args:
            prompt: Text sent as the ``text_input`` BYTES tensor (UTF-8 encoded).
            stream: Value of the ``stream`` BOOL tensor.
            sampling_parameters: Optional dict; when given it is JSON-encoded
                into the ``sampling_parameters`` BYTES tensor.

        Returns:
            The inputs, in the order text_input, stream, [sampling_parameters].
        """
        inputs = []

        inputs.append(grpcclient.InferInput("text_input", [1], "BYTES"))
        inputs[-1].set_data_from_numpy(
            np.array([prompt.encode("utf-8")], dtype=np.object_)
        )

        inputs.append(grpcclient.InferInput("stream", [1], "BOOL"))
        inputs[-1].set_data_from_numpy(np.array([stream], dtype=bool))

        if sampling_parameters is not None:
            inputs.append(grpcclient.InferInput("sampling_parameters", [1], "BYTES"))
            inputs[-1].set_data_from_numpy(
                np.array(
                    [json.dumps(sampling_parameters).encode("utf-8")], dtype=np.object_
                )
            )

        return inputs

    def _callback(self, result, error):
        """Stream callback: record one (result, error) pair in ``_responses``."""
        self._responses.append({"result": result, "error": error})

    def _llm_infer(self):
        """Send one streaming request and collect its responses.

        ``self._responses`` is reset and then filled by ``_callback``. The
        sampling parameters are sent both as the ``sampling_parameters`` input
        tensor and as the request-level ``parameters``.
        """
        inputs = self._get_inputs(
            self._prompt, stream=True, sampling_parameters=self._sampling_parameters
        )
        self._responses = []
        with grpcclient.InferenceServerClient(self._grpc_url) as client:
            client.start_stream(self._callback)
            client.async_stream_infer(
                self._model_name, inputs=inputs, parameters=self._sampling_parameters
            )
            # NOTE(review): presumably stop_stream() waits for the outstanding
            # responses to be delivered to the callback - confirm against the
            # tritonclient documentation.
            client.stop_stream()

    def _assert_text_output_valid(self):
        """Assert the collected responses are error-free and form a sentence."""
        pieces = []
        for response in self._responses:
            result, error = response["result"], response["error"]
            assert error is None, f"unexpected inference error: {error}"
            pieces.append(result.as_numpy(name="text_output")[0].decode("utf-8"))
        text_output = "".join(pieces)
        assert len(text_output) > 0, "output is empty"
        # More than four spaces is used as a cheap "looks like a sentence" check.
        assert text_output.count(" ") > 4, "output is not a sentence"

    def _assert_infer_exception(self, expected_exception_message):
        """Assert exactly one response arrived and it is the expected error."""
        assert (
            len(self._responses) == 1
        ), f"expected exactly 1 response, got {len(self._responses)}"
        for response in self._responses:
            result, error = response["result"], response["error"]
            assert result is None, "expected an error response without a result"
            assert (
                str(error) == expected_exception_message
            ), f"unexpected error message: {error}"

    def _assert_model_ready(self, expected_readiness):
        """Assert the server reports the model's readiness as expected."""
        with grpcclient.InferenceServerClient(self._grpc_url) as client:
            assert client.is_model_ready(self._model_name) == expected_readiness

    def test_vllm_is_healthy(self):
        """The model stays ready and keeps answering across repeated requests."""
        num_repeats = 3
        for _ in range(num_repeats):
            self._assert_model_ready(True)
            self._llm_infer()
            self._assert_text_output_valid()
            self._assert_model_ready(True)

    def test_vllm_not_healthy(self):
        """A failing health check rejects the request and the model goes away."""
        self._assert_model_ready(True)
        # The 1st infer should complete successfully
        self._llm_infer()
        self._assert_text_output_valid()
        self._assert_model_ready(True)
        # The 2nd infer should begin with health check failed
        self._llm_infer()
        self._assert_infer_exception("vLLM engine is not healthy")
        self._assert_model_ready(False)
        # The 3rd infer should have model not found
        self._llm_infer()
        self._assert_infer_exception(
            "Request for unknown model: 'vllm_opt' has no available versions"
        )
        self._assert_model_ready(False)

    def test_vllm_enable_health_check_multi_instance(self):
        """With 2 instances the backend warns and the model behaves as healthy.

        The server log path is read from the ``SERVER_LOG`` environment
        variable.
        """
        # Decode explicitly and tolerate stray bytes: the log is only searched
        # for a substring, so a locale-dependent decode error must not fail
        # the test before the real check runs.
        with open(
            os.environ["SERVER_LOG"], encoding="utf-8", errors="replace"
        ) as f:
            server_log = f.read()
        expected_vllm_warning = "[vllm] Health check may only be enabled when the model has exactly 1 instance but 2 are found"
        assert expected_vllm_warning in server_log
        # Health check should be disabled
        self.test_vllm_is_healthy()
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# Redistribution and use in source and binary forms, with or without
4+
# modification, are permitted provided that the following conditions
5+
# are met:
6+
# * Redistributions of source code must retain the above copyright
7+
# notice, this list of conditions and the following disclaimer.
8+
# * Redistributions in binary form must reproduce the above copyright
9+
# notice, this list of conditions and the following disclaimer in the
10+
# documentation and/or other materials provided with the distribution.
11+
# * Neither the name of NVIDIA CORPORATION nor the names of its
12+
# contributors may be used to endorse or promote products derived
13+
# from this software without specific prior written permission.
14+
#
15+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
27+
from vllm.engine.async_llm_engine import AsyncLLMEngine as real_AsyncLLMEngine
28+
29+
30+
class mock_AsyncLLMEngine(real_AsyncLLMEngine):
    """AsyncLLMEngine stand-in whose health check fails after the first call.

    All behavior is inherited from the real engine except ``check_health()``.
    """

    # Number of check_health() calls seen so far. The first increment reads
    # this class-level 0 and stores the new value on the instance.
    _mock_check_health_count = 0

    async def check_health(self) -> None:
        """Return normally on the first call; raise RuntimeError afterwards."""
        calls_so_far = self._mock_check_health_count + 1
        self._mock_check_health_count = calls_so_far
        if calls_so_far <= 1:
            return
        raise RuntimeError("Simulated vLLM check_health() failure")

ci/L0_check_health_vllm/test.sh

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
#!/bin/bash
2+
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions
6+
# are met:
7+
# * Redistributions of source code must retain the above copyright
8+
# notice, this list of conditions and the following disclaimer.
9+
# * Redistributions in binary form must reproduce the above copyright
10+
# notice, this list of conditions and the following disclaimer in the
11+
# documentation and/or other materials provided with the distribution.
12+
# * Neither the name of NVIDIA CORPORATION nor the names of its
13+
# contributors may be used to endorse or promote products derived
14+
# from this software without specific prior written permission.
15+
#
16+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
17+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
20+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27+
28+
# Restrict the whole test run to GPU 0.
export CUDA_VISIBLE_DEVICES=0
# Shared CI helpers (presumably the source of run_server used below).
source ../common/util.sh

# Client-side test dependencies.
pip3 install pytest==8.1.1
# Quoted so the shell cannot treat "[grpc]" as a glob character class and
# expand it against files in the current directory.
pip3 install 'tritonclient[grpc]'

# Overall exit status; set to 1 by any failing test case.
RET=0
35+
36+
# Recreate ./models from scratch with the sample vLLM model copied in as
# "vllm_opt". Also removes any stale vllm_baseline_output.pkl.
#   $1 - path to the sample model repository
#        (default: ../../samples/model_repository)
function setup_model_repository {
    local sample_model_repo_path=${1:-"../../samples/model_repository"}
    rm -rf models vllm_baseline_output.pkl && mkdir -p models
    # Quoted so a repository path containing spaces or glob characters is
    # handed to cp as a single argument.
    cp -r "$sample_model_repo_path/vllm_model" models/vllm_opt
}
41+
42+
# Prepare the model repository, then overwrite the model's config.pbtxt with
# one that declares two KIND_MODEL instance groups.
function setup_model_repository_with_multi_instances {
    local config_file=models/vllm_opt/config.pbtxt
    setup_model_repository
    # A single truncating redirect for the whole group yields the same file
    # as one ">" followed by a series of ">>".
    {
        echo -e "backend: \"vllm\""
        echo -e "instance_group ["
        echo -e " { kind: KIND_MODEL },"
        echo -e " { kind: KIND_MODEL \n count: 1 }"
        echo -e "]"
    } > "$config_file"
}
50+
51+
# Append an ENABLE_VLLM_HEALTH_CHECK parameter to the model's config.pbtxt.
#   $1 - string value to store for the parameter (e.g. "true" or "false")
function enable_health_check {
    local setting="$1"
    {
        echo -e "parameters: {"
        echo -e " key: \"ENABLE_VLLM_HEALTH_CHECK\""
        echo -e " value: { string_value: \"$setting\" }"
        echo -e "}"
    } >> models/vllm_opt/config.pbtxt
}
58+
59+
# Patch the installed vLLM backend so model.py imports mock_AsyncLLMEngine in
# place of the real AsyncLLMEngine. The untouched model.py is kept next to it
# as .model.py.backup.
function mock_vllm_async_llm_engine {
    local backend_dir=/opt/tritonserver/backends/vllm
    # Move the original aside first, then patch a copy of it.
    mv "$backend_dir/model.py" "$backend_dir/.model.py.backup"
    cp "$backend_dir/.model.py.backup" "$backend_dir/model.py"
    sed -i 's/from vllm.engine.async_llm_engine import AsyncLLMEngine/from mock_async_llm_engine import mock_AsyncLLMEngine as AsyncLLMEngine/' "$backend_dir/model.py"
    # The mock module has to sit beside model.py for the new import to resolve.
    cp mock_async_llm_engine.py "$backend_dir"
}
65+
66+
# Undo mock_vllm_async_llm_engine: delete the mock module and the patched
# model.py, then put the backed-up model.py back in place.
function unmock_vllm_async_llm_engine {
    local backend_dir=/opt/tritonserver/backends/vllm
    rm -f "$backend_dir/mock_async_llm_engine.py" "$backend_dir/model.py"
    mv "$backend_dir/.model.py.backup" "$backend_dir/model.py"
}
70+
71+
# Start the server on ./models, run one unit test from check_health_test.py
# against it, then stop the server.
#   $1 - test name; used as the prefix of the server log, the pytest log and
#        the junit report
#   $2 - name of the TestCheckHealth method to run
# Sets RET=1 if the unit test fails. Exits the script with status 1 if the
# server does not start. Note that "set -e" is left enabled on return.
function test_check_health {
    local test_name="$1"
    local unit_test_name="$2"

    # Globals, presumably consumed by run_server from util.sh.
    SERVER_LOG="$test_name.server.log"
    SERVER_ARGS="--model-repository=models --model-control-mode=explicit --load-model=*"
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat "$SERVER_LOG"
        exit 1
    fi

    # A failing unit test must be recorded in RET, not abort the script.
    set +e
    # SERVER_LOG is exported to pytest because one test reads the server log.
    SERVER_LOG="$SERVER_LOG" python3 -m pytest --junitxml="$test_name.report.xml" -s -v "check_health_test.py::TestCheckHealth::$unit_test_name" > "$test_name.log"
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** $test_name FAILED. \n***"
        RET=1
    fi
    set -e

    kill "$SERVER_PID"
    wait "$SERVER_PID"
}
95+
96+
# Run one test case against a freshly prepared single-instance model
# repository.
#   $1 - test name
#   $2 - unit test to run
#   $3 - (optional) ENABLE_VLLM_HEALTH_CHECK value; when omitted the parameter
#        is not added to the model config
function run_single_instance_case {
    local case_name="$1"
    local unit_test="$2"
    setup_model_repository
    if [ "$#" -ge 3 ]; then
        enable_health_check "$3"
    fi
    test_check_health "$case_name" "$unit_test"
}

# Real vLLM engine: health check unspecified, disabled, then enabled
run_single_instance_case "health_check_unspecified" "test_vllm_is_healthy"
run_single_instance_case "health_check_disabled" "test_vllm_is_healthy" "false"
run_single_instance_case "health_check_enabled" "test_vllm_is_healthy" "true"

# Mock check_health() from vLLM
mock_vllm_async_llm_engine

# Mocked check_health() failure: health check unspecified, then disabled
run_single_instance_case "health_check_unspecified_mocked_failure" "test_vllm_is_healthy"
run_single_instance_case "health_check_disabled_mocked_failure" "test_vllm_is_healthy" "false"

# Mocked check_health() failure with the health check enabled
run_single_instance_case "health_check_enabled_mocked_failure" "test_vllm_not_healthy" "true"

# Mocked check_health() failure with the health check enabled when there are
# multiple instances
setup_model_repository_with_multi_instances
enable_health_check "true"
test_check_health "health_check_enabled_multi_instance_mocked_failure" "test_vllm_enable_health_check_multi_instance"

# Unmock check_health()
unmock_vllm_async_llm_engine

if [ $RET -ne 0 ]; then
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi
exit $RET

0 commit comments

Comments
 (0)