Skip to content

Commit 33367f6

Browse files
committed
Allow health check to be enabled for all instance counts
1 parent 4803ee0 commit 33367f6

File tree

3 files changed

+12
-55
lines changed

3 files changed

+12
-55
lines changed

ci/L0_check_health_vllm/check_health_test.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,8 @@
2525
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2626

2727
import json
28-
import os
2928

3029
import numpy as np
31-
import pytest
3230
import tritonclient.grpc as grpcclient
3331

3432

@@ -118,11 +116,3 @@ def test_vllm_not_healthy(self):
118116
"Request for unknown model: 'vllm_opt' has no available versions"
119117
)
120118
self._assert_model_ready(False)
121-
122-
def test_vllm_enable_health_check_multi_instance(self):
123-
with open(os.environ["SERVER_LOG"]) as f:
124-
server_log = f.read()
125-
expected_vllm_warning = "[vllm] Health check may only be enabled when the model has exactly 1 instance but 2 are found"
126-
assert expected_vllm_warning in server_log
127-
# Health check should be disabled
128-
self.test_vllm_is_healthy()

ci/L0_check_health_vllm/test.sh

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,6 @@ function setup_model_repository {
3939
cp -r $sample_model_repo_path/vllm_model models/vllm_opt
4040
}
4141

42-
function setup_model_repository_with_multi_instances {
43-
setup_model_repository
44-
echo -e "backend: \"vllm\"" > models/vllm_opt/config.pbtxt
45-
echo -e "instance_group [" >> models/vllm_opt/config.pbtxt
46-
echo -e " { kind: KIND_MODEL }," >> models/vllm_opt/config.pbtxt
47-
echo -e " { kind: KIND_MODEL \n count: 1 }" >> models/vllm_opt/config.pbtxt
48-
echo -e "]" >> models/vllm_opt/config.pbtxt
49-
}
50-
5142
function enable_health_check {
5243
local enable_vllm_health_check="$1"
5344
echo -e "parameters: {" >> models/vllm_opt/config.pbtxt
@@ -82,7 +73,7 @@ function test_check_health {
8273
fi
8374

8475
set +e
85-
SERVER_LOG=$SERVER_LOG python3 -m pytest --junitxml=$test_name.report.xml -s -v check_health_test.py::TestCheckHealth::$unit_test_name > $test_name.log
76+
python3 -m pytest --junitxml=$test_name.report.xml -s -v check_health_test.py::TestCheckHealth::$unit_test_name > $test_name.log
8677
if [ $? -ne 0 ]; then
8778
echo -e "\n***\n*** $test_name FAILED. \n***"
8879
RET=1
@@ -124,12 +115,6 @@ setup_model_repository
124115
enable_health_check "true"
125116
test_check_health "health_check_enabled_mocked_failure" "test_vllm_not_healthy"
126117

127-
# Test health check enabled with mocked vLLM check_health() failure when there
128-
# are multiple instances
129-
setup_model_repository_with_multi_instances
130-
enable_health_check "true"
131-
test_check_health "health_check_enabled_multi_instance_mocked_failure" "test_vllm_enable_health_check_multi_instance"
132-
133118
# Unmock check_health()
134119
unmock_vllm_async_llm_engine
135120

src/model.py

Lines changed: 11 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,10 @@ def initialize(self, args):
112112
self.output_dtype = pb_utils.triton_string_to_numpy(output_config["data_type"])
113113

114114
# Setup vLLM engine health check
115-
self._setup_health_check()
115+
self._enable_health_check = self._get_bool_config_param(
116+
"ENABLE_VLLM_HEALTH_CHECK"
117+
)
118+
self._is_healthy = True
116119

117120
# Prepare vLLM engine
118121
self.init_engine()
@@ -134,31 +137,6 @@ def initialize(self, args):
134137
self._shutdown_event = asyncio.Event()
135138
self._event_thread.start()
136139

137-
def _setup_health_check(self):
138-
# Check if health check should be enabled
139-
self._enable_health_check = (
140-
"ENABLE_VLLM_HEALTH_CHECK" in self.model_config["parameters"]
141-
) and (
142-
self.model_config["parameters"]["ENABLE_VLLM_HEALTH_CHECK"][
143-
"string_value"
144-
].lower()
145-
in ["yes", "true"]
146-
)
147-
# Setup health check if enabled
148-
if self._enable_health_check:
149-
# Only enable health check if there is exactly 1 instance
150-
num_instances = 0
151-
for group in self.model_config["instance_group"]:
152-
num_instances += group["count"]
153-
if num_instances != 1:
154-
self.logger.log_warn(
155-
f"[vllm] Health check may only be enabled when the model has exactly 1 instance but {num_instances} are found"
156-
)
157-
self._enable_health_check = False
158-
return
159-
# Set is healthy flag
160-
self._is_healthy = True
161-
162140
def init_engine(self):
163141
# Currently, Triton needs to use decoupled policy for asynchronously
164142
# forwarding requests to vLLM engine, so assert it.
@@ -191,9 +169,7 @@ def init_engine(self):
191169
# Create vLLM custom metrics
192170
self.vllm_metrics = None
193171
if (
194-
"REPORT_CUSTOM_METRICS" in self.model_config["parameters"]
195-
and self.model_config["parameters"]["REPORT_CUSTOM_METRICS"]["string_value"]
196-
== "yes"
172+
self._get_bool_config_param("REPORT_CUSTOM_METRICS")
197173
and not aync_engine_args.disable_log_stats
198174
):
199175
try:
@@ -214,6 +190,12 @@ def init_engine(self):
214190
else:
215191
raise e
216192

193+
def _get_bool_config_param(self, param_name: str) -> bool:
194+
return (param_name in self.model_config["parameters"]) and (
195+
self.model_config["parameters"][param_name]["string_value"].lower()
196+
in ["yes", "true"]
197+
)
198+
217199
def setup_lora(self):
218200
self.enable_lora = False
219201

0 commit comments

Comments
 (0)