@@ -112,7 +112,10 @@ def initialize(self, args):
112112 self .output_dtype = pb_utils .triton_string_to_numpy (output_config ["data_type" ])
113113
114114 # Setup vLLM engine health check
115- self ._setup_health_check ()
115+ self ._enable_health_check = self ._get_bool_config_param (
116+ "ENABLE_VLLM_HEALTH_CHECK"
117+ )
118+ self ._is_healthy = True
116119
117120 # Prepare vLLM engine
118121 self .init_engine ()
@@ -134,31 +137,6 @@ def initialize(self, args):
134137 self ._shutdown_event = asyncio .Event ()
135138 self ._event_thread .start ()
136139
def _setup_health_check(self):
    """Decide whether the vLLM engine health check is enabled.

    Reads the ``ENABLE_VLLM_HEALTH_CHECK`` entry of
    ``self.model_config["parameters"]`` (enabled when its ``string_value``
    is "yes" or "true", case-insensitively) and stores the outcome in
    ``self._enable_health_check``. The check is forced off, with a warning
    logged through ``self.logger``, unless the instance groups in
    ``self.model_config["instance_group"]`` add up to exactly one instance.

    ``self._is_healthy`` is always initialised to ``True`` so the attribute
    exists no matter which path is taken.
    """
    # Set the healthy flag up front: previously it was only assigned when the
    # health check stayed enabled, leaving the attribute undefined otherwise.
    self._is_healthy = True

    # Check if health check should be enabled
    parameters = self.model_config["parameters"]
    self._enable_health_check = (
        "ENABLE_VLLM_HEALTH_CHECK" in parameters
        and parameters["ENABLE_VLLM_HEALTH_CHECK"]["string_value"].lower()
        in ("yes", "true")
    )
    if not self._enable_health_check:
        return

    # Only enable health check if there is exactly 1 instance
    num_instances = sum(
        group["count"] for group in self.model_config["instance_group"]
    )
    if num_instances != 1:
        self.logger.log_warn(
            f"[vllm] Health check may only be enabled when the model has exactly 1 instance but {num_instances} are found"
        )
        self._enable_health_check = False
162140 def init_engine (self ):
163141 # Currently, Triton needs to use decoupled policy for asynchronously
164142 # forwarding requests to vLLM engine, so assert it.
@@ -191,9 +169,7 @@ def init_engine(self):
191169 # Create vLLM custom metrics
192170 self .vllm_metrics = None
193171 if (
194- "REPORT_CUSTOM_METRICS" in self .model_config ["parameters" ]
195- and self .model_config ["parameters" ]["REPORT_CUSTOM_METRICS" ]["string_value" ]
196- == "yes"
172+ self ._get_bool_config_param ("REPORT_CUSTOM_METRICS" )
197173 and not aync_engine_args .disable_log_stats
198174 ):
199175 try :
@@ -214,6 +190,12 @@ def init_engine(self):
214190 else :
215191 raise e
216192
def _get_bool_config_param(self, param_name: str) -> bool:
    """Return whether a model-config parameter is switched on.

    Looks up ``param_name`` in ``self.model_config["parameters"]``. The
    result is ``True`` only when the entry exists and its ``string_value``,
    compared case-insensitively, is "yes" or "true"; a missing entry or any
    other value yields ``False``.
    """
    parameters = self.model_config["parameters"]
    if param_name not in parameters:
        return False
    normalized = parameters[param_name]["string_value"].lower()
    return normalized in ["yes", "true"]
198+
217199 def setup_lora (self ):
218200 self .enable_lora = False
219201
0 commit comments