File tree Expand file tree Collapse file tree 3 files changed +4
-7
lines changed Expand file tree Collapse file tree 3 files changed +4
-7
lines changed Original file line number Diff line number Diff line change @@ -117,7 +117,7 @@ def test_vllm_not_healthy(self):
117117 # The 2nd infer should begin with health check failed
118118 self ._llm_infer ()
119119 self ._assert_infer_exception (
120- "vLLM engine is not healthy and model will be unloaded "
120+ "Model is unavailable due to unhealthy vLLM engine "
121121 )
122122 self ._assert_model_ready (False )
123123 # The 3rd infer should have model not found
Original file line number Diff line number Diff line change 3535
3636The vLLM backend supports checking for
3737[ vLLM Engine Health] ( https://github.com/vllm-project/vllm/blob/v0.6.3.post1/vllm/engine/async_llm_engine.py#L1177-L1185 )
38- upon receiving each inference request. If the health check fails, the entire
39- model will be unloaded, so its state becomes NOT Ready at the server, which can
40- be queried by the
38+ upon receiving each inference request. If the health check fails, the model
39+ state will become NOT Ready at the server, which can be queried by the
4140[ Repository Index] ( https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_model_repository.md#index )
4241or
4342[ Model Ready] ( https://github.com/triton-inference-server/client/blob/main/src/c%2B%2B/library/http_client.h#L178-L192 )
@@ -54,5 +53,3 @@ parameters: {
5453and select
5554[ Model Control Mode EXPLICIT] ( https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_management.md#model-control-mode-explicit )
5655when the server is started.
57-
58- Supported since r24.12.
Original file line number Diff line number Diff line change @@ -701,7 +701,7 @@ def _check_health(self, requests):
701701 request .get_response_sender ().send (
702702 pb_utils .InferenceResponse (
703703 error = pb_utils .TritonError (
704- message = "vLLM engine is not healthy and model will be unloaded " ,
704+ message = "Model is unavailable due to unhealthy vLLM engine " ,
705705 code = pb_utils .TritonError .UNAVAILABLE ,
706706 )
707707 ),
You can’t perform that action at this time.
0 commit comments