
Commit 1504cb3

adding asyncenginedead error catch (#504)
* adding asyncenginedead error catch
* catch error in generation
1 parent d27f2df · commit 1504cb3

File tree: 1 file changed (+7, -2 lines)


model-engine/model_engine_server/inference/vllm/vllm_server.py

Lines changed: 7 additions & 2 deletions
@@ -10,7 +10,7 @@
 from fastapi import BackgroundTasks, FastAPI, HTTPException, Request
 from fastapi.responses import Response, StreamingResponse
 from vllm.engine.arg_utils import AsyncEngineArgs
-from vllm.engine.async_llm_engine import AsyncLLMEngine
+from vllm.engine.async_llm_engine import AsyncEngineDeadError, AsyncLLMEngine
 from vllm.entrypoints.openai.protocol import CompletionRequest as OpenAICompletionRequest
 from vllm.model_executor.guided_decoding import get_guided_decoding_logits_processor
 from vllm.outputs import CompletionOutput
@@ -75,7 +75,11 @@ async def generate(request: Request) -> Response:
     sampling_params.logits_processors.append(guided_decode_logit_processor)
 
     request_id = random_uuid()
-    results_generator = engine.generate(prompt, sampling_params, request_id)
+    try:
+        results_generator = engine.generate(prompt, sampling_params, request_id)
+    except AsyncEngineDeadError as e:
+        print(f"The vllm engine is dead, exiting the pod: {e}")
+        exit(1)
 
     # Streaming case
     async def stream_results() -> AsyncGenerator[str, None]:
@@ -192,6 +196,7 @@ def extract_logprobs(logprobs: Dict[int, Logprob]) -> Dict[int, float]:
 
     engine_args = AsyncEngineArgs.from_cli_args(args)
     engine = AsyncLLMEngine.from_engine_args(engine_args)
+    engine.check_health()
 
     signal.signal(signal.SIGUSR1, debug)
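Note on the pattern (illustrative sketch, not part of the commit): engine.generate returns an async generator, so AsyncEngineDeadError can also surface later, while stream_results consumes the outputs, not only at the generate() call that this diff wraps in try/except. Below is a minimal sketch of extending the same exit-on-dead-engine handling to that consumption loop. The results_generator parameter and the JSON payload shape are assumptions modeled on the surrounding server code, and sys.exit(1) stands in for the bare exit(1) builtin.

    import json
    import sys
    from typing import AsyncGenerator

    from vllm.engine.async_llm_engine import AsyncEngineDeadError


    async def stream_results(results_generator) -> AsyncGenerator[str, None]:
        # Sketch: consume the engine's output stream. A dead engine can
        # raise AsyncEngineDeadError here even after engine.generate()
        # itself returned successfully.
        try:
            async for request_output in results_generator:
                text_outputs = [output.text for output in request_output.outputs]
                yield json.dumps({"text": text_outputs}) + "\n"
        except AsyncEngineDeadError as e:
            # Mirror the commit's behavior: log and exit nonzero so the pod
            # is restarted with a fresh engine rather than continuing to
            # serve from a dead one.
            print(f"The vllm engine is dead, exiting the pod: {e}")
            sys.exit(1)

Exiting with a nonzero status deliberately hands recovery to the orchestrator: under a Kubernetes restart policy the pod comes back with a freshly constructed AsyncLLMEngine, which is simpler and safer than trying to keep serving from a dead engine in place.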