
Commit 970dfdc

[Frontend] Improve Startup Failure UX (vllm-project#7716)

1 parent 91f4522 · commit 970dfdc

File tree

2 files changed: +37 −19 lines changed

tests/entrypoints/openai/test_mp_api_server.py

Lines changed: 16 additions & 13 deletions
@@ -1,3 +1,5 @@
+import time
+
 import pytest
 
 from vllm.entrypoints.openai.api_server import build_async_engine_client
@@ -8,19 +10,20 @@
 @pytest.mark.asyncio
 async def test_mp_crash_detection():
 
-    with pytest.raises(RuntimeError) as excinfo:
-        parser = FlexibleArgumentParser(
-            description="vLLM's remote OpenAI server.")
-        parser = make_arg_parser(parser)
-        args = parser.parse_args([])
-        # use an invalid tensor_parallel_size to trigger the
-        # error in the server
-        args.tensor_parallel_size = 65536
-
-        async with build_async_engine_client(args):
-            pass
-    assert "The server process died before responding to the readiness probe"\
-        in str(excinfo.value)
+    parser = FlexibleArgumentParser(description="vLLM's remote OpenAI server.")
+    parser = make_arg_parser(parser)
+    args = parser.parse_args([])
+    # use an invalid tensor_parallel_size to trigger the
+    # error in the server
+    args.tensor_parallel_size = 65536
+
+    start = time.perf_counter()
+    async with build_async_engine_client(args):
+        pass
+    end = time.perf_counter()
+
+    assert end - start < 60, ("Expected vLLM to gracefully shutdown in <60s "
+                              "if there is an error in the startup.")
 
 
 @pytest.mark.asyncio
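Because the failure path now yields None instead of raising, the test no longer matches a RuntimeError message; it asserts that a bad configuration brings the server down in under 60 seconds. A minimal standalone sketch of the same timing pattern, where the hypothetical slow_failing_startup stands in for build_async_engine_client with a broken config:

import asyncio
import time
from contextlib import asynccontextmanager


@asynccontextmanager
async def slow_failing_startup():
    # Hypothetical stand-in: a startup that detects failure after a
    # short delay and yields None instead of hanging or raising.
    await asyncio.sleep(1)
    yield None


async def main():
    start = time.perf_counter()
    async with slow_failing_startup():
        pass
    elapsed = time.perf_counter() - start
    assert elapsed < 60, "startup failure should surface in well under 60s"


asyncio.run(main())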

vllm/entrypoints/openai/api_server.py

Lines changed: 21 additions & 6 deletions
@@ -8,7 +8,7 @@
 from argparse import Namespace
 from contextlib import asynccontextmanager
 from http import HTTPStatus
-from typing import AsyncIterator, Set
+from typing import AsyncIterator, Optional, Set
 
 from fastapi import APIRouter, FastAPI, Request
 from fastapi.exceptions import RequestValidationError
@@ -60,6 +60,7 @@
 openai_serving_tokenization: OpenAIServingTokenization
 prometheus_multiproc_dir: tempfile.TemporaryDirectory
 
+# Cannot use __name__ (https://github.com/vllm-project/vllm/pull/4765)
 logger = init_logger('vllm.entrypoints.openai.api_server')
 
 _running_tasks: Set[asyncio.Task] = set()
@@ -94,7 +95,15 @@ async def _force_log():
 
 @asynccontextmanager
 async def build_async_engine_client(
-        args: Namespace) -> AsyncIterator[AsyncEngineClient]:
+        args: Namespace) -> AsyncIterator[Optional[AsyncEngineClient]]:
+    """
+    Create AsyncEngineClient, either:
+        - in-process using the AsyncLLMEngine Directly
+        - multiprocess using AsyncLLMEngine RPC
+
+    Returns the Client or None if the creation failed.
+    """
+
     # Context manager to handle async_engine_client lifecycle
     # Ensures everything is shutdown and cleaned up on error/exit
     global engine_args
@@ -157,11 +166,13 @@ async def build_async_engine_client(
            try:
                await rpc_client.setup()
                break
-            except TimeoutError as e:
+            except TimeoutError:
                if not rpc_server_process.is_alive():
-                    raise RuntimeError(
-                        "The server process died before "
-                        "responding to the readiness probe") from e
+                    logger.error(
+                        "RPCServer process died before responding "
+                        "to readiness probe")
+                    yield None
+                    return
 
        yield async_engine_client
    finally:
@@ -410,6 +421,10 @@ async def run_server(args, **uvicorn_kwargs) -> None:
     logger.info("args: %s", args)
 
     async with build_async_engine_client(args) as async_engine_client:
+        # If None, creation of the client failed and we exit.
+        if async_engine_client is None:
+            return
+
         app = await init_app(async_engine_client, args)
 
         shutdown_task = await serve_http(
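This is the heart of the change: instead of raising RuntimeError out of the context manager (which surfaced a long traceback on a mere misconfiguration), build_async_engine_client yields None and returns, and run_server checks for None and exits quietly. A minimal self-contained sketch of the yield-None pattern, with hypothetical names (Client, build_client, serve are illustrative stand-ins, not vLLM's actual API):

import asyncio
from contextlib import asynccontextmanager
from typing import AsyncIterator, Optional


class Client:
    """Hypothetical stand-in for AsyncEngineClient."""


@asynccontextmanager
async def build_client(healthy: bool) -> AsyncIterator[Optional[Client]]:
    try:
        if not healthy:
            # Failure path: log and yield None instead of raising, so the
            # caller gets a clean signal rather than a traceback.
            print("ERROR: server process died before readiness probe")
            yield None
            return
        yield Client()
    finally:
        # Cleanup runs on both the success and the failure path.
        print("cleaned up")


async def serve(healthy: bool) -> None:
    async with build_client(healthy) as client:
        # If None, creation of the client failed and we exit.
        if client is None:
            return
        print("serving with", client)


asyncio.run(serve(healthy=False))

Because the failure path completes the generator normally (yield None, then return), the finally block still executes, which is also what lets the test above simply time the async with block rather than catch an exception.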
