```diff
@@ -8,7 +8,7 @@
 from argparse import Namespace
 from contextlib import asynccontextmanager
 from http import HTTPStatus
-from typing import AsyncIterator, Set
+from typing import AsyncIterator, Optional, Set
 
 from fastapi import APIRouter, FastAPI, Request
 from fastapi.exceptions import RequestValidationError
@@ -60,6 +60,7 @@
 openai_serving_tokenization: OpenAIServingTokenization
 prometheus_multiproc_dir: tempfile.TemporaryDirectory
 
+# Cannot use __name__ (https://github.com/vllm-project/vllm/pull/4765)
 logger = init_logger('vllm.entrypoints.openai.api_server')
 
 _running_tasks: Set[asyncio.Task] = set()
@@ -94,7 +95,15 @@ async def _force_log():
 
 @asynccontextmanager
 async def build_async_engine_client(
-        args: Namespace) -> AsyncIterator[AsyncEngineClient]:
+        args: Namespace) -> AsyncIterator[Optional[AsyncEngineClient]]:
+    """
+    Create AsyncEngineClient, either:
+        - in-process using the AsyncLLMEngine Directly
+        - multiprocess using AsyncLLMEngine RPC
+
+    Returns the Client or None if the creation failed.
+    """
+
     # Context manager to handle async_engine_client lifecycle
     # Ensures everything is shutdown and cleaned up on error/exit
     global engine_args
```
```diff
@@ -157,11 +166,13 @@ async def build_async_engine_client(
             try:
                 await rpc_client.setup()
                 break
-            except TimeoutError as e:
+            except TimeoutError:
                 if not rpc_server_process.is_alive():
-                    raise RuntimeError(
-                        "The server process died before "
-                        "responding to the readiness probe") from e
+                    logger.error(
+                        "RPCServer process died before responding "
+                        "to readiness probe")
+                    yield None
+                    return
 
         yield async_engine_client
     finally:
```
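This hunk replaces the hard `RuntimeError` with a logged error followed by `yield None`, so a dead RPCServer process no longer surfaces as an unhandled exception out of the context manager's `__aenter__`; the caller receives `None` and decides how to shut down. The sketch below shows that pattern in isolation; it is a minimal example under stated assumptions, with `FakeWorker`, `build_client`, and the `healthy` flag as hypothetical stand-ins rather than vLLM's actual RPC client or server-process API.

```python
import asyncio
from contextlib import asynccontextmanager
from typing import AsyncIterator, Optional


class FakeWorker:
    """Hypothetical stand-in for a backend worker process."""

    def __init__(self, healthy: bool) -> None:
        self.healthy = healthy

    def is_alive(self) -> bool:
        return self.healthy

    async def setup(self) -> None:
        # Simulate a readiness probe that times out when the worker is dead.
        if not self.healthy:
            raise TimeoutError("readiness probe timed out")


@asynccontextmanager
async def build_client(
        healthy: bool = True) -> AsyncIterator[Optional[FakeWorker]]:
    """Yield a ready client, or None if the worker never becomes ready."""
    worker = FakeWorker(healthy)
    try:
        while True:
            try:
                await worker.setup()
                break
            except TimeoutError:
                if not worker.is_alive():
                    # Yield None instead of raising so the caller can bail
                    # out cleanly; raising here would unwind server startup
                    # with a traceback instead of a clear log message.
                    yield None
                    return
                # Worker is alive but slow: retry the readiness probe.
        yield worker
    finally:
        # A real implementation would terminate the worker process here.
        pass


if __name__ == "__main__":
    async def demo() -> None:
        async with build_client(healthy=False) as client:
            print("got:", client)  # got: None

    asyncio.run(demo())
```

Because an `@asynccontextmanager` generator must yield exactly once, the failure path yields `None` and then returns; the `finally` cleanup still runs either way.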
```diff
@@ -410,6 +421,10 @@ async def run_server(args, **uvicorn_kwargs) -> None:
     logger.info("args: %s", args)
 
     async with build_async_engine_client(args) as async_engine_client:
+        # If None, creation of the client failed and we exit.
+        if async_engine_client is None:
+            return
+
         app = await init_app(async_engine_client, args)
 
         shutdown_task = await serve_http(
```
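On the consumer side, `run_server` now treats a `None` client as "startup already failed and was logged" and returns instead of building the app. A minimal sketch of that calling convention follows, using a hypothetical `build_client` stub in place of vLLM's real `build_async_engine_client`:

```python
import asyncio
from contextlib import asynccontextmanager
from typing import AsyncIterator, Optional


@asynccontextmanager
async def build_client(fail: bool) -> AsyncIterator[Optional[str]]:
    # Hypothetical stand-in: yields None when the backend could not start.
    yield None if fail else "engine-client"


async def run_server(fail: bool) -> None:
    async with build_client(fail) as client:
        # Same check as in the hunk above: a None client means the failure
        # was already logged, so return rather than building the app.
        if client is None:
            return
        print(f"serving with {client!r}")


if __name__ == "__main__":
    asyncio.run(run_server(fail=False))  # serving with 'engine-client'
    asyncio.run(run_server(fail=True))   # returns quietly
```

Returning early from inside the `async with` block (rather than re-raising) still exits through the context manager, so whatever cleanup its `finally` clause performs continues to run.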