2
2
import importlib
3
3
import inspect
4
4
import re
5
- import signal
5
+ from argparse import Namespace
6
6
from contextlib import asynccontextmanager
7
7
from http import HTTPStatus
8
8
from multiprocessing import Process
9
9
from typing import AsyncIterator , Set
10
10
11
- import fastapi
12
- import uvicorn
13
- from fastapi import APIRouter , Request
11
+ from fastapi import APIRouter , FastAPI , Request
14
12
from fastapi .exceptions import RequestValidationError
15
13
from fastapi .middleware .cors import CORSMiddleware
16
14
from fastapi .responses import JSONResponse , Response , StreamingResponse
22
20
from vllm .engine .arg_utils import AsyncEngineArgs
23
21
from vllm .engine .async_llm_engine import AsyncLLMEngine
24
22
from vllm .engine .protocol import AsyncEngineClient
23
+ from vllm .entrypoints .launcher import serve_http
25
24
from vllm .entrypoints .logger import RequestLogger
26
25
from vllm .entrypoints .openai .cli_args import make_arg_parser
27
26
# yapf conflicts with isort for this block
@@ -71,7 +70,7 @@ def model_is_embedding(model_name: str) -> bool:
71
70
72
71
73
72
@asynccontextmanager
74
- async def lifespan (app : fastapi . FastAPI ):
73
+ async def lifespan (app : FastAPI ):
75
74
76
75
async def _force_log ():
77
76
while True :
@@ -135,7 +134,7 @@ async def build_async_engine_client(args) -> AsyncIterator[AsyncEngineClient]:
135
134
router = APIRouter ()
136
135
137
136
138
- def mount_metrics (app : fastapi . FastAPI ):
137
+ def mount_metrics (app : FastAPI ):
139
138
# Add prometheus asgi middleware to route /metrics requests
140
139
metrics_route = Mount ("/metrics" , make_asgi_app ())
141
140
# Workaround for 307 Redirect for /metrics
@@ -225,8 +224,8 @@ async def create_embedding(request: EmbeddingRequest, raw_request: Request):
225
224
return JSONResponse (content = generator .model_dump ())
226
225
227
226
228
- def build_app (args ) :
229
- app = fastapi . FastAPI (lifespan = lifespan )
227
+ def build_app (args : Namespace ) -> FastAPI :
228
+ app = FastAPI (lifespan = lifespan )
230
229
app .include_router (router )
231
230
app .root_path = args .root_path
232
231
@@ -274,11 +273,10 @@ async def authentication(request: Request, call_next):
274
273
return app
275
274
276
275
277
- async def build_server (
276
+ async def init_app (
278
277
async_engine_client : AsyncEngineClient ,
279
- args ,
280
- ** uvicorn_kwargs ,
281
- ) -> uvicorn .Server :
278
+ args : Namespace ,
279
+ ) -> FastAPI :
282
280
app = build_app (args )
283
281
284
282
if args .served_model_name is not None :
@@ -334,62 +332,31 @@ async def build_server(
334
332
)
335
333
app .root_path = args .root_path
336
334
337
- logger .info ("Available routes are:" )
338
- for route in app .routes :
339
- if not hasattr (route , 'methods' ):
340
- continue
341
- methods = ', ' .join (route .methods )
342
- logger .info ("Route: %s, Methods: %s" , route .path , methods )
343
-
344
- config = uvicorn .Config (
345
- app ,
346
- host = args .host ,
347
- port = args .port ,
348
- log_level = args .uvicorn_log_level ,
349
- timeout_keep_alive = TIMEOUT_KEEP_ALIVE ,
350
- ssl_keyfile = args .ssl_keyfile ,
351
- ssl_certfile = args .ssl_certfile ,
352
- ssl_ca_certs = args .ssl_ca_certs ,
353
- ssl_cert_reqs = args .ssl_cert_reqs ,
354
- ** uvicorn_kwargs ,
355
- )
356
-
357
- return uvicorn .Server (config )
335
+ return app
358
336
359
337
360
338
async def run_server (args , ** uvicorn_kwargs ) -> None :
361
339
logger .info ("vLLM API server version %s" , VLLM_VERSION )
362
340
logger .info ("args: %s" , args )
363
341
364
- shutdown_task = None
365
342
async with build_async_engine_client (args ) as async_engine_client :
366
-
367
- server = await build_server (
368
- async_engine_client ,
369
- args ,
343
+ app = await init_app (async_engine_client , args )
344
+
345
+ shutdown_task = await serve_http (
346
+ app ,
347
+ host = args .host ,
348
+ port = args .port ,
349
+ log_level = args .uvicorn_log_level ,
350
+ timeout_keep_alive = TIMEOUT_KEEP_ALIVE ,
351
+ ssl_keyfile = args .ssl_keyfile ,
352
+ ssl_certfile = args .ssl_certfile ,
353
+ ssl_ca_certs = args .ssl_ca_certs ,
354
+ ssl_cert_reqs = args .ssl_cert_reqs ,
370
355
** uvicorn_kwargs ,
371
356
)
372
357
373
- loop = asyncio .get_running_loop ()
374
-
375
- server_task = loop .create_task (server .serve ())
376
-
377
- def signal_handler () -> None :
378
- # prevents the uvicorn signal handler to exit early
379
- server_task .cancel ()
380
-
381
- loop .add_signal_handler (signal .SIGINT , signal_handler )
382
- loop .add_signal_handler (signal .SIGTERM , signal_handler )
383
-
384
- try :
385
- await server_task
386
- except asyncio .CancelledError :
387
- logger .info ("Gracefully stopping http server" )
388
- shutdown_task = server .shutdown ()
389
-
390
- if shutdown_task :
391
- # NB: Await server shutdown only after the backend context is exited
392
- await shutdown_task
358
+ # NB: Await server shutdown only after the backend context is exited
359
+ await shutdown_task
393
360
394
361
395
362
if __name__ == "__main__" :
@@ -399,4 +366,5 @@ def signal_handler() -> None:
399
366
description = "vLLM OpenAI-Compatible RESTful API server." )
400
367
parser = make_arg_parser (parser )
401
368
args = parser .parse_args ()
369
+
402
370
asyncio .run (run_server (args ))
0 commit comments