33import json
44import os
55import signal
6- import socket
76import subprocess
87import traceback
98from logging import Logger
109from typing import AsyncGenerator , Dict , List , Optional
1110
11+ import vllm .envs as envs
1212from fastapi import APIRouter , BackgroundTasks , Request
1313from fastapi .responses import Response , StreamingResponse
1414from vllm .engine .async_llm_engine import (
1717)
1818from vllm .engine .protocol import EngineClient
1919from vllm .entrypoints .launcher import serve_http
20- from vllm .entrypoints .openai .api_server import build_app , build_async_engine_client , init_app_state
20+ from vllm .entrypoints .openai .api_server import (
21+ build_app ,
22+ build_async_engine_client ,
23+ init_app_state ,
24+ load_log_config ,
25+ maybe_register_tokenizer_info_endpoint ,
26+ setup_server ,
27+ )
2128from vllm .entrypoints .openai .cli_args import make_arg_parser
29+ from vllm .entrypoints .openai .tool_parsers import ToolParserManager
2230from vllm .outputs import CompletionOutput
2331from vllm .sampling_params import SamplingParams
2432from vllm .sequence import Logprob
2533from vllm .utils import FlexibleArgumentParser , random_uuid
26- from vllm .version import __version__ as VLLM_VERSION
2734
# Module-level logger for this server script.
# NOTE(review): constructing ``Logger`` directly (instead of going through
# ``logging.getLogger``) yields a logger that is not registered in the logging
# hierarchy -- confirm that handlers/levels are configured as intended.
logger = Logger("vllm_server")
2936
@@ -197,34 +204,48 @@ def parse_args(parser: FlexibleArgumentParser):
197204
198205
async def run_server(args, **uvicorn_kwargs) -> None:
    """Run a single-worker API server.

    Obtains the listen address and the socket from ``setup_server(args)`` and
    hands both, together with ``args`` and any extra keyword arguments, to
    ``run_server_worker``, which does the actual serving.

    Args:
        args: Server arguments object; passed through unchanged to
            ``setup_server`` and ``run_server_worker``.
        **uvicorn_kwargs: Extra keyword arguments forwarded verbatim to
            ``run_server_worker``.
    """
    listen_address, sock = setup_server(args)
    await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
210+
211+
212+ async def run_server_worker (
213+ listen_address , sock , args , client_config = None , ** uvicorn_kwargs
214+ ) -> None :
215+ """Run a single API server worker."""
202216
203- temp_socket = socket . socket ( socket . AF_INET , socket . SOCK_STREAM ) # nosemgrep
204- temp_socket . bind (( "" , args .port ) )
217+ if args . tool_parser_plugin and len ( args . tool_parser_plugin ) > 3 :
218+ ToolParserManager . import_tool_parser ( args .tool_parser_plugin )
205219
206- def signal_handler (* _ ) -> None :
207- # Interrupt server on sigterm while initializing
208- raise KeyboardInterrupt ("terminated" )
220+ server_index = client_config .get ("client_index" , 0 ) if client_config else 0
209221
210- signal .signal (signal .SIGTERM , signal_handler )
222+ # Load logging config for uvicorn if specified
223+ log_config = load_log_config (args .log_config_file )
224+ if log_config is not None :
225+ uvicorn_kwargs ["log_config" ] = log_config
211226
212227 global engine_client
213- async with build_async_engine_client (args ) as engine_client :
214- app = build_app (args )
215228
216- model_config = await engine_client .get_model_config ()
217- init_app_state (engine_client , model_config , app .state , args )
229+ async with build_async_engine_client (args , client_config ) as engine_client :
230+ maybe_register_tokenizer_info_endpoint (args )
231+ app = build_app (args )
218232
219- temp_socket .close ()
233+ vllm_config = await engine_client .get_vllm_config ()
234+ await init_app_state (engine_client , vllm_config , app .state , args )
220235 app .include_router (router )
221236
237+ logger .info ("Starting vLLM API server %d on %s" , server_index , listen_address )
222238 shutdown_task = await serve_http (
223239 app ,
240+ sock = sock ,
241+ enable_ssl_refresh = args .enable_ssl_refresh ,
224242 host = args .host ,
225243 port = args .port ,
226244 log_level = args .uvicorn_log_level ,
227- timeout_keep_alive = TIMEOUT_KEEP_ALIVE ,
245+ # NOTE: When the 'disable_uvicorn_access_log' value is True,
246+ # no access log will be output.
247+ access_log = not args .disable_uvicorn_access_log ,
248+ timeout_keep_alive = envs .VLLM_HTTP_TIMEOUT_KEEP_ALIVE ,
228249 ssl_keyfile = args .ssl_keyfile ,
229250 ssl_certfile = args .ssl_certfile ,
230251 ssl_ca_certs = args .ssl_ca_certs ,
@@ -233,7 +254,10 @@ def signal_handler(*_) -> None:
233254 )
234255
235256 # NB: Await server shutdown only after the backend context is exited
236- await shutdown_task
257+ try :
258+ await shutdown_task
259+ finally :
260+ sock .close ()
237261
238262
239263if __name__ == "__main__" :
0 commit comments