3030from typing_extensions import assert_never
3131
3232import vllm .envs as envs
33- from vllm .config import ModelConfig
33+ from vllm .config import VllmConfig
3434from vllm .engine .arg_utils import AsyncEngineArgs
3535from vllm .engine .async_llm_engine import AsyncLLMEngine # type: ignore
3636from vllm .engine .multiprocessing .client import MQLLMEngineClient
@@ -327,6 +327,7 @@ def mount_metrics(app: FastAPI):
327327 "/load" ,
328328 "/ping" ,
329329 "/version" ,
330+ "/server_info" ,
330331 ],
331332 registry = registry ,
332333 ).add ().instrument (app ).expose (app )
@@ -687,6 +688,11 @@ async def do_rerank_v2(request: RerankRequest, raw_request: Request):
687688
688689if envs .VLLM_SERVER_DEV_MODE :
689690
@router.get("/server_info")
async def show_server_info(raw_request: Request):
    """Return the server's vLLM configuration as a JSON document.

    The configuration object is read from ``raw_request.app.state`` and
    rendered with ``str()``; the resulting text is placed under the
    ``"vllm_config"`` key of the response body.

    NOTE(review): in the surrounding file this route is declared beneath
    the ``if envs.VLLM_SERVER_DEV_MODE:`` guard, so it should stay nested
    inside that block.
    """
    app_state = raw_request.app.state
    # The config is sent as its string form rather than as a nested
    # JSON structure.
    payload = {"vllm_config": str(app_state.vllm_config)}
    return JSONResponse(content=payload)
695+
690696 @router .post ("/reset_prefix_cache" )
691697 async def reset_prefix_cache (raw_request : Request ):
692698 """
@@ -894,7 +900,7 @@ async def log_response(request: Request, call_next):
894900
895901async def init_app_state (
896902 engine_client : EngineClient ,
897- model_config : ModelConfig ,
903+ vllm_config : VllmConfig ,
898904 state : State ,
899905 args : Namespace ,
900906) -> None :
@@ -915,6 +921,8 @@ async def init_app_state(
915921
916922 state .engine_client = engine_client
917923 state .log_stats = not args .disable_log_stats
924+ state .vllm_config = vllm_config
925+ model_config = vllm_config .model_config
918926
919927 resolved_chat_template = load_chat_template (args .chat_template )
920928 if resolved_chat_template is not None :
@@ -1069,8 +1077,8 @@ def signal_handler(*_) -> None:
10691077 async with build_async_engine_client (args ) as engine_client :
10701078 app = build_app (args )
10711079
1072- model_config = await engine_client .get_model_config ()
1073- await init_app_state (engine_client , model_config , app .state , args )
1080+ vllm_config = await engine_client .get_vllm_config ()
1081+ await init_app_state (engine_client , vllm_config , app .state , args )
10741082
10751083 def _listen_addr (a : str ) -> str :
10761084 if is_valid_ipv6_address (a ):
0 commit comments