30
30
from typing_extensions import assert_never
31
31
32
32
import vllm .envs as envs
33
- from vllm .config import ModelConfig
33
+ from vllm .config import VllmConfig
34
34
from vllm .engine .arg_utils import AsyncEngineArgs
35
35
from vllm .engine .async_llm_engine import AsyncLLMEngine # type: ignore
36
36
from vllm .engine .multiprocessing .client import MQLLMEngineClient
@@ -327,6 +327,7 @@ def mount_metrics(app: FastAPI):
327
327
"/load" ,
328
328
"/ping" ,
329
329
"/version" ,
330
+ "/server_info" ,
330
331
],
331
332
registry = registry ,
332
333
).add ().instrument (app ).expose (app )
@@ -687,6 +688,11 @@ async def do_rerank_v2(request: RerankRequest, raw_request: Request):
687
688
688
689
if envs .VLLM_SERVER_DEV_MODE :
689
690
691
+ @router .get ("/server_info" )
692
+ async def show_server_info (raw_request : Request ):
693
+ server_info = {"vllm_config" : str (raw_request .app .state .vllm_config )}
694
+ return JSONResponse (content = server_info )
695
+
690
696
@router .post ("/reset_prefix_cache" )
691
697
async def reset_prefix_cache (raw_request : Request ):
692
698
"""
@@ -894,7 +900,7 @@ async def log_response(request: Request, call_next):
894
900
895
901
async def init_app_state (
896
902
engine_client : EngineClient ,
897
- model_config : ModelConfig ,
903
+ vllm_config : VllmConfig ,
898
904
state : State ,
899
905
args : Namespace ,
900
906
) -> None :
@@ -915,6 +921,8 @@ async def init_app_state(
915
921
916
922
state .engine_client = engine_client
917
923
state .log_stats = not args .disable_log_stats
924
+ state .vllm_config = vllm_config
925
+ model_config = vllm_config .model_config
918
926
919
927
resolved_chat_template = load_chat_template (args .chat_template )
920
928
if resolved_chat_template is not None :
@@ -1069,8 +1077,8 @@ def signal_handler(*_) -> None:
1069
1077
async with build_async_engine_client (args ) as engine_client :
1070
1078
app = build_app (args )
1071
1079
1072
- model_config = await engine_client .get_model_config ()
1073
- await init_app_state (engine_client , model_config , app .state , args )
1080
+ vllm_config = await engine_client .get_vllm_config ()
1081
+ await init_app_state (engine_client , vllm_config , app .state , args )
1074
1082
1075
1083
def _listen_addr (a : str ) -> str :
1076
1084
if is_valid_ipv6_address (a ):
0 commit comments