Skip to content

Commit 66aa4c0

Browse files
[Feature] Add middleware to log API Server responses (#15593)
Signed-off-by: Yuan Tang <[email protected]>
1 parent 2471815 commit 66aa4c0

File tree

2 files changed: +21 additions, −0 deletions

vllm/entrypoints/openai/api_server.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from fastapi.exceptions import RequestValidationError
2525
from fastapi.middleware.cors import CORSMiddleware
2626
from fastapi.responses import JSONResponse, Response, StreamingResponse
27+
from starlette.concurrency import iterate_in_threadpool
2728
from starlette.datastructures import State
2829
from starlette.routing import Mount
2930
from typing_extensions import assert_never
@@ -846,6 +847,21 @@ async def add_request_id(request: Request, call_next):
846847
response.headers["X-Request-Id"] = request_id
847848
return response
848849

850+
if envs.VLLM_DEBUG_LOG_API_SERVER_RESPONSE:
851+
logger.warning("CAUTION: Enabling log response in the API Server. "
852+
"This can include sensitive information and should be "
853+
"avoided in production.")
854+
855+
@app.middleware("http")
856+
async def log_response(request: Request, call_next):
857+
response = await call_next(request)
858+
response_body = [
859+
section async for section in response.body_iterator
860+
]
861+
response.body_iterator = iterate_in_threadpool(iter(response_body))
862+
logger.info("response_body={%s}", response_body[0].decode())
863+
return response
864+
849865
for middleware in args.middleware:
850866
module_path, object_name = middleware.rsplit(".", 1)
851867
imported = getattr(importlib.import_module(module_path), object_name)

vllm/envs.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,11 @@ def maybe_convert_int(value: Optional[str]) -> Optional[int]:
270270
"VLLM_API_KEY":
271271
lambda: os.environ.get("VLLM_API_KEY", None),
272272

273+
# Whether to log responses from API Server for debugging
274+
"VLLM_DEBUG_LOG_API_SERVER_RESPONSE":
275+
lambda: os.environ.get("VLLM_DEBUG_LOG_API_SERVER_RESPONSE", "False").
276+
lower() == "true",
277+
273278
# S3 access information, used for tensorizer to load model from S3
274279
"S3_ACCESS_KEY_ID":
275280
lambda: os.environ.get("S3_ACCESS_KEY_ID", None),

0 commit comments

Comments (0)