55
66# Third Party
from fastapi import Request
from fastapi.exception_handlers import request_validation_exception_handler
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse
from starlette.datastructures import State
from vllm.config import ModelConfig
from vllm.engine.protocol import EngineClient
4141 # Third Party
4242 from vllm .reasoning import ReasoningParserManager
4343
44+
4445TIMEOUT_KEEP_ALIVE = 5 # seconds
4546
4647# Cannot use __name__ (https://github.com/vllm-project/vllm/pull/4765)
@@ -162,6 +163,19 @@ def signal_handler(*_) -> None:
162163 # Use vllm build_app which adds middleware
163164 app = api_server .build_app (args )
164165
# Remember the handler that is registered before we override it, so that
# non-detector routes keep their existing validation-error responses.
# NOTE(review): build_app may already have installed its own handler; the
# FastAPI default is used as a fallback if nothing is registered.
previous_validation_handler = app.exception_handlers.get(
    RequestValidationError, request_validation_exception_handler
)


# Override exception handler to flatten errors for detectors API
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(
    request: Request, exc: RequestValidationError
):
    """Return flat ``code``/``message`` errors for detector routes.

    Args:
        request: Incoming request whose body or parameters failed validation.
        exc: Validation error raised by FastAPI for this request.

    Returns:
        For paths starting with ``/api/v1/text``, a ``JSONResponse`` with
        status 422 and a body holding ``code`` and a single ``message``
        string joined from every entry of ``exc.errors()``. For any other
        path, whatever the previously registered handler returns.
    """
    # Function-scope import keeps this block self-contained.
    import inspect

    if request.url.path.startswith("/api/v1/text"):
        # RequestValidationError has no ``.error`` attribute; the individual
        # failures are exposed through ``.errors()`` as a list of dicts.
        details = []
        for err in exc.errors():
            location = ".".join(str(part) for part in err.get("loc", ()))
            reason = err.get("msg", "")
            details.append(f"{location}: {reason}" if location else reason)
        message = "; ".join(details) or "Invalid request"

        # 422 Unprocessable Entity is FastAPI's status for validation errors.
        status_code = 422
        return JSONResponse(
            status_code=status_code,
            content={"code": status_code, "message": message},
        )

    # Re-raising from inside an exception handler would surface as an
    # unhandled server error, so delegate to the previous handler instead.
    response = previous_validation_handler(request, exc)
    if inspect.isawaitable(response):
        response = await response
    return response
178+
165179 # api_server.init_app_state takes vllm_config
166180 # ref. https://github.com/vllm-project/vllm/pull/16572
167181 if hasattr (engine_client , "get_vllm_config" ):
@@ -213,7 +227,6 @@ async def create_chat_detection(request: ChatDetectionRequest, raw_request: Requ
213227 detector_response = await chat_detection (raw_request ).chat (request , raw_request )
214228
215229 if isinstance (detector_response , ErrorResponse ):
216- # ErrorResponse includes code and message, corresponding to errors for the detectorAPI
217230 return JSONResponse (
218231 content = detector_response .error .model_dump (),
219232 status_code = detector_response .error .code ,
@@ -236,7 +249,6 @@ async def create_context_doc_detection(
236249 )
237250
238251 if isinstance (detector_response , ErrorResponse ):
239- # ErrorResponse includes code and message, corresponding to errors for the detectorAPI
240252 return JSONResponse (
241253 content = detector_response .error .model_dump (),
242254 status_code = detector_response .error .code ,
@@ -258,7 +270,6 @@ async def create_contents_detection(
258270 request , raw_request
259271 )
260272 if isinstance (detector_response , ErrorResponse ):
261- # ErrorResponse includes code and message, corresponding to errors for the detectorAPI
262273 return JSONResponse (
263274 content = detector_response .error .model_dump (),
264275 status_code = detector_response .error .code ,
@@ -280,7 +291,6 @@ async def create_generation_detection(
280291 request , raw_request
281292 )
282293 if isinstance (detector_response , ErrorResponse ):
283- # ErrorResponse includes code and message, corresponding to errors for the detectorAPI
284294 return JSONResponse (
285295 content = detector_response .error .model_dump (),
286296 status_code = detector_response .error .code ,
0 commit comments