62
62
DetokenizeRequest ,
63
63
DetokenizeResponse ,
64
64
EmbeddingRequest ,
65
- EmbeddingResponse , ErrorResponse ,
65
+ EmbeddingResponse , ErrorInfo ,
66
+ ErrorResponse ,
66
67
LoadLoRAAdapterRequest ,
67
68
PoolingRequest , PoolingResponse ,
68
69
RerankRequest , RerankResponse ,
@@ -506,7 +507,7 @@ async def tokenize(request: TokenizeRequest, raw_request: Request):
506
507
507
508
if isinstance (generator , ErrorResponse ):
508
509
return JSONResponse (content = generator .model_dump (),
509
- status_code = generator .code )
510
+ status_code = generator .error . code )
510
511
elif isinstance (generator , TokenizeResponse ):
511
512
return JSONResponse (content = generator .model_dump ())
512
513
@@ -540,7 +541,7 @@ async def detokenize(request: DetokenizeRequest, raw_request: Request):
540
541
541
542
if isinstance (generator , ErrorResponse ):
542
543
return JSONResponse (content = generator .model_dump (),
543
- status_code = generator .code )
544
+ status_code = generator .error . code )
544
545
elif isinstance (generator , DetokenizeResponse ):
545
546
return JSONResponse (content = generator .model_dump ())
546
547
@@ -556,7 +557,7 @@ async def get_tokenizer_info(raw_request: Request):
556
557
"""Get comprehensive tokenizer information."""
557
558
result = await tokenization (raw_request ).get_tokenizer_info ()
558
559
return JSONResponse (content = result .model_dump (),
559
- status_code = result .code if isinstance (
560
+ status_code = result .error . code if isinstance (
560
561
result , ErrorResponse ) else 200 )
561
562
562
563
@@ -603,7 +604,7 @@ async def create_responses(request: ResponsesRequest, raw_request: Request):
603
604
604
605
if isinstance (generator , ErrorResponse ):
605
606
return JSONResponse (content = generator .model_dump (),
606
- status_code = generator .code )
607
+ status_code = generator .error . code )
607
608
elif isinstance (generator , ResponsesResponse ):
608
609
return JSONResponse (content = generator .model_dump ())
609
610
return StreamingResponse (content = generator , media_type = "text/event-stream" )
@@ -620,7 +621,7 @@ async def retrieve_responses(response_id: str, raw_request: Request):
620
621
621
622
if isinstance (response , ErrorResponse ):
622
623
return JSONResponse (content = response .model_dump (),
623
- status_code = response .code )
624
+ status_code = response .error . code )
624
625
return JSONResponse (content = response .model_dump ())
625
626
626
627
@@ -635,7 +636,7 @@ async def cancel_responses(response_id: str, raw_request: Request):
635
636
636
637
if isinstance (response , ErrorResponse ):
637
638
return JSONResponse (content = response .model_dump (),
638
- status_code = response .code )
639
+ status_code = response .error . code )
639
640
return JSONResponse (content = response .model_dump ())
640
641
641
642
@@ -670,7 +671,7 @@ async def create_chat_completion(request: ChatCompletionRequest,
670
671
671
672
if isinstance (generator , ErrorResponse ):
672
673
return JSONResponse (content = generator .model_dump (),
673
- status_code = generator .code )
674
+ status_code = generator .error . code )
674
675
675
676
elif isinstance (generator , ChatCompletionResponse ):
676
677
return JSONResponse (content = generator .model_dump ())
@@ -715,7 +716,7 @@ async def create_completion(request: CompletionRequest, raw_request: Request):
715
716
716
717
if isinstance (generator , ErrorResponse ):
717
718
return JSONResponse (content = generator .model_dump (),
718
- status_code = generator .code )
719
+ status_code = generator .error . code )
719
720
elif isinstance (generator , CompletionResponse ):
720
721
return JSONResponse (content = generator .model_dump ())
721
722
@@ -744,7 +745,7 @@ async def create_embedding(request: EmbeddingRequest, raw_request: Request):
744
745
745
746
if isinstance (generator , ErrorResponse ):
746
747
return JSONResponse (content = generator .model_dump (),
747
- status_code = generator .code )
748
+ status_code = generator .error . code )
748
749
elif isinstance (generator , EmbeddingResponse ):
749
750
return JSONResponse (content = generator .model_dump ())
750
751
@@ -772,7 +773,7 @@ async def create_pooling(request: PoolingRequest, raw_request: Request):
772
773
generator = await handler .create_pooling (request , raw_request )
773
774
if isinstance (generator , ErrorResponse ):
774
775
return JSONResponse (content = generator .model_dump (),
775
- status_code = generator .code )
776
+ status_code = generator .error . code )
776
777
elif isinstance (generator , PoolingResponse ):
777
778
return JSONResponse (content = generator .model_dump ())
778
779
@@ -792,7 +793,7 @@ async def create_classify(request: ClassificationRequest,
792
793
generator = await handler .create_classify (request , raw_request )
793
794
if isinstance (generator , ErrorResponse ):
794
795
return JSONResponse (content = generator .model_dump (),
795
- status_code = generator .code )
796
+ status_code = generator .error . code )
796
797
797
798
elif isinstance (generator , ClassificationResponse ):
798
799
return JSONResponse (content = generator .model_dump ())
@@ -821,7 +822,7 @@ async def create_score(request: ScoreRequest, raw_request: Request):
821
822
generator = await handler .create_score (request , raw_request )
822
823
if isinstance (generator , ErrorResponse ):
823
824
return JSONResponse (content = generator .model_dump (),
824
- status_code = generator .code )
825
+ status_code = generator .error . code )
825
826
elif isinstance (generator , ScoreResponse ):
826
827
return JSONResponse (content = generator .model_dump ())
827
828
@@ -881,7 +882,7 @@ async def create_transcriptions(raw_request: Request,
881
882
882
883
if isinstance (generator , ErrorResponse ):
883
884
return JSONResponse (content = generator .model_dump (),
884
- status_code = generator .code )
885
+ status_code = generator .error . code )
885
886
886
887
elif isinstance (generator , TranscriptionResponse ):
887
888
return JSONResponse (content = generator .model_dump ())
@@ -922,7 +923,7 @@ async def create_translations(request: Annotated[TranslationRequest,
922
923
923
924
if isinstance (generator , ErrorResponse ):
924
925
return JSONResponse (content = generator .model_dump (),
925
- status_code = generator .code )
926
+ status_code = generator .error . code )
926
927
927
928
elif isinstance (generator , TranslationResponse ):
928
929
return JSONResponse (content = generator .model_dump ())
@@ -950,7 +951,7 @@ async def do_rerank(request: RerankRequest, raw_request: Request):
950
951
generator = await handler .do_rerank (request , raw_request )
951
952
if isinstance (generator , ErrorResponse ):
952
953
return JSONResponse (content = generator .model_dump (),
953
- status_code = generator .code )
954
+ status_code = generator .error . code )
954
955
elif isinstance (generator , RerankResponse ):
955
956
return JSONResponse (content = generator .model_dump ())
956
957
@@ -1175,7 +1176,7 @@ async def invocations(raw_request: Request):
1175
1176
msg = ("Cannot find suitable handler for request. "
1176
1177
f"Expected one of: { type_names } " )
1177
1178
res = base (raw_request ).create_error_response (message = msg )
1178
- return JSONResponse (content = res .model_dump (), status_code = res .code )
1179
+ return JSONResponse (content = res .model_dump (), status_code = res .error . code )
1179
1180
1180
1181
1181
1182
if envs .VLLM_TORCH_PROFILER_DIR :
@@ -1211,7 +1212,7 @@ async def load_lora_adapter(request: LoadLoRAAdapterRequest,
1211
1212
response = await handler .load_lora_adapter (request )
1212
1213
if isinstance (response , ErrorResponse ):
1213
1214
return JSONResponse (content = response .model_dump (),
1214
- status_code = response .code )
1215
+ status_code = response .error . code )
1215
1216
1216
1217
return Response (status_code = 200 , content = response )
1217
1218
@@ -1223,7 +1224,7 @@ async def unload_lora_adapter(request: UnloadLoRAAdapterRequest,
1223
1224
response = await handler .unload_lora_adapter (request )
1224
1225
if isinstance (response , ErrorResponse ):
1225
1226
return JSONResponse (content = response .model_dump (),
1226
- status_code = response .code )
1227
+ status_code = response .error . code )
1227
1228
1228
1229
return Response (status_code = 200 , content = response )
1229
1230
@@ -1502,9 +1503,10 @@ def build_app(args: Namespace) -> FastAPI:
1502
1503
1503
1504
@app .exception_handler (HTTPException )
1504
1505
async def http_exception_handler (_ : Request , exc : HTTPException ):
1505
- err = ErrorResponse (message = exc .detail ,
1506
+ err = ErrorResponse (
1507
+ error = ErrorInfo (message = exc .detail ,
1506
1508
type = HTTPStatus (exc .status_code ).phrase ,
1507
- code = exc .status_code )
1509
+ code = exc .status_code ))
1508
1510
return JSONResponse (err .model_dump (), status_code = exc .status_code )
1509
1511
1510
1512
@app .exception_handler (RequestValidationError )
@@ -1518,9 +1520,9 @@ async def validation_exception_handler(_: Request,
1518
1520
else :
1519
1521
message = exc_str
1520
1522
1521
- err = ErrorResponse (message = message ,
1522
- type = HTTPStatus .BAD_REQUEST .phrase ,
1523
- code = HTTPStatus .BAD_REQUEST )
1523
+ err = ErrorResponse (error = ErrorInfo ( message = message ,
1524
+ type = HTTPStatus .BAD_REQUEST .phrase ,
1525
+ code = HTTPStatus .BAD_REQUEST ) )
1524
1526
return JSONResponse (err .model_dump (),
1525
1527
status_code = HTTPStatus .BAD_REQUEST )
1526
1528
0 commit comments