@@ -357,21 +357,75 @@ async def route_disaggregated_prefill_request(
357
357
orig_max_tokens = request_json .get ("max_tokens" , 0 )
358
358
request_json ["max_tokens" ] = 1
359
359
st = time .time ()
360
- await send_request_to_prefiller (
361
- request .app .state .prefill_client , endpoint , request_json , request_id
362
- )
363
- et = time .time ()
364
- logger .info (f"{ request_id } prefill time (TTFT): { et - st :.4f} " )
365
- logger .info (
366
- f"Routing request { request_id } with session id None to { request .app .state .prefill_client .base_url } at { et } , process time = { et - in_router_time :.4f} "
367
- )
368
- request_json ["max_tokens" ] = orig_max_tokens
360
+ try :
361
+ await send_request_to_prefiller (
362
+ request .app .state .prefill_client , endpoint , request_json , request_id
363
+ )
364
+ et = time .time ()
365
+ logger .info (f"{ request_id } prefill time (TTFT): { et - st :.4f} " )
366
+ logger .info (
367
+ f"Routing request { request_id } with session id None to { request .app .state .prefill_client .base_url } at { et } , process time = { et - in_router_time :.4f} "
368
+ )
369
+ request_json ["max_tokens" ] = orig_max_tokens
370
+ except httpx .HTTPStatusError as e :
371
+ logger .error (f"HTTP error in prefiller: { e } " , exc_info = True )
372
+ return JSONResponse (
373
+ status_code = e .response .status_code ,
374
+ content = {
375
+ "error" : {
376
+ "message" : f"Prefiller error: { e .response .text } " ,
377
+ "type" : "prefiller_error" ,
378
+ "code" : e .response .status_code ,
379
+ }
380
+ },
381
+ headers = {"X-Request-Id" : request_id },
382
+ )
383
+ except Exception as e :
384
+ logger .error (f"Unexpected error in prefiller: { e } " , exc_info = True )
385
+ return JSONResponse (
386
+ status_code = 500 ,
387
+ content = {
388
+ "error" : {
389
+ "message" : f"Prefiller error: { str (e )} " ,
390
+ "type" : "prefiller_error" ,
391
+ "code" : 500 ,
392
+ }
393
+ },
394
+ headers = {"X-Request-Id" : request_id },
395
+ )
369
396
370
397
async def generate_stream():
    """Relay decode-stage chunks; on failure, finish with one JSON error chunk.

    Chunks produced by ``send_request_to_decode`` are yielded unchanged.
    If the decode call raises, the error is logged with its traceback and
    a single UTF-8 encoded JSON object of the form
    ``{"error": {"message", "type", "code"}}`` is yielded instead of
    re-raising:

    * ``httpx.HTTPStatusError`` -> ``code`` is the upstream status code and
      the message carries the upstream body text when it can be read.
    * any other ``Exception`` -> ``code`` is 500 and the message carries
      ``str(e)``.
    """
    # Populated by whichever handler fires; stays None on a clean stream.
    failure = None
    try:
        async for piece in send_request_to_decode(
            request.app.state.decode_client, endpoint, request_json, request_id
        ):
            yield piece
    except httpx.HTTPStatusError as e:
        logger.error(f"HTTP error in decoder: { e } ", exc_info=True)
        # Reading the body may itself fail (presumably when the streamed
        # response was never read - TODO confirm); fall back to the status.
        try:
            error_text = e.response.text
        except Exception:
            error_text = f"HTTP { e .response .status_code } "
        # NOTE(review): the spacing inside these literals is reproduced
        # exactly as found; confirm whether the trailing space is intended.
        failure = {
            "message": f"Decoder error: { error_text } ",
            "type": "decoder_error",
            "code": e.response.status_code,
        }
    except Exception as e:
        logger.error(f"Unexpected error in decoder: { e } ", exc_info=True)
        failure = {
            "message": f"Decoder error: { str (e )} ",
            "type": "decoder_error",
            "code": 500,
        }

    # The exception variable is unbound once its handler exits, so the
    # payload is captured above and serialised here in one place.
    if failure is not None:
        yield json.dumps({"error": failure}).encode("utf-8")
375
429
376
430
curr_time = time .time ()
377
431
logger .info (
0 commit comments