@@ -336,12 +336,39 @@ async def send_request_to_decode(
336
336
"X-Request-Id" : request_id ,
337
337
}
338
338
339
- async with client .stream (
340
- "POST" , endpoint , json = req_data , headers = headers
341
- ) as response :
342
- response .raise_for_status ()
343
- async for chunk in response .aiter_bytes ():
344
- yield chunk
339
+ try :
340
+ async with client .stream (
341
+ "POST" , endpoint , json = req_data , headers = headers
342
+ ) as response :
343
+ response .raise_for_status ()
344
+ async for chunk in response .aiter_bytes ():
345
+ yield chunk
346
+ except httpx .HTTPStatusError as e :
347
+ logger .error (f"HTTP error in decode request: { e } " , exc_info = True )
348
+ try :
349
+ error_text = e .response .text
350
+ except Exception :
351
+ error_text = f"HTTP { e .response .status_code } "
352
+ # Yield error as JSON response
353
+ error_response = {
354
+ "error" : {
355
+ "message" : f"Backend error: { error_text } " ,
356
+ "type" : "backend_error" ,
357
+ "code" : e .response .status_code ,
358
+ }
359
+ }
360
+ yield json .dumps (error_response ).encode ("utf-8" )
361
+ except Exception as e :
362
+ logger .error (f"Unexpected error in decode request: { e } " , exc_info = True )
363
+ # Yield error as JSON response
364
+ error_response = {
365
+ "error" : {
366
+ "message" : f"Internal server error: { str (e )} " ,
367
+ "type" : "internal_error" ,
368
+ "code" : 500 ,
369
+ }
370
+ }
371
+ yield json .dumps (error_response ).encode ("utf-8" )
345
372
346
373
347
374
async def route_disaggregated_prefill_request (
0 commit comments