@@ -227,6 +227,14 @@ async def completion_full_generator(
227
227
completion_batched_token_ids = [[] for _ in range (num_choices )]
228
228
current_waiting_time = 0
229
229
while num_choices > 0 :
230
+ if self .engine_client .check_model_weight_status ():
231
+ return ErrorResponse (
232
+ error = ErrorInfo (
233
+ message = "Model weight cleared" ,
234
+ code = ErrorCode .INVALID_VALUE ,
235
+ type = ErrorType .INVALID_REQUEST_ERROR ,
236
+ )
237
+ )
230
238
try :
231
239
response = await asyncio .wait_for (response_queue .get (), timeout = 10 )
232
240
current_waiting_time = 0
@@ -281,7 +289,6 @@ async def completion_full_generator(
281
289
return res
282
290
except Exception as e :
283
291
api_server_logger .error (f"Error in completion_full_generator: { e } " , exc_info = True )
284
- raise
285
292
finally :
286
293
self .engine_client .semaphore .release ()
287
294
if dealer is not None :
@@ -360,6 +367,8 @@ async def completion_stream_generator(
360
367
)
361
368
current_waiting_time = 0
362
369
while num_choices > 0 :
370
+ if self .engine_client .check_model_weight_status ():
371
+ raise ValueError ("Engine is clearing model weight" )
363
372
try :
364
373
response = await asyncio .wait_for (response_queue .get (), timeout = 10 )
365
374
current_waiting_time = 0
@@ -447,6 +456,7 @@ async def completion_stream_generator(
447
456
choices [- 1 ].finish_reason = self .calc_finish_reason (
448
457
request .max_tokens , output_tokens [idx ], output , tool_called [idx ]
449
458
)
459
+
450
460
send_idx = output .get ("send_idx" )
451
461
# Only log when send_idx is explicitly 0
452
462
if send_idx == 0 and not request .return_token_ids :
0 commit comments