@@ -245,6 +245,7 @@ def create_stream_response_json(
         index: int,
         text: str,
         finish_reason: Optional[str] = None,
+        usage: Optional[UsageInfo] = None,
     ) -> str:
         choice_data = ChatCompletionResponseStreamChoice(
             index=index,
@@ -257,7 +258,10 @@ def create_stream_response_json(
             model=model_name,
             choices=[choice_data],
         )
-        response_json = response.json(ensure_ascii=False)
+        if usage is not None:
+            response.usage = usage
+        # exclude unset to leave details out of each sse
+        response_json = response.json(exclude_unset=True, ensure_ascii=False)
 
         return response_json
 
@@ -283,17 +287,25 @@ async def completion_stream_generator() -> AsyncGenerator[str, None]:
                 i = output.index
                 delta_text = output.text[len(previous_texts[i]):]
                 previous_texts[i] = output.text
-                previous_num_tokens[i] = len(output.token_ids)
+                completion_tokens = len(output.token_ids)
+                previous_num_tokens[i] = completion_tokens
                 response_json = create_stream_response_json(
                     index=i,
                     text=delta_text,
                 )
                 yield f"data: {response_json}\n\n"
                 if output.finish_reason is not None:
+                    prompt_tokens = len(res.prompt_token_ids)
+                    final_usage = UsageInfo(
+                        prompt_tokens=prompt_tokens,
+                        completion_tokens=completion_tokens,
+                        total_tokens=prompt_tokens + completion_tokens,
+                    )
                     response_json = create_stream_response_json(
                         index=i,
                         text="",
                         finish_reason=output.finish_reason,
+                        usage=final_usage,
                     )
                     yield f"data: {response_json}\n\n"
         yield "data: [DONE]\n\n"
@@ -462,6 +474,7 @@ def create_stream_response_json(
         text: str,
         logprobs: Optional[LogProbs] = None,
         finish_reason: Optional[str] = None,
+        usage: Optional[UsageInfo] = None,
     ) -> str:
         choice_data = CompletionResponseStreamChoice(
             index=index,
@@ -475,7 +488,9 @@ def create_stream_response_json(
             model=model_name,
             choices=[choice_data],
         )
-        response_json = response.json(ensure_ascii=False)
+        if usage is not None:
+            response.usage = usage
+        response_json = response.json(exclude_unset=True, ensure_ascii=False)
 
         return response_json
 
@@ -505,11 +520,19 @@ async def completion_stream_generator() -> AsyncGenerator[str, None]:
                 if output.finish_reason is not None:
                     logprobs = (LogProbs()
                                 if request.logprobs is not None else None)
+                    prompt_tokens = len(res.prompt_token_ids)
+                    completion_tokens = len(output.token_ids)
+                    final_usage = UsageInfo(
+                        prompt_tokens=prompt_tokens,
+                        completion_tokens=completion_tokens,
+                        total_tokens=prompt_tokens + completion_tokens,
+                    )
                     response_json = create_stream_response_json(
                         index=i,
                         text="",
                         logprobs=logprobs,
                         finish_reason=output.finish_reason,
+                        usage=final_usage,
                     )
                     yield f"data: {response_json}\n\n"
         yield "data: [DONE]\n\n"
0 commit comments