@@ -56,8 +56,8 @@ class BenchmarkMetrics:
5656 total_input : int
5757 total_output : int
5858 request_throughput : float
59- input_throughput : float
6059 output_throughput : float
60+ total_token_throughput : float
6161 mean_ttft_ms : float
6262 median_ttft_ms : float
6363 std_ttft_ms : float
@@ -283,8 +283,8 @@ def calculate_metrics(
283283 total_input = total_input ,
284284 total_output = sum (actual_output_lens ),
285285 request_throughput = completed / dur_s ,
286- input_throughput = total_input / dur_s ,
287286 output_throughput = sum (actual_output_lens ) / dur_s ,
287+ total_token_throughput = (total_input + sum (actual_output_lens )) / dur_s ,
288288 mean_ttft_ms = np .mean (ttfts or 0 ) *
289289 1000 , # ttfts is empty if streaming is not supported by backend
290290 std_ttft_ms = np .std (ttfts or 0 ) * 1000 ,
@@ -426,19 +426,19 @@ async def benchmark(
426426 metrics .total_output ))
427427 print ("{:<40} {:<10.2f}" .format ("Request throughput (req/s):" ,
428428 metrics .request_throughput ))
429- print ("{:<40} {:<10.2f}" .format ("Input token throughput (tok/s):" ,
430- metrics .input_throughput ))
431429 print ("{:<40} {:<10.2f}" .format ("Output token throughput (tok/s):" ,
432430 metrics .output_throughput ))
431+ print ("{:<40} {:<10.2f}" .format ("Total Token throughput (tok/s):" ,
432+ metrics .total_token_throughput ))
433433
434434 result = {
435435 "duration" : benchmark_duration ,
436436 "completed" : metrics .completed ,
437437 "total_input_tokens" : metrics .total_input ,
438438 "total_output_tokens" : metrics .total_output ,
439439 "request_throughput" : metrics .request_throughput ,
440- "input_throughput" : metrics .input_throughput ,
441440 "output_throughput" : metrics .output_throughput ,
441+ "total_token_throughput" : metrics .total_token_throughput ,
442442 "input_lens" : [output .prompt_len for output in outputs ],
443443 "output_lens" : actual_output_lens ,
444444 "ttfts" : [output .ttft for output in outputs ],
0 commit comments