@@ -52,6 +52,9 @@ class BenchmarkMetrics:
5252 mean_tpot_ms : float
5353 median_tpot_ms : float
5454 p99_tpot_ms : float
55+ mean_e2el_ms : float
56+ median_e2el_ms : float
57+ p99_e2el_ms : float
5558
5659
5760def sample_sharegpt_requests (
@@ -236,6 +239,7 @@ def calculate_metrics(
236239 completed = 0
237240 tpots = []
238241 ttfts = []
242+ e2els = []
239243 for i in range (len (outputs )):
240244 if outputs [i ].success :
241245 output_len = len (tokenizer (outputs [i ].generated_text ).input_ids )
@@ -245,6 +249,7 @@ def calculate_metrics(
245249 tpots .append (
246250 (outputs [i ].latency - outputs [i ].ttft ) / (output_len - 1 ))
247251 ttfts .append (outputs [i ].ttft )
252+ e2els .append (outputs [i ].latency )
248253 completed += 1
249254 else :
250255 actual_output_lens .append (0 )
@@ -263,6 +268,9 @@ def calculate_metrics(
263268 mean_tpot_ms = np .mean (tpots ) * 1000 ,
264269 median_tpot_ms = np .median (tpots ) * 1000 ,
265270 p99_tpot_ms = np .percentile (tpots , 99 ) * 1000 ,
271+ mean_e2el_ms = np .mean (e2els or 0 ) * 1000 ,
272+ median_e2el_ms = np .median (e2els or 0 ) * 1000 ,
273+ p99_e2el_ms = np .percentile (e2els , 99 ) * 1000 ,
266274 )
267275
268276 return metrics , actual_output_lens
@@ -359,6 +367,13 @@ async def limited_request_func(request_func_input, pbar):
359367 print ("{:<40} {:<10.2f}" .format ("Median TPOT (ms):" ,
360368 metrics .median_tpot_ms ))
361369 print ("{:<40} {:<10.2f}" .format ("P99 TPOT (ms):" , metrics .p99_tpot_ms ))
370+ print ("{s:{c}^{n}}" .format (s = 'Time End-to-end Latency' ,
371+ n = 50 ,
372+ c = '-' ))
373+ print ("{:<40} {:<10.2f}" .format ("Mean E2EL (ms):" , metrics .mean_e2el_ms ))
374+ print ("{:<40} {:<10.2f}" .format ("Median E2EL (ms):" ,
375+ metrics .median_e2el_ms ))
376+ print ("{:<40} {:<10.2f}" .format ("P99 E2EL (ms):" , metrics .p99_e2el_ms ))
362377 print ("=" * 50 )
363378
364379 result = {
@@ -375,10 +390,14 @@ async def limited_request_func(request_func_input, pbar):
375390 "mean_tpot_ms" : metrics .mean_tpot_ms ,
376391 "median_tpot_ms" : metrics .median_tpot_ms ,
377392 "p99_tpot_ms" : metrics .p99_tpot_ms ,
393+ "mean_e2el_ms" : metrics .mean_e2el_ms ,
394+ "median_e2el_ms" : metrics .median_e2el_ms ,
395+ "p99_e2el_ms" : metrics .p99_e2el_ms ,
378396 "input_lens" : [output .prompt_len for output in outputs ],
379397 "output_lens" : actual_output_lens ,
380398 "ttfts" : [output .ttft for output in outputs ],
381399 "itls" : [output .itl for output in outputs ],
400+ "e2els" : [output .latency for output in outputs ],
382401 "generated_texts" : [output .generated_text for output in outputs ],
383402 "errors" : [output .error for output in outputs ],
384403 }
0 commit comments