Skip to content

Commit 6579184

Browse files
authored
Add files via upload
1 parent 00f3e4c commit 6579184

File tree

1 file changed

+19
-0
lines changed

1 file changed

+19
-0
lines changed

vllm/benchmarks/benchmark_serving.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,9 @@ class BenchmarkMetrics:
5252
mean_tpot_ms: float
5353
median_tpot_ms: float
5454
p99_tpot_ms: float
55+
mean_e2el_ms: float
56+
median_e2el_ms: float
57+
p99_e2el_ms: float
5558

5659

5760
def sample_sharegpt_requests(
@@ -236,6 +239,7 @@ def calculate_metrics(
236239
completed = 0
237240
tpots = []
238241
ttfts = []
242+
e2els = []
239243
for i in range(len(outputs)):
240244
if outputs[i].success:
241245
output_len = len(tokenizer(outputs[i].generated_text).input_ids)
@@ -245,6 +249,7 @@ def calculate_metrics(
245249
tpots.append(
246250
(outputs[i].latency - outputs[i].ttft) / (output_len - 1))
247251
ttfts.append(outputs[i].ttft)
252+
e2els.append(outputs[i].latency)
248253
completed += 1
249254
else:
250255
actual_output_lens.append(0)
@@ -263,6 +268,9 @@ def calculate_metrics(
263268
mean_tpot_ms=np.mean(tpots) * 1000,
264269
median_tpot_ms=np.median(tpots) * 1000,
265270
p99_tpot_ms=np.percentile(tpots, 99) * 1000,
271+
mean_e2el_ms=np.mean(e2els or 0) * 1000,
272+
median_e2el_ms=np.median(e2els or 0) * 1000,
273+
p99_e2el_ms=np.percentile(e2els, 99) * 1000,
266274
)
267275

268276
return metrics, actual_output_lens
@@ -359,6 +367,13 @@ async def limited_request_func(request_func_input, pbar):
359367
print("{:<40} {:<10.2f}".format("Median TPOT (ms):",
360368
metrics.median_tpot_ms))
361369
print("{:<40} {:<10.2f}".format("P99 TPOT (ms):", metrics.p99_tpot_ms))
370+
print("{s:{c}^{n}}".format(s='Time End-to-end Latency',
371+
n=50,
372+
c='-'))
373+
print("{:<40} {:<10.2f}".format("Mean E2EL (ms):", metrics.mean_e2el_ms))
374+
print("{:<40} {:<10.2f}".format("Median E2EL (ms):",
375+
metrics.median_e2el_ms))
376+
print("{:<40} {:<10.2f}".format("P99 E2EL (ms):", metrics.p99_e2el_ms))
362377
print("=" * 50)
363378

364379
result = {
@@ -375,10 +390,14 @@ async def limited_request_func(request_func_input, pbar):
375390
"mean_tpot_ms": metrics.mean_tpot_ms,
376391
"median_tpot_ms": metrics.median_tpot_ms,
377392
"p99_tpot_ms": metrics.p99_tpot_ms,
393+
"mean_e2el_ms": metrics.mean_e2el_ms,
394+
"median_e2el_ms": metrics.median_e2el_ms,
395+
"p99_e2el_ms": metrics.p99_e2el_ms,
378396
"input_lens": [output.prompt_len for output in outputs],
379397
"output_lens": actual_output_lens,
380398
"ttfts": [output.ttft for output in outputs],
381399
"itls": [output.itl for output in outputs],
400+
"e2els": [output.latency for output in outputs],
382401
"generated_texts": [output.generated_text for output in outputs],
383402
"errors": [output.error for output in outputs],
384403
}

0 commit comments

Comments
 (0)