Skip to content

Commit 7d19de2

Browse files
authored
[Frontend] Add "input speed" to tqdm postfix alongside output speed (#5425)
1 parent 94a07bb commit 7d19de2

File tree

1 file changed

+12
-5
lines changed

1 file changed

+12
-5
lines changed

vllm/entrypoints/llm.py

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -545,11 +545,13 @@ def _run_engine(
545545
total=num_requests,
546546
desc="Processed prompts",
547547
dynamic_ncols=True,
548-
postfix=f"Generation Speed: {0:.2f} toks/s",
548+
postfix=(f"est. speed input: {0:.2f} toks/s, "
549+
f"output: {0:.2f} toks/s"),
549550
)
550551
# Run the engine.
551552
outputs: List[Union[RequestOutput, EmbeddingRequestOutput]] = []
552-
total_toks = 0
553+
total_in_toks = 0
554+
total_out_toks = 0
553555
while self.llm_engine.has_unfinished_requests():
554556
step_outputs = self.llm_engine.step()
555557
for output in step_outputs:
@@ -558,10 +560,15 @@ def _run_engine(
558560
if use_tqdm:
559561
if isinstance(output, RequestOutput):
560562
# Calculate tokens only for RequestOutput
561-
total_toks += sum(
563+
total_in_toks += len(output.prompt_token_ids)
564+
in_spd = total_in_toks / pbar.format_dict["elapsed"]
565+
total_out_toks += sum(
562566
len(stp.token_ids) for stp in output.outputs)
563-
spd = total_toks / pbar.format_dict["elapsed"]
564-
pbar.postfix = f"Generation Speed: {spd:.2f} toks/s"
567+
out_spd = total_out_toks / pbar.format_dict[
568+
"elapsed"]
569+
pbar.postfix = (
570+
f"est. speed input: {in_spd:.2f} toks/s, "
571+
f"output: {out_spd:.2f} toks/s")
565572
pbar.update(1)
566573
if use_tqdm:
567574
pbar.close()

0 commit comments

Comments
 (0)