File tree Expand file tree Collapse file tree 1 file changed +12
-5
lines changed Expand file tree Collapse file tree 1 file changed +12
-5
lines changed Original file line number Diff line number Diff line change @@ -545,11 +545,13 @@ def _run_engine(
545
545
total = num_requests ,
546
546
desc = "Processed prompts" ,
547
547
dynamic_ncols = True ,
548
- postfix = f"Generation Speed: { 0 :.2f} toks/s" ,
548
+ postfix = (f"est. speed input: { 0 :.2f} toks/s, "
549
+ f"output: { 0 :.2f} toks/s" ),
549
550
)
550
551
# Run the engine.
551
552
outputs : List [Union [RequestOutput , EmbeddingRequestOutput ]] = []
552
- total_toks = 0
553
+ total_in_toks = 0
554
+ total_out_toks = 0
553
555
while self .llm_engine .has_unfinished_requests ():
554
556
step_outputs = self .llm_engine .step ()
555
557
for output in step_outputs :
@@ -558,10 +560,15 @@ def _run_engine(
558
560
if use_tqdm :
559
561
if isinstance (output , RequestOutput ):
560
562
# Calculate tokens only for RequestOutput
561
- total_toks += sum (
563
+ total_in_toks += len (output .prompt_token_ids )
564
+ in_spd = total_in_toks / pbar .format_dict ["elapsed" ]
565
+ total_out_toks += sum (
562
566
len (stp .token_ids ) for stp in output .outputs )
563
- spd = total_toks / pbar .format_dict ["elapsed" ]
564
- pbar .postfix = f"Generation Speed: { spd :.2f} toks/s"
567
+ out_spd = total_out_toks / pbar .format_dict [
568
+ "elapsed" ]
569
+ pbar .postfix = (
570
+ f"est. speed input: { in_spd :.2f} toks/s, "
571
+ f"output: { out_spd :.2f} toks/s" )
565
572
pbar .update (1 )
566
573
if use_tqdm :
567
574
pbar .close ()
You can’t perform that action at this time.
0 commit comments