@@ -259,7 +259,7 @@ def main():
259259 seed_all (args .seed )
260260 url = args .url
261261 tokenizer = get_tokenizer (args .tokenizer_path )
262- # qps发送模式发送请求的数量不固定,这里暂定为reqs_num的10倍
262+ # In QPS send mode the number of requests sent is not fixed, so provisionally generate 10x input_num prompts
263263 prompts , input_lens , max_new_tokens = gen_random_data (
264264 args .input_len , args .output_len , 10 * args .input_num , tokenizer
265265 )
@@ -309,19 +309,21 @@ def main():
309309 )
310310 print (f"Total QPS: { valid_num / (end_time - start_time )} " )
311311 print (f"Sender QPS: { sent_reqs / (end_time - start_time )} " )
312- print (f"Avg Input Length: { sum (input_lens ) / len (input_lens )} " )
312+ print (f"Avg Input Length: { sum (input_lens [: valid_num ] ) / len (input_lens [: valid_num ] )} " )
313313 print (f"Avg Output Length: { sum (final_output_lens ) / len (final_output_lens )} " )
314- print (f"Total Throughput: { (sum (input_lens ) + sum (final_output_lens )) / (end_time - start_time )} token/s" )
315- print (f"Input Throughput: { sum (input_lens ) / (end_time - start_time )} token/s" )
314+ print (
315+ f"Total Throughput: { (sum (input_lens [:valid_num ]) + sum (final_output_lens )) / (end_time - start_time )} token/s"
316+ )
317+ print (f"Input Throughput: { sum (input_lens [:valid_num ]) / (end_time - start_time )} token/s" )
316318 print (f"Output Throughput: { sum (final_output_lens ) / (end_time - start_time )} token/s" )
317319 print ("-" * 10 )
318320 dump_dict ["request_num" ] = valid_num
319321 dump_dict ["Total QPS" ] = valid_num / (end_time - start_time )
320322 dump_dict ["Sender QPS" ] = sent_reqs / (end_time - start_time )
321- dump_dict ["Avg Input Length" ] = sum (input_lens ) / len (input_lens )
323+ dump_dict ["Avg Input Length" ] = sum (input_lens [: valid_num ] ) / len (input_lens [: valid_num ] )
322324 dump_dict ["Avg Output Length" ] = sum (final_output_lens ) / len (final_output_lens )
323- dump_dict ["Total Throughput" ] = (sum (input_lens ) + sum (final_output_lens )) / (end_time - start_time )
324- dump_dict ["Input Throughput" ] = sum (input_lens ) / (end_time - start_time )
325+ dump_dict ["Total Throughput" ] = (sum (input_lens [: valid_num ] ) + sum (final_output_lens )) / (end_time - start_time )
326+ dump_dict ["Input Throughput" ] = sum (input_lens [: valid_num ] ) / (end_time - start_time )
325327 dump_dict ["Output Throughput" ] = sum (final_output_lens ) / (end_time - start_time )
326328
327329 values = np .percentile (request_time , percentiles )
0 commit comments