commit 1bf5b04 (1 parent: d4c2ff0)
vllm/demo/gradio_demo/app.py
@@ -88,7 +88,7 @@ def chat_with_model_streaming(user_input, history):
             first_token_latency = first_token_time - start_time if first_token_time is not None else 0
             elapsed_time = time.time() - first_token_time
             tps = token_count / elapsed_time if elapsed_time > 0 else 0  # ✅ compute tokens per second
-            speed_text = f"⏳ First-token latency: {first_token_latency:.2f} | ⏱️ Elapsed: {elapsed_time:.2f} s | 🔢 Tokens: {token_count} | ⚡ Speed: {tps:.2f} TPS"  # ⏳
+            speed_text = f"⏳ First-token latency: {first_token_latency:.2f} s | ⏱️ Elapsed: {elapsed_time:.2f} s | 🔢 Tokens: {token_count} | ⚡ Speed: {tps:.2f} TPS"  # ⏳
             yield history + [(user_input, bot_response)], "", speed_text  # ✅ yield inference speed
 
     except Exception as e:
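The change itself is small: it adds the missing seconds unit to the first-token-latency label in the Gradio status string. For readers who want to reproduce the measurement outside the demo, here is a minimal, self-contained Python sketch of the same pattern: timestamp the first streamed token, then derive first-token latency and tokens-per-second inside the streaming loop. The names fake_stream and stream_with_metrics are hypothetical stand-ins, not code from this repository's app.py.

# Minimal sketch (assumed names, not the repo's actual app.py): measure
# first-token latency and tokens-per-second around a streaming generator.
import time

def fake_stream(prompt):
    # Hypothetical stand-in for the real streaming model call.
    for tok in ["Hello", ",", " world", "!"]:
        time.sleep(0.05)  # simulate per-token generation delay
        yield tok

def stream_with_metrics(prompt):
    start_time = time.time()
    first_token_time = None
    token_count = 0
    bot_response = ""

    for tok in fake_stream(prompt):
        if first_token_time is None:
            first_token_time = time.time()  # wall-clock time of the first streamed token
        token_count += 1
        bot_response += tok

        first_token_latency = first_token_time - start_time
        elapsed_time = time.time() - first_token_time  # streaming time after the first token
        tps = token_count / elapsed_time if elapsed_time > 0 else 0
        speed_text = (
            f"⏳ First-token latency: {first_token_latency:.2f} s | "
            f"⏱️ Elapsed: {elapsed_time:.2f} s | "
            f"🔢 Tokens: {token_count} | ⚡ Speed: {tps:.2f} TPS"
        )
        yield bot_response, speed_text

if __name__ == "__main__":
    for partial, stats in stream_with_metrics("hi"):
        print(stats)

As in the diff above, TPS is computed from the time elapsed after the first token arrives, so first-token latency is excluded from the throughput figure.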