22import requests
33import json
44import argparse
5+ import time
6+ import gradio_musa
57
68
79def parse_args ():
@@ -52,6 +54,10 @@ def chat_with_model_streaming(user_input, history):
5254 history = history or [] # 初始化历史记录
5355 bot_response = "" # 存储逐步生成的回答
5456
57+ # ✅ 记录开始时间
58+ start_time = time .time ()
59+ token_count = 0 # ✅ 记录生成的 Token 数量
60+
5561 try :
5662 # ✅ 使用 requests 的流式请求
5763 with requests .post (VLLM_API_URL , json = payload , stream = True ) as response :
@@ -70,25 +76,45 @@ def chat_with_model_streaming(user_input, history):
7076 if "content" in delta :
7177 bot_response += delta ["content" ]
7278 # ✅ 逐步更新聊天记录
73- yield history + [(user_input , bot_response )], ""
79+ token_count += 1 # ✅ 每个 Token 计数
80+ yield history + [(user_input , bot_response )], "" , "推理中..."
7481 except json .JSONDecodeError :
7582 pass
83+ # ✅ 记录结束时间 & 计算时长
84+ elapsed_time = time .time () - start_time
85+ tps = token_count / elapsed_time if elapsed_time > 0 else 0 # ✅ 计算 Tokens Per Second
86+
87+ speed_text = f"⏱️ 耗时: { elapsed_time :.2f} 秒 | 🔢 Tokens: { token_count } | ⚡ 速度: { tps :.2f} TPS"
88+ yield history + [(user_input , bot_response )], "" , speed_text # ✅ 返回推理速度
7689
7790 except Exception as e :
7891 bot_response = f"❌ 推理失败: { str (e )} "
7992 yield history + [(user_input , bot_response )], ""
8093
94+
95+
# Reset the chat history, the input box and the speed readout.
def clear_chat():
    """Return the blank UI state used by the "clear history" button.

    Returns:
        A 3-tuple ``(history, input_text, speed_text)``: an empty chat
        history list, an empty input string, and the zeroed speed line.
    """
    empty_history = []
    empty_input = ""
    zero_speed = "⏱️ 耗时: 0.00 秒 | 🔢 Tokens: 0 | ⚡ 速度: 0.00 TPS"
    return empty_history, empty_input, zero_speed
99+
81100# Build the Gradio interface
82- with gr .Blocks () as demo :
83- gr .Markdown ("## 💬 Web UI 接入 vLLM 模型(流式输出)" )
84- chatbot = gr .Chatbot ()
85- txt = gr .Textbox (placeholder = "请输入你的问题" , label = "输入" )
86- clear = gr .Button ("清除" )
87- submit = gr .Button ("提交" )
101+ with gradio_musa .Blocks () as demo :
102+ # gr.Markdown("## 💬 Web UI 接入 vLLM 模型(流式输出)")
103+ chatbot = gr .Chatbot (label = "Running on MTT S4000" )
104+ msg_input = gr .Textbox (placeholder = "请输入你的问题" , label = "输入..." , lines = 1 , autofocus = True )
105+
106+ speed_display = gr .Textbox (label = "推理速度" , value = "⏱️ 耗时: 0.00 秒 | 🔢 Tokens: 0 | ⚡ 速度: 0.00 TPS" , interactive = False ) # Non-interactive box that displays the inference speed line
107+
108+ # clear = gr.Button("清除")
109+ # submit = gr.Button("提交")
110+ with gr .Row ():
111+ submit_btn = gr .Button (value = "提交" )
112+ clear_btn = gr .Button ("清除历史" ) # Clear-history button
88113
89114 # Wire the events to the streaming handler; each binding feeds three outputs (chatbot, msg_input, speed_display) — NOTE(review): the except path of chat_with_model_streaming still yields only two values, confirm and align
90- submit . click (chat_with_model_streaming , [ txt , chatbot ], [chatbot , txt ])
91- txt . submit (chat_with_model_streaming , [ txt , chatbot ], [chatbot , txt ])
92- clear .click (lambda : ([], "" ), [], [chatbot , txt ])
115+ msg_input . submit (chat_with_model_streaming , inputs = [ msg_input , chatbot ], outputs = [chatbot , msg_input , speed_display ]) # Triggered by pressing Enter in the input box
116+ submit_btn . click (chat_with_model_streaming , inputs = [ msg_input , chatbot ], outputs = [chatbot , msg_input , speed_display ]) # Triggered by the submit button
117+ clear_btn .click (clear_chat , inputs = [], outputs = [chatbot , msg_input , speed_display ]) # Reset the chat, the input box and the timing display
93118
119+ demo .queue () # Enable the queue; per the original note this is what allows streamed output
94120demo .launch (server_name = args .ip )
0 commit comments