Skip to content

Commit b21544b

Browse files
authored
Merge pull request #40 from MooreThreads/add_webUI_demo
Add web UI demo
2 parents 736438f + d96a131 commit b21544b

File tree

3 files changed

+323
-27
lines changed

3 files changed

+323
-27
lines changed

vllm/demo/gradio_demo/app.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
import gradio as gr
2+
import requests
3+
import json
4+
import argparse
5+
import time
6+
import gradio_musa
7+
8+
9+
def parse_args():
    """Parse the command-line options of the demo.

    Returns:
        argparse.Namespace with ``ip`` and ``port`` (both strings, defaulting
        to "0.0.0.0" and "8000") and ``model_name`` (``None`` when the flag
        is not given).
    """
    cli = argparse.ArgumentParser(description="Start the vLLM server.")

    # One (flag, add_argument keyword arguments) entry per supported option.
    option_table = (
        (
            "--ip",
            {
                "type": str,
                "default": "0.0.0.0",  # used when --ip is not passed
                "help": "IP address to bind to (default: 0.0.0.0)",
            },
        ),
        (
            "--port",
            {
                "type": str,
                "default": "8000",  # used when --port is not passed
                "help": "Port number to use (default: 8000)",
            },
        ),
        (
            "--model-name",
            {
                "type": str,
                "help": "Model Name",
            },
        ),
    )
    for flag, options in option_table:
        cli.add_argument(flag, **options)

    return cli.parse_args()
36+
37+
# The command line is parsed once, at import time; the values below are
# shared by the request handler and the UI launch call.
args = parse_args()

# Model name and chat-completions address of the vLLM inference service.
MODEL_NAME = args.model_name
VLLM_API_URL = f"http://{args.ip}:{args.port}/v1/chat/completions"
41+
42+
43+
# Streaming request handler.
def chat_with_model_streaming(user_input, history):
    """Stream one chat completion from the vLLM server into the UI.

    A fixed system prompt plus ``user_input`` is posted to ``VLLM_API_URL``
    with ``stream=True`` and the ``data: ...`` lines of the response are
    parsed one by one. Earlier turns in ``history`` are only redisplayed;
    they are not sent to the model.

    Args:
        user_input: Text the user just submitted.
        history: Current chatbot contents as a list of (user, bot) pairs, or
            a falsy value when the chat is empty.

    Yields:
        3-tuples ``(chat_history, textbox_value, speed_text)``, one per
        output component wired to this handler. ``textbox_value`` is always
        "" so the input box is cleared. While streaming, ``speed_text`` is a
        progress label; the last tuple carries the timing summary, or a
        failure label if the request raised.
    """
    payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": user_input},
        ],
        "stream": True,  # ask the server for incremental output
    }

    history = history or []  # tolerate an empty/None chat
    bot_response = ""  # reply accumulated so far
    token_count = 0  # number of content chunks received
    start_time = time.perf_counter()

    try:
        # Only the connection phase is bounded; reads stay unbounded so a
        # slow generation is not cut off mid-stream.
        with requests.post(
            VLLM_API_URL, json=payload, stream=True, timeout=(10, None)
        ) as response:
            response.raise_for_status()

            for chunk in response.iter_lines():
                if not chunk:
                    continue
                chunk_str = chunk.decode("utf-8").strip()
                if not chunk_str.startswith("data: "):
                    continue
                chunk_data = chunk_str[6:]  # drop the "data: " prefix
                if chunk_data == "[DONE]":
                    continue
                try:
                    chunk_json = json.loads(chunk_data)
                except json.JSONDecodeError:
                    # Best effort: skip a malformed line, keep streaming.
                    continue

                # Chunks without choices or without text (missing or null
                # "content") carry nothing to display; skipping them avoids
                # aborting a reply that is otherwise streaming fine.
                choices = chunk_json.get("choices")
                if not choices:
                    continue
                content = (choices[0].get("delta") or {}).get("content")
                if not content:
                    continue

                bot_response += content
                token_count += 1
                yield history + [(user_input, bot_response)], "", "推理中..."

        elapsed_time = time.perf_counter() - start_time
        tps = token_count / elapsed_time if elapsed_time > 0 else 0

        speed_text = f"⏱️ 耗时: {elapsed_time:.2f} 秒 | 🔢 Tokens: {token_count} | ⚡ 速度: {tps:.2f} TPS"
        yield history + [(user_input, bot_response)], "", speed_text

    except Exception as e:
        # UI boundary: report the failure in the chat instead of raising.
        # Three values are yielded because the handler drives three outputs.
        bot_response = f"❌ 推理失败: {e}"
        yield history + [(user_input, bot_response)], "", "❌ 推理失败"
93+
94+
95+
96+
# Reset the chat history and the timing display.
def clear_chat():
    """Return the blank state for the three UI outputs.

    Returns:
        Tuple of (empty chat history, empty input text, zeroed speed text).
    """
    empty_history = []
    empty_input = ""
    zeroed_speed = "⏱️ 耗时: 0.00 秒 | 🔢 Tokens: 0 | ⚡ 速度: 0.00 TPS"
    return empty_history, empty_input, zeroed_speed
99+
100+
# Assemble the Gradio interface.
with gradio_musa.Blocks() as demo:
    chatbot = gr.Chatbot(label="Running on MTT S4000")
    msg_input = gr.Textbox(
        placeholder="请输入你的问题",
        label="输入...",
        lines=1,
        autofocus=True,
    )

    # Read-only box showing the timing summary of the latest reply.
    speed_display = gr.Textbox(
        label="推理速度",
        value="⏱️ 耗时: 0.00 秒 | 🔢 Tokens: 0 | ⚡ 速度: 0.00 TPS",
        interactive=False,
    )

    with gr.Row():
        submit_btn = gr.Button(value="提交")
        clear_btn = gr.Button("清除历史")

    # Pressing Enter in the textbox and clicking the submit button both run
    # the streaming handler with the same inputs and outputs.
    for register in (msg_input.submit, submit_btn.click):
        register(
            chat_with_model_streaming,
            inputs=[msg_input, chatbot],
            outputs=[chatbot, msg_input, speed_display],
        )

    # Clearing resets the chat, the input box and the timing display.
    clear_btn.click(
        clear_chat,
        inputs=[],
        outputs=[chatbot, msg_input, speed_display],
    )

demo.queue()  # queueing is enabled so generator handlers can stream
demo.launch(server_name=args.ip)
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import gradio as gr
2+
3+
4+
# Page title; interpolated into both the <h1> heading and the title script.
TITLE = ""

# Banner markup placed at the top of the page.
# NOTE(review): the image file name in the URL ("[email protected]") looks
# mangled — confirm the real asset path before relying on it.
TOP = "\n".join((
    '<div class="top">',
    '<div class="top-container">',
    '<img class="logo" width="140" height="37" src="https://kuae-playground.mthreads.com/image/[email protected]">',
    '<h2>GPU GENIUS</h2>',
    '</div>',
    '</div>',
))

# Copyright line placed at the bottom of the page.
FOOTER = "\n".join((
    '<div class="footer">',
    '<span>Copyright © 2024-2025 摩尔线程版权所有 京公网安备 11010802036174号 京ICP证2020041674号</span>',
    '</div>',
))

# Script that sets the browser tab title to TITLE once the page has loaded.
js_change_title = "\n".join((
    "window.onload = function() {",
    f'document.title = "{TITLE}";',
    "}",
))

# Banner followed by the page heading and an opening paragraph tag.
HEADER = f"{TOP}<h1>{TITLE}</h1><p>"
23+
24+
25+
26+
# Stylesheet handed to gr.Blocks.
# The last rule used to read `footer { visible; }`, which is not a valid CSS
# declaration and is ignored by browsers. It is now spelled
# `visibility: visible;`, which keeps the same effective rendering.
# NOTE(review): confirm that showing the <footer> element is the intent.
CSS = '''body {
margin: 0;
background: #F8F8F8;
font-size: 22px;
color: #666666;
}
p {
font-size: 16px;
}
.top {
left: 0;
top: 0;
height: 3.83%;
opacity: 1;
justify-content: center;
background: white;
}
.top-container {
/* 原样式 */
margin: 0 auto;
max-width: 1500px;
padding: 10px;
display: flex;
padding: 16px 0;
overflow: hidden;
}
.logo {
margin: 0;
padding: 0 20px;
}
h2 {
position: relative;
margin: 0;
font-size: 21px;
font-weight: normal;
line-height: 20px;
letter-spacing: 0;
padding: 5px 20px;
color: #666666;
}
.top-container>h2:before {
background: #dcdfe6;
/* 设置背景颜色为浅灰色 */
content: "";
/* 伪元素的内容,这里为空,意味着不会显示任何文本 */
height: 16px;
/* 设置伪元素的高度为 16px */
left: 0;
/* 设置伪元素的左边距为 0,即与其定位的父元素(这里是 h2)的左边对齐 */
position: absolute;
/* 设置位置为绝对定位,从而允许我们根据父元素进行准确放置 */
top: 50%;
/* 设置伪元素的上边距为 50%,这样伪元素的顶部将对齐到父元素的中间 */
transform: translateY(-50%);
/* 将伪元素向上移动自身高度的一半,即使其完全居中于父元素 */
width: 1px;
/* 设置伪元素的宽度为 1px,表现为一个细线 */
}
h1 {
text-align: center;
display: block;
}

.footer {
padding: 20px;
text-align: center;
font-size: 16px;
}

.footer .logo {
display: inline-block;
/* 内联块元素使其与文本对齐 */
margin-right: 10px;
/* 右边距 */
}

.footer a {
color: #666666;
text-decoration: none;
}
footer {
visibility: visible;
}'''
109+
110+
111+
class Blocks(gr.Blocks):
    """``gr.Blocks`` layout that surrounds its content with the page header and footer.

    Entering the context adds ``HEADER`` as the first component; leaving it
    adds ``FOOTER`` as the last one.
    """

    def __init__(self, **kwargs):
        """Create the layout, applying the module stylesheet and title script.

        Args:
            **kwargs: Forwarded to ``gr.Blocks``. ``css`` and ``js`` default
                to ``CSS`` and ``js_change_title`` when the caller does not
                supply them. Previously every keyword argument was accepted
                but silently discarded.
        """
        kwargs.setdefault("css", CSS)
        kwargs.setdefault("js", js_change_title)
        super().__init__(**kwargs)

    def __enter__(self):
        """Open the layout context and add the header before any user content."""
        context = super().__enter__()
        gr.HTML(HEADER)
        return context

    def __exit__(self, exc_type, exc_value, traceback):
        """Add the footer after the user content, then close the layout context."""
        gr.HTML(FOOTER)
        return super().__exit__(exc_type, exc_value, traceback)

0 commit comments

Comments
 (0)