Skip to content

Commit 736438f

Browse files
authored
Merge pull request #39 from MooreThreads/modify_vllm_demo
fix(vllm): fix a bug in json and modify attention for vllm server
2 parents c65f6a8 + 12c3692 commit 736438f

File tree

2 files changed

+9
-11
lines changed

2 files changed

+9
-11
lines changed

vllm/demo/run_vllm_serving.sh

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -197,19 +197,16 @@ wait_for_log_update() {
197197
local elapsed=0
198198
local no_change_count=0
199199

200-
# 确保日志文件存在
201-
touch "$log_file"
202-
203200
# 获取初始日志文件大小
204201
local last_size=$(stat -c%s "$log_file")
205202

206203
while ((elapsed < timeout)); do
207-
sleep 10
208204
local current_size=$(stat -c%s "$log_file")
205+
local last_line=$(tail -n 5 "$log_file" 2>/dev/null | grep -E -v '^[[:space:]]*$')
209206

210-
echo -e "\e[32m"
211-
if [[ "$current_size" -eq "$last_size" ]]; then
212-
207+
if grep -q -E "Uvicorn running on http://" <<< "$last_line" && \
208+
[ "$current_size" -ne "$last_size" ]; then
209+
echo -e "\e[32m"
213210
if [ -z "$CONTAINER_NAME" ]; then
214211
echo "Please send the following request to obtain the model inference result."
215212
else
@@ -256,8 +253,9 @@ start_server() {
256253
local served_model_name="$3"
257254

258255
log_file=$(dirname "$converted_model_path")/model_server.log
256+
: > "$log_file"
259257
echo "Wait for the service to start..."
260-
python -m vllm.entrypoints.openai.api_server \
258+
PYTHONUNBUFFERED=1 setsid python -m vllm.entrypoints.openai.api_server \
261259
--model "$converted_model_path" \
262260
--trust-remote-code \
263261
--tensor-parallel-size "$tensor_parallel_size" \
@@ -267,7 +265,7 @@ start_server() {
267265
--disable-log-stats \
268266
--disable-log-requests \
269267
--device "musa" \
270-
--served-model-name "$served_model_name" "$log_file" 2>&1 &
268+
--served-model-name "$served_model_name" 2>&1 | tee -a "$log_file" &
271269

272270
SERVER_PID=$!
273271

vllm/demo/supported_models.json

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
{
22
"deepseek-r1-distill-qwen-1.5b": {
3-
"name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
3+
"name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
44
"modelscope_url": "https://www.modelscope.cn/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B.git",
5-
"huggingface_url": "https://www.modelscope.cn/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
5+
"huggingface_url": "https://www.modelscope.cn/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
66
"tensor_parallel_size": [1]
77
},
88

0 commit comments

Comments
 (0)