Commit c65f6a8

Merge pull request #38 from MooreThreads/modify_vllm_demo
fix(vllm): modify vllm demo script and json
2 parents b45d47c + c39b243 commit c65f6a8

File tree

3 files changed: +309 -76 lines changed


vllm/demo/model_config.json

Lines changed: 0 additions & 10 deletions
This file was deleted.
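
The deleted model_config.json is superseded by supported_models.json, which the rewritten script reads (MODEL_CONFIG_FILE in the diff below). Going by the fields the embedded Python looks up (modelscope_url, huggingface_url, and a tensor_parallel_size list), a plausible entry could look like the following sketch; the URLs and TP sizes are illustrative placeholders, not values from the repository:

{
    "DeepSeek-R1-Distill-Qwen-1.5B": {
        "modelscope_url": "https://modelscope.cn/models/<org>/DeepSeek-R1-Distill-Qwen-1.5B.git",
        "huggingface_url": "https://huggingface.co/<org>/DeepSeek-R1-Distill-Qwen-1.5B",
        "tensor_parallel_size": [1, 2]
    }
}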

vllm/demo/run_vllm_serving.sh

Lines changed: 280 additions & 66 deletions
@@ -1,86 +1,300 @@
 #!/bin/bash
-set -e
 
-MODEL_NAME="$1"
-CONFIG_FILE="model_config.json"
+# Default values
+MODEL=""
+CONVERTED_MODEL=""
+TP_SIZE=""
+MODEL_CONFIG_FILE="supported_models.json"
+TIME=$(date "+%Y%m%d_%H%M%S")
+DEFAULT_MODEL_DIR="/data/musa_develop_demo_$TIME"
 
-if [ -z "$MODEL_NAME" ]; then
-    echo "× Please provide the model name, for example:"
-    echo "  ./run_vllm_serving.sh DeepSeek-R1-Distill-Qwen-1.5B"
-    exit 1
-fi
 
-if [ ! -f "$CONFIG_FILE" ]; then
-    echo "× Error: Config file $CONFIG_FILE not found! Please make sure it exists."
+# Argument parsing function
+parse_args() {
+    while [[ $# -gt 0 ]]; do
+        case "$1" in
+            --task) # model_name
+                TASK="$2"
+                shift 2
+                ;;
+            --model) # original model path
+                MODEL="$2"
+                shift 2
+                ;;
+            --converted-model) # converted model path
+                CONVERTED_MODEL="$2"
+                shift 2
+                ;;
+            -tp-size)
+                TP_SIZE="$2"
+                shift 2
+                ;;
+            --container-name)
+                CONTAINER_NAME="$2"
+                shift 2
+                ;;
+            *)
+                echo "Unknown argument: $1"
+                exit 1
+                ;;
+        esac
+    done
+
+    validate_args() {
+        if [[ -z "$TASK" ]]; then
+            echo "Usage: $0 --task <model_name> [--model <original_model_path> | --converted-model <converted_model_path>] [-tp-size <tensor_parallel_size>]"
             exit 1
         fi
+    }
 
-read MODEL_URL TENSOR_PARALLEL_SIZE <<< $(python3 -c "
+    validate_args
+}
+
+
+fetch_model_info() {
+    local py_output
+    local py_status
+    local model_name="$1"
+    local tp_size="$2"
+
+    py_output=$(python3 -c "
 import json
-config_file = '$CONFIG_FILE'
-model_name = '$MODEL_NAME'
-with open(config_file, 'r') as f:
-    data = json.load(f)
+import sys
+
+config_file = '$MODEL_CONFIG_FILE'
+model_name = '$model_name'
+tp_size = '$tp_size'
+
+try:
+    with open(config_file, 'r') as f:
+        data = json.load(f)
+except Exception as e:
+    sys.stderr.write(f'× Failed to read config file: {str(e)}\\n')
+    exit(1)
+
 info = data.get(model_name, {})
-print(info.get('url', ''), info.get('tensor_parallel_size', ''))
-")
+if not info:
+    sys.stderr.write(f'× Model \"{model_name}\" is not in the supported list\\n')
+    sys.stderr.write(f'✓ Supported models: {list(data.keys())}\\n')
+    exit(2)
 
-if [ -z "$MODEL_URL" ]; then
-    echo "× $MODEL_NAME is not supported, please refer to the website to try other models: https://docs.mthreads.com/mtt/mtt-doc-online/compability"
-    exit 1
-fi
+# Handle tensor_parallel_size
+supported_tp_sizes = info.get('tensor_parallel_size', [])
+if not isinstance(supported_tp_sizes, list):
+    supported_tp_sizes = [supported_tp_sizes] if supported_tp_sizes else []
 
-echo "√ Find Succeed: $MODEL_URL"
-echo "√ tensor_parallel_size: $TENSOR_PARALLEL_SIZE"
-
-# Directories and log paths
-CURRENT_DIR=$(pwd)
-MODEL_DIR="/data/mtt/models"
-CONVERTED_MODEL_DIR="/data/mtt/models_convert"
-LOG_FILE="/data/mtt/logs/model_server.log"
-MODEL_CHECK_FILE="$MODEL_DIR/$MODEL_NAME/model.safetensors"
-SUCCESS_MESSAGE="INFO: Started server process"
-
-# Make sure the directories exist
-mkdir -p "$MODEL_DIR" "$CONVERTED_MODEL_DIR" "$(dirname "$LOG_FILE")"
-
-# Check whether the model already exists
-if [ -f "$MODEL_CHECK_FILE" ]; then
-    echo "√ The model file already exists. Skip the download step."
-else
-    echo "⬇ The model file does not exist, start downloading the model..."
-    cd "$MODEL_DIR"
-    apt update && apt install -y git-lfs jq
-    git lfs install
-    git clone "$MODEL_URL" "$MODEL_NAME"
-    echo "√ Model download completed."
-fi
+final_tp_size = None
+if tp_size and tp_size.isdigit():
+    if int(tp_size) not in supported_tp_sizes:
+        sys.stderr.write(f'× Unsupported tensor_parallel_size value: {tp_size}\\n')
+        sys.stderr.write(f'✓ Supported TP sizes: {supported_tp_sizes}\\n')
+        exit(3)
+    final_tp_size = tp_size
+else:
+    final_tp_size = supported_tp_sizes[-1] if supported_tp_sizes else 1
+
+# Output the result (tab-separated)
+print('\t'.join([
+    info.get('modelscope_url', ''),
+    info.get('huggingface_url', ''),
+    str(final_tp_size)
+]))
+" 2>&1)
+
+    py_status=$?
+    # Handle Python script errors
+    if [ $py_status -ne 0 ]; then
+        echo "$py_output" >&2
+        exit $py_status
+    fi
+
+    echo "$py_output"
+}
+
+check_and_prepare_model() {
+
+    if [ -z "$1" ]; then
+        echo "Error: the model_name argument is required" >&2
+        return 1
+    fi
+
+    local model_name="$1"
+    local model_path="$2"
+    local converted_model_path="$3"
+    local tp_size="$4"
+    local model_url="$5"
+
+    # 1. If only model_name is given, download the model
+    if [ -z "$model_path" ] && [ -z "$converted_model_path" ]; then
+
+        model_path=$DEFAULT_MODEL_DIR/$model_name
+        converted_model_path=$DEFAULT_MODEL_DIR/$model_name-tp$tp_size-converted
+
+        mkdir -p "$model_path" "$converted_model_path"
+
+        echo -e "\e[32mmodel_path: $model_path\e[0m" >&2
+        echo -e "\e[32mconverted_model_path: $converted_model_path\e[0m" >&2
+        apt-get update -qq >&2 && apt-get install -y --no-install-recommends git-lfs jq >&2
+        git lfs install >&2
+        git clone "$model_url" "$model_path" >&2 # TODO(wangkang): check whether the clone succeeded
+        echo "√ Model download completed." >&2
+
+
+    # 2. If model_path is given but converted_model_path is not
+    elif [ -n "$model_path" ] && [ -z "$converted_model_path" ]; then
+
+        # Model directory not found
+        if [ ! -e "$model_path" ]; then
+            echo "Error: model path $model_path not found" >&2
+            exit 1
+        fi
+        converted_model_path=$(dirname "$model_path")/$model_name-tp$tp_size-converted
+        echo "Automatically generated converted_model_path: $converted_model_path" >&2
+        mkdir -p "$converted_model_path"
+
+    elif [ -n "$converted_model_path" ] && [ ! -e "$converted_model_path" ]; then
+        echo "Error: converted model path $converted_model_path not found" >&2
+        exit 1
+
+    fi
+
+    if [ -z "$(ls -A "$converted_model_path")" ]; then
+        convert_weight "$model_path" "$converted_model_path" "$tp_size" >&2
+    fi
+
+    echo "$converted_model_path"
+}
+
+
+convert_weight() {
+    local model_dir="$1"
+    local converted_model_dir="$2"
+    local tp_size="$3"
+    local model_type="$4"
+
+    local python_cmd=(
+        python -u # -u disables output buffering
+        -m mttransformer.convert_weight
+        --in_file "${model_dir}"
+        --saved_dir "${converted_model_dir}"
+        --tensor-para-size "${tp_size}"
+    )
+
+    [[ -n "${model_type}" ]] && python_cmd+=(--model-type "${model_type}")
+
+    "${python_cmd[@]}"
+}
+
+
+wait_for_log_update() {
+    local log_file="$1"
+    local server_pid="$2"
+    local model_name="$3"
+    local model_path="$4"
+
+    # Timeout in seconds
+    local timeout=30
+    local elapsed=0
+    local no_change_count=0
+
+    # Make sure the log file exists
+    touch "$log_file"
+
+    # Get the initial log file size
+    local last_size=$(stat -c%s "$log_file")
+
+    while ((elapsed < timeout)); do
+        sleep 10
+        local current_size=$(stat -c%s "$log_file")
+
+        echo -e "\e[32m"
+        if [[ "$current_size" -eq "$last_size" ]]; then
+
+            if [ -z "$CONTAINER_NAME" ]; then
+                echo "Please send the following request to obtain the model inference result."
+            else
+                echo "Please send the following request in container($CONTAINER_NAME) to obtain the model inference result."
+            fi
+            cat <<EOF
+
+
+curl http://0.0.0.0:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
+    "model": "$model_name",
+    "messages": [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Who won the NBA final series in 2020?"}
+    ]
+}'
+EOF
+            echo -e "\e[0m"
+            break
+        fi
 
-# Weight conversion
-cd "${CURRENT_DIR}/.."
-./convert_weight.sh "$MODEL_DIR/$MODEL_NAME" $TENSOR_PARALLEL_SIZE
+        last_size=$current_size
+    done
 
-# Start the vLLM server
-python -m vllm.entrypoints.openai.api_server \
-    --model "$CONVERTED_MODEL_DIR/$MODEL_NAME-tp$TENSOR_PARALLEL_SIZE-convert" \
+    while ((elapsed < timeout)); do
+        sleep 30
+        ((elapsed += 2))
+
+        # Update the last recorded size
+        last_size=$current_size
+
+        # Check whether the process is still running
+        if ! ps -p "$server_pid" > /dev/null; then
+            echo "Error: vLLM server process exited unexpectedly. Check logs: $log_file"
+            return 1
+        fi
+    done
+    return 1
+}
+
+start_server() {
+    # Parse the incoming arguments
+    local converted_model_path="$1"
+    local tensor_parallel_size="$2"
+    local served_model_name="$3"
+
+    log_file=$(dirname "$converted_model_path")/model_server.log
+    echo "Wait for the service to start..."
+    python -m vllm.entrypoints.openai.api_server \
+        --model "$converted_model_path" \
         --trust-remote-code \
-    --tensor-parallel-size $TENSOR_PARALLEL_SIZE \
+        --tensor-parallel-size "$tensor_parallel_size" \
         -pp 1 \
         --block-size 64 \
         --max-model-len 2048 \
         --disable-log-stats \
         --disable-log-requests \
         --device "musa" \
-    --served-model-name model-develop_test > "$LOG_FILE" 2>&1 &
-
-pid=$!
-echo "Wait for the service to start..."
-while true; do
-    if grep -q "$SUCCESS_MESSAGE" "$LOG_FILE"; then
-        echo "√ Service has been started. If it does not work, check the log: $LOG_FILE"
-        break
-    else
-        echo "Wait for the service to start..."
-        sleep 5 # Check the log file every 5 seconds
-    fi
-done
+        --served-model-name "$served_model_name" > "$log_file" 2>&1 &
+
+    SERVER_PID=$!
+
+    wait_for_log_update "$log_file" "$SERVER_PID" "$served_model_name" "$converted_model_path"
+}
+
+
+
+# Main function
+main() {
+    parse_args "$@"
+
+    # load json
+    if ! output=$(fetch_model_info "$TASK" "$TP_SIZE"); then
+        exit 1
+    fi
+    read -r ms_url hf_url tp_size <<< "$output"
+
+    # prepare model
+    if ! output=$(check_and_prepare_model "$TASK" "$MODEL" "$CONVERTED_MODEL" "$tp_size" "$ms_url"); then
+        exit 1
+    fi
+    read -r converted_model_path <<< "$output"
+
+    start_server "$converted_model_path" "$tp_size" "$TASK"
+
+}
+
+# Run the main function
+main "$@"
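
After this change the script is driven by named flags handled in parse_args instead of a single positional model name. A minimal usage sketch under that reading (the model name is taken from the old script's usage message; the paths are illustrative, and note that the tp-size flag is spelled with a single leading dash):

# Download, convert, and serve a supported model end to end
./run_vllm_serving.sh --task DeepSeek-R1-Distill-Qwen-1.5B

# Reuse an already-downloaded model and pin the tensor-parallel size
./run_vllm_serving.sh --task DeepSeek-R1-Distill-Qwen-1.5B \
    --model /data/models/DeepSeek-R1-Distill-Qwen-1.5B \
    -tp-size 2

When both --model and --converted-model are omitted, the script downloads the model into a timestamped directory under /data (DEFAULT_MODEL_DIR), converts the weights with mttransformer.convert_weight, and then launches the vLLM OpenAI-compatible server on port 8000.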
