#!/bin/bash
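+ # Serve a model listed in supported_models.json with vLLM on MUSA (Moore
+ # Threads) devices: parse CLI flags, look up model metadata, download and
+ # convert the weights if needed, then start an OpenAI-compatible API server.
+ # Example invocation (script name taken from the old usage text and assumed
+ # unchanged; the TP size is optional):
+ #   ./run_vllm_serving.sh --task DeepSeek-R1-Distill-Qwen-1.5B --tp-size 2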
- set -e

- MODEL_NAME="$1"
- CONFIG_FILE="model_config.json"
+ # Defaults
+ MODEL=""
+ CONVERTED_MODEL=""
+ TP_SIZE=""
+ MODEL_CONFIG_FILE="supported_models.json"
+ TIME=$(date "+%Y%m%d_%H%M%S")
+ DEFAULT_MODEL_DIR="/data/musa_develop_demo_$TIME"

- if [ -z "$MODEL_NAME" ]; then
-     echo "× Please provide the model name, for example:"
-     echo "./run_vllm_serving.sh DeepSeek-R1-Distill-Qwen-1.5B"
-     exit 1
- fi

- if [ ! -f "$CONFIG_FILE" ]; then
-     echo "× Error: Config file $CONFIG_FILE not found! Please make sure it exists."
+ # Argument-parsing function
+ parse_args() {
+     while [[ $# -gt 0 ]]; do
+         case "$1" in
+             --task)             # model name
+                 TASK="$2"
+                 shift 2
+                 ;;
+             --model)            # original model path
+                 MODEL="$2"
+                 shift 2
+                 ;;
+             --converted-model)  # converted model path
+                 CONVERTED_MODEL="$2"
+                 shift 2
+                 ;;
+             --tp-size)
+                 TP_SIZE="$2"
+                 shift 2
+                 ;;
+             --container-name)
+                 CONTAINER_NAME="$2"
+                 shift 2
+                 ;;
+             *)
+                 echo "Unknown argument: $1"
+                 exit 1
+                 ;;
+         esac
+     done
+
+     validate_args() {
+         if [[ -z "$TASK" ]]; then
+             echo "Usage: $0 --task <model_name> [--model <original_model_path> | --converted-model <converted_model_path>] [--tp-size <tensor_parallel_size>]"
            exit 1
        fi
+     }

- read MODEL_URL TENSOR_PARALLEL_SIZE <<< $(python3 -c "
+     validate_args
+ }
+
+
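+ # Assumed shape of supported_models.json, inferred from the keys read below
+ # (the real file may hold more fields):
+ # {
+ #   "DeepSeek-R1-Distill-Qwen-1.5B": {
+ #     "modelscope_url": "https://...",
+ #     "huggingface_url": "https://...",
+ #     "tensor_parallel_size": [1, 2]
+ #   }
+ # }
+ # fetch_model_info prints "<modelscope_url>\t<huggingface_url>\t<tp_size>" on
+ # stdout so callers can split it with read; all diagnostics go to stderr.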
+ fetch_model_info() {
+     local py_output
+     local py_status
+     local model_name="$1"
+     local tp_size="$2"
+
+     py_output=$(python3 -c "
import json
- config_file = '$CONFIG_FILE'
- model_name = '$MODEL_NAME'
- with open(config_file, 'r') as f:
-     data = json.load(f)
+ import sys
+
+ config_file = '$MODEL_CONFIG_FILE'
+ model_name = '$model_name'
+ tp_size = '$tp_size'
+
+ try:
+     with open(config_file, 'r') as f:
+         data = json.load(f)
+ except Exception as e:
+     sys.stderr.write(f'× Failed to read the config file: {e}\\n')
+     sys.exit(1)
+
info = data.get(model_name, {})
- print(info.get('url', ''), info.get('tensor_parallel_size', ''))
- ")
+ if not info:
+     sys.stderr.write(f'× Model \"{model_name}\" is not in the supported list\\n')
+     sys.stderr.write(f'✓ Supported models: {list(data.keys())}\\n')
+     sys.exit(2)

- if [ -z "$MODEL_URL" ]; then
-     echo "× $MODEL_NAME is not supported, please refer to the website to try other models: https://docs.mthreads.com/mtt/mtt-doc-online/compability"
-     exit 1
- fi
+ # Resolve tensor_parallel_size
+ supported_tp_sizes = info.get('tensor_parallel_size', [])
+ if not isinstance(supported_tp_sizes, list):
+     supported_tp_sizes = [supported_tp_sizes] if supported_tp_sizes else []

- echo "√ Find Succeed: $MODEL_URL"
- echo "√ tensor_parallel_size: $TENSOR_PARALLEL_SIZE"
-
- # Directories and log paths
- CURRENT_DIR=$(pwd)
- MODEL_DIR="/data/mtt/models"
- CONVERTED_MODEL_DIR="/data/mtt/models_convert"
- LOG_FILE="/data/mtt/logs/model_server.log"
- MODEL_CHECK_FILE="$MODEL_DIR/$MODEL_NAME/model.safetensors"
- SUCCESS_MESSAGE="INFO: Started server process"
-
- # Make sure the directories exist
- mkdir -p "$MODEL_DIR" "$CONVERTED_MODEL_DIR" "$(dirname "$LOG_FILE")"
-
- # Check whether the model already exists
- if [ -f "$MODEL_CHECK_FILE" ]; then
-     echo "√ The model file already exists. Skip the download step."
- else
-     echo "⬇ The model file does not exist, start downloading the model..."
-     cd "$MODEL_DIR"
-     apt update && apt install -y git-lfs jq
-     git lfs install
-     git clone "$MODEL_URL" "$MODEL_NAME"
-     echo "√ Model download completed."
- fi
+ final_tp_size = None
+ if tp_size and tp_size.isdigit():
+     if int(tp_size) not in supported_tp_sizes:
+         sys.stderr.write(f'× Unsupported tensor_parallel_size value: {tp_size}\\n')
+         sys.stderr.write(f'✓ Supported TP sizes: {supported_tp_sizes}\\n')
+         sys.exit(3)
+     final_tp_size = tp_size
+ else:
+     final_tp_size = supported_tp_sizes[-1] if supported_tp_sizes else 1
+
+ # Print the result (tab-separated)
+ print('\t'.join([
+     info.get('modelscope_url', ''),
+     info.get('huggingface_url', ''),
+     str(final_tp_size)
+ ]))
+ " 2>&1)
+
+     py_status=$?
+     # Propagate Python script errors to the caller
+     if [ $py_status -ne 0 ]; then
+         echo "$py_output" >&2
+         exit $py_status
+     fi
+
+     echo "$py_output"
+ }
+
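+ # check_and_prepare_model resolves the model and converted-model directories
+ # (downloading the checkpoint and converting the weights when needed). All
+ # progress messages go to stderr; the only stdout output is the final
+ # converted path, so callers can capture it with command substitution.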
+ check_and_prepare_model() {
+     if [ -z "$1" ]; then
+         echo "Error: a model_name argument is required" >&2
+         return 1
+     fi
+
+     local model_name="$1"
+     local model_path="$2"
+     local converted_model_path="$3"
+     local tp_size="$4"
+     local model_url="$5"
+
+     # 1. Only model_name given: download the model
+     if [ -z "$model_path" ] && [ -z "$converted_model_path" ]; then
+         model_path=$DEFAULT_MODEL_DIR/$model_name
+         converted_model_path=$DEFAULT_MODEL_DIR/$model_name-tp$tp_size-converted
+
+         mkdir -p "$model_path" "$converted_model_path"
+
+         echo -e "\e[32mmodel_path: $model_path\e[0m" >&2
+         echo -e "\e[32mconverted_model_path: $converted_model_path\e[0m" >&2
+         apt-get update -qq >&2 && apt-get install -y --no-install-recommends git-lfs jq >&2
+         git lfs install >&2
+         git clone "$model_url" "$model_path" >&2  # TODO(wangkang): check whether the clone succeeded
+         echo "√ Model download completed." >&2
+
+     # 2. model_path given but no converted_model_path
+     elif [ -n "$model_path" ] && [ -z "$converted_model_path" ]; then
+         # Model directory not found
+         if [ ! -e "$model_path" ]; then
+             echo "Error: model path $model_path not found" >&2
+             exit 1
+         fi
+         converted_model_path=$(dirname "$model_path")/$model_name-tp$tp_size-converted
+         echo "Automatically generated converted_model_path: $converted_model_path" >&2
+         mkdir -p "$converted_model_path"
+
+     # 3. converted_model_path given but missing on disk
+     elif [ -n "$converted_model_path" ] && [ ! -e "$converted_model_path" ]; then
+         echo "Error: converted model path $converted_model_path not found" >&2
+         exit 1
+     fi
+
+     # Convert the weights only if the target directory is still empty
+     if [ -z "$(ls -A "$converted_model_path")" ]; then
+         convert_weight "$model_path" "$converted_model_path" "$tp_size" >&2
+     fi
+
+     echo "$converted_model_path"
+ }
+
+
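+ # convert_weight shards a checkpoint for tensor parallelism with the
+ # mttransformer converter. Equivalent manual invocation (paths here are
+ # illustrative):
+ #   python -u -m mttransformer.convert_weight \
+ #     --in_file /data/models/MyModel \
+ #     --saved_dir /data/models/MyModel-tp2-converted \
+ #     --tensor-para-size 2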
+ convert_weight() {
+     local model_dir="$1"
+     local converted_model_dir="$2"
+     local tp_size="$3"
+     local model_type="$4"
+
+     local python_cmd=(
+         python -u  # -u disables output buffering
+         -m mttransformer.convert_weight
+         --in_file "${model_dir}"
+         --saved_dir "${converted_model_dir}"
+         --tensor-para-size "${tp_size}"
+     )
+
+     [[ -n "${model_type}" ]] && python_cmd+=(--model-type "${model_type}")
+
+     "${python_cmd[@]}"
+ }
+
+
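+ # wait_for_log_update treats an unchanged log size between two polls as
+ # "startup finished" and prints a ready-to-use curl request. This is a
+ # heuristic, since a hung server also stops logging, so the second loop
+ # double-checks that the server process is still alive.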
+ wait_for_log_update() {
+     local log_file="$1"
+     local server_pid="$2"
+     local model_name="$3"
+     local model_path="$4"
+
+     # Timeout (seconds)
+     local timeout=30
+     local elapsed=0
+
+     # Make sure the log file exists
+     touch "$log_file"
+
+     # Record the initial log file size
+     local last_size=$(stat -c%s "$log_file")
+
+     while (( elapsed < timeout )); do
+         sleep 10
+         (( elapsed += 10 ))
+         local current_size=$(stat -c%s "$log_file")
+
+         if [[ "$current_size" -eq "$last_size" ]]; then
+             echo -e "\e[32m"
+             if [ -z "$CONTAINER_NAME" ]; then
+                 echo "Please send the following request to obtain the model inference result."
+             else
+                 echo "Please send the following request in container($CONTAINER_NAME) to obtain the model inference result."
+             fi
+ cat << EOF
+
+
+ curl http://0.0.0.0:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
+     "model": "$model_name",
+     "messages": [
+         {"role": "system", "content": "You are a helpful assistant."},
+         {"role": "user", "content": "Who won the NBA final series in 2020?"}
+     ]
+ }'
+ EOF
+             echo -e "\e[0m"
+             break
+         fi

- # Weight conversion
- cd "${CURRENT_DIR}/.."
- ./convert_weight.sh "$MODEL_DIR/$MODEL_NAME" $TENSOR_PARALLEL_SIZE
+         last_size=$current_size
+     done

- # Start the vLLM server
- python -m vllm.entrypoints.openai.api_server \
-     --model "$CONVERTED_MODEL_DIR/$MODEL_NAME-tp$TENSOR_PARALLEL_SIZE-convert" \
+     # Keep watching the server process for the rest of the timeout window
+     while (( elapsed < timeout )); do
+         sleep 30
+         (( elapsed += 30 ))
+
+         # Check whether the process is still running
+         if ! ps -p "$server_pid" > /dev/null; then
+             echo "Error: vLLM server process exited unexpectedly. Check logs: $log_file"
+             return 1
+         fi
+     done
+     return 0
+ }
+
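+ # start_server launches the vLLM OpenAI-compatible API server in the
+ # background on the MUSA device, logging to model_server.log next to the
+ # converted weights, then hands off to wait_for_log_update.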
+ start_server() {
+     # Unpack the arguments
+     local converted_model_path="$1"
+     local tensor_parallel_size="$2"
+     local served_model_name="$3"
+
+     log_file=$(dirname "$converted_model_path")/model_server.log
+     echo "Wait for the service to start..."
+     python -m vllm.entrypoints.openai.api_server \
+         --model "$converted_model_path" \
        --trust-remote-code \
-     --tensor-parallel-size $TENSOR_PARALLEL_SIZE \
+         --tensor-parallel-size "$tensor_parallel_size" \
        -pp 1 \
        --block-size 64 \
        --max-model-len 2048 \
        --disable-log-stats \
        --disable-log-requests \
        --device "musa" \
-     --served-model-name model-develop_test > "$LOG_FILE" 2>&1 &
-
- pid=$!
- echo "Wait for the service to start..."
- while true; do
-     if grep -q "$SUCCESS_MESSAGE" "$LOG_FILE"; then
-         echo "√ Service has been started. If it does not work, check the log: $LOG_FILE"
-         break
-     else
-         echo "Wait for the service to start..."
-         sleep 5  # Check the log file every 5 seconds
-     fi
- done
+         --served-model-name "$served_model_name" > "$log_file" 2>&1 &
+
+     SERVER_PID=$!
+
+     wait_for_log_update "$log_file" "$SERVER_PID" "$served_model_name" "$converted_model_path"
+ }
+
+
+ # Main function
+ main() {
+     parse_args "$@"
+
+     # Look up the model in the JSON config
+     output=$(fetch_model_info "$TASK" "$TP_SIZE") || exit $?
+     IFS=$'\t' read -r ms_url hf_url tp_size <<< "$output"
+
+     # Prepare the model and converted weights
+     output=$(check_and_prepare_model "$TASK" "$MODEL" "$CONVERTED_MODEL" "$tp_size" "$ms_url") || exit $?
+     read -r converted_model_path <<< "$output"
+
+     start_server "$converted_model_path" "$tp_size" "$TASK"
+ }
+
+ # Run the main function
+ main "$@"