if [ -z "$MODEL_NAME" ]; then
    exit 1
fi

# Look up the requested model in the JSON config via Python and pull out
# its download URL and tensor-parallel size.
# Read each value from its own line: with a single space-separated line,
# an empty 'url' would shift 'tensor_parallel_size' into MODEL_URL and
# silently defeat the "not supported" check below.
{
    IFS= read -r MODEL_URL
    IFS= read -r TENSOR_PARALLEL_SIZE
} <<< "$(python3 -c "
import json
config_file = '$CONFIG_FILE'
model_name = '$MODEL_NAME'
with open(config_file, 'r') as f:
    data = json.load(f)
info = data.get(model_name, {})
print(info.get('url', ''))
print(info.get('tensor_parallel_size', ''))
")"

# Fall back to the previous hard-coded default (1) when the config entry
# omits tensor_parallel_size.
TENSOR_PARALLEL_SIZE=${TENSOR_PARALLEL_SIZE:-1}

if [ -z "$MODEL_URL" ]; then
    echo "× $MODEL_NAME is not supported yet, please refer to the website to try other models: https://docs.mthreads.com/mtt/mtt-doc-online/compability"
    exit 1
fi

echo "√ Find Succeed: $MODEL_URL"
echo "√ tensor_parallel_size: $TENSOR_PARALLEL_SIZE"

# Directory and log paths
CURRENT_DIR=$(pwd)
# Weight conversion: convert the downloaded checkpoint for the chosen
# tensor-parallel size before serving.
cd "${CURRENT_DIR}/.." || exit 1
./convert_weight.sh "$MODEL_DIR/$MODEL_NAME" "$TENSOR_PARALLEL_SIZE"

# Launch the vLLM OpenAI-compatible API server on the converted weights.
# NOTE(review): '-pp 1' is assumed to be the vLLM alias for
# --pipeline-parallel-size — confirm against the installed vLLM version.
python -m vllm.entrypoints.openai.api_server \
    --model "$CONVERTED_MODEL_DIR/$MODEL_NAME-tp${TENSOR_PARALLEL_SIZE}-convert" \
    --trust-remote-code \
    --tensor-parallel-size "$TENSOR_PARALLEL_SIZE" \
    -pp 1 \
    --block-size 64 \
    --max-model-len 2048 \
0 commit comments