Skip to content

Commit d21deae

Browse files
authored
Merge pull request #36 from MooreThreads/update_vllm_demo_script
add tp size in json
2 parents 62bff9b + 7b6fc49 commit d21deae

File tree

3 files changed: 18 additions and 11 deletions.

vllm/demo/model_config.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
+{
+    "DeepSeek-R1-Distill-Qwen-1.5B": {
+        "url": "https://www.modelscope.cn/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B.git",
+        "tensor_parallel_size": 1
+    },
+    "Qwen2.5-0.5B-Instruct": {
+        "url": "https://www.modelscope.cn/Qwen/Qwen2.5-0.5B-Instruct.git",
+        "tensor_parallel_size": 1
+    }
+}

vllm/demo/model_url.json

Lines changed: 0 additions & 4 deletions
This file was deleted.

vllm/demo/run_vllm_serving.sh

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,22 +10,23 @@ if [ -z "$MODEL_NAME" ]; then
     exit 1
 fi
 
-# 用 Python 解析 JSON 获取 URL
-MODEL_URL=$(python3 -c "
+read MODEL_URL TENSOR_PARALLEL_SIZE <<< $(python3 -c "
 import json
 config_file = '$CONFIG_FILE'
 model_name = '$MODEL_NAME'
 with open(config_file, 'r') as f:
     data = json.load(f)
-print(data.get(model_name, ''))
+info = data.get(model_name, {})
+print(info.get('url', ''), info.get('tensor_parallel_size', ''))
 ")
 
 if [ -z "$MODEL_URL" ]; then
     echo "× $MODEL_NAME is not supported yet, please refer to the website to try other models: https://docs.mthreads.com/mtt/mtt-doc-online/compability"
     exit 1
 fi
 
-echo "√ 找到模型 URL: $MODEL_URL"
+echo "√ Find Succeed: $MODEL_URL"
+echo "√ tensor_parallel_size: $TENSOR_PARALLEL_SIZE"
 
 # 目录和日志路径
 CURRENT_DIR=$(pwd)
@@ -52,13 +53,13 @@ fi
 
 # 权重转换
 cd "${CURRENT_DIR}/.."
-./convert_weight.sh "$MODEL_DIR/$MODEL_NAME" 1
+./convert_weight.sh "$MODEL_DIR/$MODEL_NAME" $TENSOR_PARALLEL_SIZE
 
 # 启动 vLLM 服务器
 python -m vllm.entrypoints.openai.api_server \
-    --model "$CONVERTED_MODEL_DIR/$MODEL_NAME-tp1-convert" \
+    --model "$CONVERTED_MODEL_DIR/$MODEL_NAME-tp$TENSOR_PARALLEL_SIZE-convert" \
     --trust-remote-code \
-    --tensor-parallel-size 1 \
+    --tensor-parallel-size $TENSOR_PARALLEL_SIZE \
     -pp 1 \
     --block-size 64 \
     --max-model-len 2048 \

0 commit comments

Comments
 (0)