Commit c65f6a8

Merge pull request #38 from MooreThreads/modify_vllm_demo
fix(vllm): modify vllm demo script and json
2 parents b45d47c + c39b243 commit c65f6a8

File tree

3 files changed: +309 -76 lines changed


vllm/demo/model_config.json

Lines changed: 0 additions & 10 deletions
This file was deleted.
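
The deleted model_config.json is superseded by supported_models.json, which the rewritten script reads (MODEL_CONFIG_FILE in the diff below). Going by the fields the embedded Python looks up (modelscope_url, huggingface_url, and a tensor_parallel_size list), a plausible entry could look like the following sketch; the URLs and TP sizes are illustrative placeholders, not values from the repository:

{
    "DeepSeek-R1-Distill-Qwen-1.5B": {
        "modelscope_url": "https://modelscope.cn/models/<org>/DeepSeek-R1-Distill-Qwen-1.5B.git",
        "huggingface_url": "https://huggingface.co/<org>/DeepSeek-R1-Distill-Qwen-1.5B",
        "tensor_parallel_size": [1, 2]
    }
}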

vllm/demo/run_vllm_serving.sh

Lines changed: 280 additions & 66 deletions
@@ -1,86 +1,300 @@
 #!/bin/bash
-set -e
 
-MODEL_NAME="$1"
-CONFIG_FILE="model_config.json"
+# Default values
+MODEL=""
+CONVERTED_MODEL=""
+TP_SIZE=""
+MODEL_CONFIG_FILE="supported_models.json"
+TIME=$(date "+%Y%m%d_%H%M%S")
+DEFAULT_MODEL_DIR="/data/musa_develop_demo_$TIME"
 
-if [ -z "$MODEL_NAME" ]; then
-    echo "× Please provide the model name, for example:"
-    echo "  ./run_vllm_serving.sh DeepSeek-R1-Distill-Qwen-1.5B"
-    exit 1
-fi
 
-if [ ! -f "$CONFIG_FILE" ]; then
-    echo "× Error: Config file $CONFIG_FILE not found! Please make sure it exists."
+# Argument parsing function
+parse_args() {
+    while [[ $# -gt 0 ]]; do
+        case "$1" in
+            --task) # model_name
+                TASK="$2"
+                shift 2
+                ;;
+            --model) # original model path
+                MODEL="$2"
+                shift 2
+                ;;
+            --converted-model) # converted model path
+                CONVERTED_MODEL="$2"
+                shift 2
+                ;;
+            -tp-size)
+                TP_SIZE="$2"
+                shift 2
+                ;;
+            --container-name)
+                CONTAINER_NAME="$2"
+                shift 2
+                ;;
+            *)
+                echo "Unknown argument: $1"
+                exit 1
+                ;;
+        esac
+    done
+
+    validate_args() {
+        if [[ -z "$TASK" ]]; then
+            echo "Usage: $0 --task <model_name> [--model <original_model_path> | --converted-model <converted_model_path>] [-tp-size <tensor_parallel_size>]"
             exit 1
         fi
+    }
 
-read MODEL_URL TENSOR_PARALLEL_SIZE <<< $(python3 -c "
+    validate_args
+}
+
+
+fetch_model_info() {
+    local py_output
+    local py_status
+    local model_name="$1"
+    local tp_size="$2"
+
+    py_output=$(python3 -c "
 import json
-config_file = '$CONFIG_FILE'
-model_name = '$MODEL_NAME'
-with open(config_file, 'r') as f:
-    data = json.load(f)
+import sys
+
+config_file = '$MODEL_CONFIG_FILE'
+model_name = '$model_name'
+tp_size = '$tp_size'
+
+try:
+    with open(config_file, 'r') as f:
+        data = json.load(f)
+except Exception as e:
+    sys.stderr.write(f'× Failed to read config file: {str(e)}\\n')
+    exit(1)
+
 info = data.get(model_name, {})
-print(info.get('url', ''), info.get('tensor_parallel_size', ''))
-")
+if not info:
+    sys.stderr.write(f'× Model \"{model_name}\" is not in the supported list\\n')
+    sys.stderr.write(f'✓ Supported models: {list(data.keys())}\\n')
+    exit(2)
 
-if [ -z "$MODEL_URL" ]; then
-    echo "× $MODEL_NAME is not supported, please refer to the website to try other models: https://docs.mthreads.com/mtt/mtt-doc-online/compability"
-    exit 1
-fi
+# Handle tensor_parallel_size
+supported_tp_sizes = info.get('tensor_parallel_size', [])
+if not isinstance(supported_tp_sizes, list):
+    supported_tp_sizes = [supported_tp_sizes] if supported_tp_sizes else []
 
-echo "√ Find Succeed: $MODEL_URL"
-echo "√ tensor_parallel_size: $TENSOR_PARALLEL_SIZE"
-
-# Directories and log paths
-CURRENT_DIR=$(pwd)
-MODEL_DIR="/data/mtt/models"
-CONVERTED_MODEL_DIR="/data/mtt/models_convert"
-LOG_FILE="/data/mtt/logs/model_server.log"
-MODEL_CHECK_FILE="$MODEL_DIR/$MODEL_NAME/model.safetensors"
-SUCCESS_MESSAGE="INFO: Started server process"
-
-# Make sure the directories exist
-mkdir -p "$MODEL_DIR" "$CONVERTED_MODEL_DIR" "$(dirname "$LOG_FILE")"
-
-# Check whether the model already exists
-if [ -f "$MODEL_CHECK_FILE" ]; then
-    echo "√ The model file already exists. Skip the download step."
-else
-    echo "⬇ The model file does not exist, start downloading the model..."
-    cd "$MODEL_DIR"
-    apt update && apt install -y git-lfs jq
-    git lfs install
-    git clone "$MODEL_URL" "$MODEL_NAME"
-    echo "√ Model download completed."
-fi
+final_tp_size = None
+if tp_size and tp_size.isdigit():
+    if int(tp_size) not in supported_tp_sizes:
+        sys.stderr.write(f'× Unsupported tensor_parallel_size value: {tp_size}\\n')
+        sys.stderr.write(f'✓ Supported TP sizes: {supported_tp_sizes}\\n')
+        exit(3)
+    final_tp_size = tp_size
+else:
+    final_tp_size = supported_tp_sizes[-1] if supported_tp_sizes else 1
+
+# Output the result (tab-separated)
+print('\t'.join([
+    info.get('modelscope_url', ''),
+    info.get('huggingface_url', ''),
+    str(final_tp_size)
+]))
+" 2>&1)
+
+    py_status=$?
+    # Handle Python script errors
+    if [ $py_status -ne 0 ]; then
+        echo "$py_output" >&2
+        exit $py_status
+    fi
+
+    echo "$py_output"
+}
+
+check_and_prepare_model() {
+
+    if [ -z "$1" ]; then
+        echo "Error: the model_name argument is required" >&2
+        return 1
+    fi
+
+    local model_name="$1"
+    local model_path="$2"
+    local converted_model_path="$3"
+    local tp_size="$4"
+    local model_url="$5"
+
+    # 1. If only model_name is given, download the model
+    if [ -z "$model_path" ] && [ -z "$converted_model_path" ]; then
+
+        model_path=$DEFAULT_MODEL_DIR/$model_name
+        converted_model_path=$DEFAULT_MODEL_DIR/$model_name-tp$tp_size-converted
+
+        mkdir -p "$model_path" "$converted_model_path"
+
+        echo -e "\e[32mmodel_path: $model_path\e[0m" >&2
+        echo -e "\e[32mconverted_model_path: $converted_model_path\e[0m" >&2
+        apt-get update -qq >&2 && apt-get install -y --no-install-recommends git-lfs jq >&2
+        git lfs install >&2
+        git clone "$model_url" "$model_path" >&2 # TODO(wangkang): check whether the clone succeeded
+        echo "√ Model download completed." >&2
+
+
+    # 2. If model_path is given but converted_model_path is not
+    elif [ -n "$model_path" ] && [ -z "$converted_model_path" ]; then
+
+        # Model directory not found
+        if [ ! -e "$model_path" ]; then
+            echo "Error: model path $model_path not found" >&2
+            exit 1
+        fi
+        converted_model_path=$(dirname "$model_path")/$model_name-tp$tp_size-converted
+        echo "Automatically generated converted_model_path: $converted_model_path" >&2
+        mkdir -p "$converted_model_path"
+
+    elif [ -n "$converted_model_path" ] && [ ! -e "$converted_model_path" ]; then
+        echo "Error: converted model path $converted_model_path not found" >&2
+        exit 1
+
+    fi
+
+    if [ -z "$(ls -A "$converted_model_path")" ]; then
+        convert_weight "$model_path" "$converted_model_path" "$tp_size" >&2
+    fi
+
+    echo "$converted_model_path"
+}
+
+
+convert_weight() {
+    local model_dir="$1"
+    local converted_model_dir="$2"
+    local tp_size="$3"
+    local model_type="$4"
+
+    local python_cmd=(
+        python -u # -u disables output buffering
+        -m mttransformer.convert_weight
+        --in_file "${model_dir}"
+        --saved_dir "${converted_model_dir}"
+        --tensor-para-size "${tp_size}"
+    )
+
+    [[ -n "${model_type}" ]] && python_cmd+=(--model-type "${model_type}")
+
+    "${python_cmd[@]}"
+}
+
+
+wait_for_log_update() {
+    local log_file="$1"
+    local server_pid="$2"
+    local model_name="$3"
+    local model_path="$4"
+
+    # Timeout in seconds
+    local timeout=30
+    local elapsed=0
+    local no_change_count=0
+
+    # Make sure the log file exists
+    touch "$log_file"
+
+    # Get the initial log file size
+    local last_size=$(stat -c%s "$log_file")
+
+    while ((elapsed < timeout)); do
+        sleep 10
+        local current_size=$(stat -c%s "$log_file")
+
+        echo -e "\e[32m"
+        if [[ "$current_size" -eq "$last_size" ]]; then
+
+            if [ -z "$CONTAINER_NAME" ]; then
+                echo "Please send the following request to obtain the model inference result."
+            else
+                echo "Please send the following request in container($CONTAINER_NAME) to obtain the model inference result."
+            fi
+            cat <<EOF
+
+
+curl http://0.0.0.0:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
+    "model": "$model_name",
+    "messages": [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Who won the NBA final series in 2020?"}
+    ]
+}'
+EOF
+            echo -e "\e[0m"
+            break
+        fi
 
-# Weight conversion
-cd "${CURRENT_DIR}/.."
-./convert_weight.sh "$MODEL_DIR/$MODEL_NAME" $TENSOR_PARALLEL_SIZE
+        last_size=$current_size
+    done
 
-# Start the vLLM server
-python -m vllm.entrypoints.openai.api_server \
-    --model "$CONVERTED_MODEL_DIR/$MODEL_NAME-tp$TENSOR_PARALLEL_SIZE-convert" \
+    while ((elapsed < timeout)); do
+        sleep 30
+        ((elapsed += 2))
+
+        # Update the last recorded size
+        last_size=$current_size
+
+        # Check whether the process is still running
+        if ! ps -p "$server_pid" > /dev/null; then
+            echo "Error: vLLM server process exited unexpectedly. Check logs: $log_file"
+            return 1
+        fi
+    done
+    return 1
+}
+
+start_server() {
+    # Parse the incoming arguments
+    local converted_model_path="$1"
+    local tensor_parallel_size="$2"
+    local served_model_name="$3"
+
+    log_file=$(dirname "$converted_model_path")/model_server.log
+    echo "Wait for the service to start..."
+    python -m vllm.entrypoints.openai.api_server \
+        --model "$converted_model_path" \
         --trust-remote-code \
-    --tensor-parallel-size $TENSOR_PARALLEL_SIZE \
+        --tensor-parallel-size "$tensor_parallel_size" \
         -pp 1 \
         --block-size 64 \
         --max-model-len 2048 \
         --disable-log-stats \
         --disable-log-requests \
         --device "musa" \
-    --served-model-name model-develop_test > "$LOG_FILE" 2>&1 &
-
-pid=$!
-echo "Wait for the service to start..."
-while true; do
-    if grep -q "$SUCCESS_MESSAGE" "$LOG_FILE"; then
-        echo "√ Service has been started. If it does not work, check the log: $LOG_FILE"
-        break
-    else
-        echo "Wait for the service to start..."
-        sleep 5 # Check the log file every 5 seconds
-    fi
-done
+        --served-model-name "$served_model_name" > "$log_file" 2>&1 &
+
+    SERVER_PID=$!
+
+    wait_for_log_update "$log_file" "$SERVER_PID" "$served_model_name" "$converted_model_path"
+}
+
+
+
+# Main function
+main() {
+    parse_args "$@"
+
+    # load json
+    if ! output=$(fetch_model_info "$TASK" "$TP_SIZE"); then
+        exit 1
+    fi
+    read -r ms_url hf_url tp_size <<< "$output"
+
+    # prepare model
+    if ! output=$(check_and_prepare_model "$TASK" "$MODEL" "$CONVERTED_MODEL" "$tp_size" "$ms_url"); then
+        exit 1
+    fi
+    read -r converted_model_path <<< "$output"
+
+    start_server "$converted_model_path" "$tp_size" "$TASK"
+
+}
+
+# Run the main function
+main "$@"
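
After this change the script is driven by named flags handled in parse_args instead of a single positional model name. A minimal usage sketch under that reading (the model name is taken from the old script's usage message; the paths are illustrative, and note that the tp-size flag is spelled with a single leading dash):

# Download, convert, and serve a supported model end to end
./run_vllm_serving.sh --task DeepSeek-R1-Distill-Qwen-1.5B

# Reuse an already-downloaded model and pin the tensor-parallel size
./run_vllm_serving.sh --task DeepSeek-R1-Distill-Qwen-1.5B \
    --model /data/models/DeepSeek-R1-Distill-Qwen-1.5B \
    -tp-size 2

When both --model and --converted-model are omitted, the script downloads the model into a timestamped directory under /data (DEFAULT_MODEL_DIR), converts the weights with mttransformer.convert_weight, and then launches the vLLM OpenAI-compatible server on port 8000.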
