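Update README and config.yaml: nest model_name_or_path and engine options (enable_prefix_caching, dtype, max_model_len) under model_config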

shell-nlp · shell-nlp · commit 609cc88c873c · 2024-10-26T15:15:39.000+08:00
diff --git a/README.md b/README.md
@@ -138,7 +138,8 @@ models:
   - chatglm4:  #自定义的模型名称
       alias: null # 别名     例如  gpt4,gpt3
       enable: true  # false true 控制是否启动模型worker
-      model_name_or_path: /home/dev/model/THUDM/glm-4-9b-chat/
+      model_config:
+        model_name_or_path: /home/dev/model/THUDM/glm-4-9b-chat/
       model_type: chatglm  # qwen  yi internlm
       work_mode: vllm  # vllm hf lmdeploy-turbomind  lmdeploy-pytorch
       # lora:  # lora 配置
@@ -162,7 +163,11 @@ models:
   - qwen:  #自定义的模型名称
       alias: gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k # 别名     例如  gpt4,gpt3
       enable: true  # false true 控制是否启动模型worker
-      model_name_or_path: /home/dev/model/qwen/Qwen1___5-14B-Chat/ 
+      model_config:
+        model_name_or_path: /home/dev/model/qwen/Qwen1___5-14B-Chat/ 
+        enable_prefix_caching: false
+        dtype: auto
+        max_model_len: 65536
       model_type: qwen  # qwen  yi internlm
       work_mode: vllm  # vllm hf lmdeploy-turbomind  lmdeploy-pytorch
       device: gpu  # gpu / cpu
@@ -176,7 +181,8 @@ models:
   - bge-base-zh:
       alias: null # 别名   
       enable: true  # false true
-      model_name_or_path: /home/dev/model/Xorbits/bge-base-zh-v1___5/
+      model_config:
+        model_name_or_path: /home/dev/model/Xorbits/bge-base-zh-v1___5/
       model_type: embedding_infinity # embedding_infinity 
       work_mode: hf
       device: gpu  # gpu / cpu
@@ -187,7 +193,8 @@ models:
   - bge-reranker-base:
       alias: null # 别名   
       enable: true  # false true  控制是否启动模型worker
-      model_name_or_path: /home/dev/model/Xorbits/bge-reranker-base/
+      model_config:
+        model_name_or_path: /home/dev/model/Xorbits/bge-reranker-base/
       model_type: embedding_infinity # embedding_infinity
       work_mode: hf
       device: gpu  # gpu / cpu
diff --git a/gpt_server/script/config.yaml b/gpt_server/script/config.yaml
@@ -14,48 +14,57 @@ models:
 - minicpmv:
     alias: null
     enable: false
-    model_name_or_path: /home/dev/model/OpenBMB/MiniCPM-V-2_6/
     model_type: minicpmv
+    model_config:
+      model_name_or_path: /home/dev/model/OpenBMB/MiniCPM-V-2_6/
+      enable_prefix_caching: false
+      dtype: auto
     work_mode: lmdeploy-turbomind
-    enable_prefix_caching: false
     device: gpu
     workers:
     - gpus:
       - 3
 - internvl2:
     alias: null
     enable: false
-    model_name_or_path: /home/dev/model/OpenGVLab/InternVL2-40B-AWQ/
+    model_config:
+      model_name_or_path: /home/dev/model/OpenGVLab/InternVL2-40B-AWQ/
+      enable_prefix_caching: false
     model_type: internvl2
     work_mode: lmdeploy-turbomind
-    enable_prefix_caching: false
     device: gpu
     workers:
     - gpus:
       - 3
 - chatglm4:
     alias: chatglm3
     enable: true
-    model_name_or_path: /home/dev/model/ZhipuAI/glm-4-9b-chat
+    model_config:
+      model_name_or_path: /home/dev/model/ZhipuAI/glm-4-9b-chat
+      enable_prefix_caching: false
     model_type: chatglm
     work_mode: vllm
-    enable_prefix_caching: false
     device: gpu
     workers:
     - gpus:
       - 3
+
 - qwen-72b:
     alias: qwen,gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k
     enable: true
-    model_name_or_path: /home/dev/model/qwen/Qwen2___5-72B-Instruct-AWQ/
+    model_config:
+      model_name_or_path: /home/dev/model/qwen/Qwen2___5-72B-Instruct-AWQ/
+      enable_prefix_caching: true
+      dtype: auto
+      max_model_len: 65536
     model_type: qwen
     work_mode: lmdeploy-turbomind
-    enable_prefix_caching: true
     device: gpu
     workers:
     - gpus:
       - 0
       - 1
+
 - piccolo-base-zh:
     alias: null
     enable: true
@@ -78,6 +87,7 @@ models:
     workers:
     - gpus:
       - 2
+
 - bge-reranker-base:
     alias: null
     enable: true