shell-nlp
diff --git a/‎docker-compose.yml‎
Lines changed: 1 addition & 1 deletion b/‎docker-compose.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎gpt_server/script/config.yaml‎
Lines changed: 25 additions & 13 deletions b/‎gpt_server/script/config.yaml‎
Lines changed: 25 additions & 13 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 4 additions & 3 deletions b/‎pyproject.toml‎
Lines changed: 4 additions & 3 deletions
@@ -5,7 +5,7 @@ services:
     build:
       context: .
       dockerfile: Dockerfile.copy
-    image: gpt_server:v0.4.0
+    image: gpt_server:_latest
     shm_size: '4g' # 设置共享内存为4GB 
     container_name: gpt_server
     restart: always
 
@@ -1,13 +1,20 @@
 serve_args:
+  # openai 服务的 host 和 port
+  enable: true
   host: 0.0.0.0
   port: 8082
-  controller_address: http://localhost:21001
-  api_keys: null
+  controller_address: http://localhost:21001 # 控制器的ip地址
+  api_keys: null # api_keys: 111,222  # 用来设置 openai 密钥
 controller_args:
+  # 控制器的配置参数
+  enable: true
   host: 0.0.0.0
   port: 21001
-  dispatch_method: shortest_queue
+  dispatch_method: shortest_queue # lottery、shortest_queue # 现有两种请求分发策略，随机（lottery） 和 最短队列（shortest_queue），最短队列方法更推荐。
+
 model_worker_args:
+  # 模型的配置参数，这里port 不能设置，程序自动分配，并注册到 控制器中。
+  # model worker 的配置参数
   host: 0.0.0.0
   controller_address: http://localhost:21001
 models:
@@ -47,21 +54,26 @@ models:
     workers:
     - gpus:
       - 3
-- qwen-72b:
-    alias: qwen,gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k
+
+- qwen-32b:
+    alias: qwen,gpt-4,gpt-4o,gpt-3.5-turbo,gpt-3.5-turbo-16k
     enable: true
     model_config:
-      model_name_or_path: /home/dev/model/qwen/Qwen2___5-72B-Instruct-AWQ/
+      model_name_or_path: /home/dev/model/Qwen/Qwen2___5-32B-Instruct-AWQ/
       enable_prefix_caching: true
       dtype: auto
       max_model_len: 65536
+      kv_cache_quant_policy: 8
     model_type: qwen
     work_mode: lmdeploy-turbomind
     device: gpu
     workers:
     - gpus:
       - 0
       - 1
+    # - gpus:
+    #   - 3
+    #   - 2
 - piccolo-base-zh:
     alias: null
     enable: true
@@ -73,11 +85,11 @@ models:
     workers:
     - gpus:
       - 2
-- bce-embedding-base_v1:
-    alias: text-embedding-ada-002
+- injection:
+    alias: null
     enable: true
     model_config:
-      model_name_or_path: /home/dev/model/maidalun1020/bce-embedding-base_v1/
+      model_name_or_path: /home/dev/model/protectai/deberta-v3-base-prompt-injection-v2
     model_type: embedding_infinity
     work_mode: hf
     device: gpu
@@ -95,11 +107,11 @@ models:
     workers:
     - gpus:
       - 2
-- acge_text_embedding:
-    alias: text-embedding-ada-002
-    enable: true
+- MiniCPM-Embedding:
+    alias: null
+    enable: false
     model_config:
-      model_name_or_path: /home/dev/model/aspire/acge_text_embedding
+      model_name_or_path: /home/dev/model/openbmb/MiniCPM-Embedding
     model_type: embedding_infinity
     work_mode: hf
     device: gpu
 
@@ -13,15 +13,15 @@ dependencies = [
     "fschat==0.2.36",
     "gradio==4.26.0",
     "infinity-emb[all]==0.0.73",
-    "lmdeploy==0.6.2",
+    "lmdeploy==0.7.0",
     "loguru>=0.7.2",
     "openai==1.55.3",
     "setuptools==75.2.0",
     "streamlit==1.39.0",
     "torch==2.5.1",
     "torchvision==0.20.1",
-    "transformers==4.45.2",
-    "vllm==0.6.6.post1",
+    "transformers==4.48.2",
+    "vllm==0.7.2",
     "qwen_vl_utils",
     "evalscope[perf]==0.7.0",
     "modelscope==1.20.1",
@@ -35,6 +35,7 @@ override-dependencies = [
     "torch==2.5.1",
     "triton",
     "outlines==0.1.11",
+    "pynvml==12.0.0" # 解决vllm==0.7.2的bug https://github.com/vllm-project/vllm/issues/12847，后面可去掉
 
 ]