Skip to content

Commit 609cc88

Browse files
committed
update readme
1 parent dbac077 commit 609cc88

File tree

2 files changed

+29
-12
lines changed

2 files changed

+29
-12
lines changed

README.md

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,8 @@ models:
138138
- chatglm4: #自定义的模型名称
139139
alias: null # 别名 例如 gpt4,gpt3
140140
enable: true # false true 控制是否启动模型worker
141-
model_name_or_path: /home/dev/model/THUDM/glm-4-9b-chat/
141+
model_config:
142+
model_name_or_path: /home/dev/model/THUDM/glm-4-9b-chat/
142143
model_type: chatglm # qwen yi internlm
143144
work_mode: vllm # vllm hf lmdeploy-turbomind lmdeploy-pytorch
144145
# lora: # lora 配置
@@ -162,7 +163,11 @@ models:
162163
- qwen: #自定义的模型名称
163164
alias: gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k # 别名 例如 gpt4,gpt3
164165
enable: true # false true 控制是否启动模型worker
165-
model_name_or_path: /home/dev/model/qwen/Qwen1___5-14B-Chat/
166+
model_config:
167+
model_name_or_path: /home/dev/model/qwen/Qwen1___5-14B-Chat/
168+
enable_prefix_caching: false
169+
dtype: auto
170+
max_model_len: 65536
166171
model_type: qwen # qwen yi internlm
167172
work_mode: vllm # vllm hf lmdeploy-turbomind lmdeploy-pytorch
168173
device: gpu # gpu / cpu
@@ -176,7 +181,8 @@ models:
176181
- bge-base-zh:
177182
alias: null # 别名
178183
enable: true # false true
179-
model_name_or_path: /home/dev/model/Xorbits/bge-base-zh-v1___5/
184+
model_config:
185+
model_name_or_path: /home/dev/model/Xorbits/bge-base-zh-v1___5/
180186
model_type: embedding_infinity # embedding_infinity
181187
work_mode: hf
182188
device: gpu # gpu / cpu
@@ -187,7 +193,8 @@ models:
187193
- bge-reranker-base:
188194
alias: null # 别名
189195
enable: true # false true 控制是否启动模型worker
190-
model_name_or_path: /home/dev/model/Xorbits/bge-reranker-base/
196+
model_config:
197+
model_name_or_path: /home/dev/model/Xorbits/bge-reranker-base/
191198
model_type: embedding_infinity # embedding_infinity
192199
work_mode: hf
193200
device: gpu # gpu / cpu

gpt_server/script/config.yaml

Lines changed: 18 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -14,48 +14,57 @@ models:
1414
- minicpmv:
1515
alias: null
1616
enable: false
17-
model_name_or_path: /home/dev/model/OpenBMB/MiniCPM-V-2_6/
1817
model_type: minicpmv
18+
model_config:
19+
model_name_or_path: /home/dev/model/OpenBMB/MiniCPM-V-2_6/
20+
enable_prefix_caching: false
21+
dtype: auto
1922
work_mode: lmdeploy-turbomind
20-
enable_prefix_caching: false
2123
device: gpu
2224
workers:
2325
- gpus:
2426
- 3
2527
- internvl2:
2628
alias: null
2729
enable: false
28-
model_name_or_path: /home/dev/model/OpenGVLab/InternVL2-40B-AWQ/
30+
model_config:
31+
model_name_or_path: /home/dev/model/OpenGVLab/InternVL2-40B-AWQ/
32+
enable_prefix_caching: false
2933
model_type: internvl2
3034
work_mode: lmdeploy-turbomind
31-
enable_prefix_caching: false
3235
device: gpu
3336
workers:
3437
- gpus:
3538
- 3
3639
- chatglm4:
3740
alias: chatglm3
3841
enable: true
39-
model_name_or_path: /home/dev/model/ZhipuAI/glm-4-9b-chat
42+
model_config:
43+
model_name_or_path: /home/dev/model/ZhipuAI/glm-4-9b-chat
44+
enable_prefix_caching: false
4045
model_type: chatglm
4146
work_mode: vllm
42-
enable_prefix_caching: false
4347
device: gpu
4448
workers:
4549
- gpus:
4650
- 3
51+
4752
- qwen-72b:
4853
alias: qwen,gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k
4954
enable: true
50-
model_name_or_path: /home/dev/model/qwen/Qwen2___5-72B-Instruct-AWQ/
55+
model_config:
56+
model_name_or_path: /home/dev/model/qwen/Qwen2___5-72B-Instruct-AWQ/
57+
enable_prefix_caching: true
58+
dtype: auto
59+
max_model_len: 65536
5160
model_type: qwen
5261
work_mode: lmdeploy-turbomind
53-
enable_prefix_caching: true
5462
device: gpu
5563
workers:
5664
- gpus:
5765
- 0
5866
- 1
67+
5968
- piccolo-base-zh:
6069
alias: null
6170
enable: true
@@ -78,6 +87,7 @@ models:
7887
workers:
7988
- gpus:
8089
- 2
90+
8191
- bge-reranker-base:
8292
alias: null
8393
enable: true

0 commit comments

Comments (0)