Skip to content

Commit 334f681

Browse files
committed
update config
1 parent 609cc88 commit 334f681

File tree

3 files changed

+36
-174
lines changed

3 files changed

+36
-174
lines changed

gpt_server/script/config.yaml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,10 @@ models:
1414
- minicpmv:
1515
alias: null
1616
enable: false
17-
model_type: minicpmv
1817
model_config:
1918
model_name_or_path: /home/dev/model/OpenBMB/MiniCPM-V-2_6/
2019
enable_prefix_caching: false
21-
dtype: auto
20+
model_type: minicpmv
2221
work_mode: lmdeploy-turbomind
2322
device: gpu
2423
workers:
@@ -48,7 +47,6 @@ models:
4847
workers:
4948
- gpus:
5049
- 3
51-
5250
- qwen-72b:
5351
alias: qwen,gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k
5452
enable: true
@@ -64,7 +62,6 @@ models:
6462
- gpus:
6563
- 0
6664
- 1
67-
6865
- piccolo-base-zh:
6966
alias: null
7067
enable: true
@@ -87,7 +84,6 @@ models:
8784
workers:
8885
- gpus:
8986
- 2
90-
9187
- bge-reranker-base:
9288
alias: null
9389
enable: true

gpt_server/script/config_example.yaml

Lines changed: 15 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,12 @@ model_worker_args:
1818
controller_address: http://localhost:21001
1919

2020
models:
21-
- qwenvl: #自定义的模型名称
22-
alias: null # 别名 例如 gpt4,gpt3
23-
enable: false # false true
24-
model_name_or_path: /home/dev/model/qwen/Qwen2-VL-7B-Instruct/
25-
model_type: qwen # qwen chatglm3 yi internlm
26-
work_mode: lmdeploy-turbomind # vllm hf lmdeploy-turbomind lmdeploy-pytorch
27-
device: gpu # gpu / cpu
28-
workers:
29-
- gpus:
30-
# - 1
31-
- 0
3221
- internvl2: #自定义的模型名称
3322
alias: null # 别名 例如 gpt4,gpt3
3423
enable: false # false true
35-
model_name_or_path: /home/dev/model/OpenGVLab/InternVL2-40B-AWQ/
24+
model_config:
25+
model_name_or_path: /home/dev/model/OpenGVLab/InternVL2-40B-AWQ/
26+
enable_prefix_caching: false
3627
model_type: internvl2 # qwen yi internlm
3728
work_mode: lmdeploy-turbomind # vllm hf lmdeploy-turbomind lmdeploy-pytorch
3829
device: gpu # gpu / cpu
@@ -42,174 +33,44 @@ models:
4233
- 0
4334
# - gpus:
4435
# - 0
45-
- chatglm4: #自定义的模型名称
46-
alias: chatglm3 # 别名 例如 gpt4,gpt3
47-
enable: true # false true
48-
model_name_or_path: /home/dev/model/ZhipuAI/glm-4-9b-chat
49-
model_type: chatglm # qwen yi internlm
50-
work_mode: vllm # vllm hf lmdeploy-turbomind lmdeploy-pytorch
51-
device: gpu # gpu / cpu
52-
workers:
53-
- gpus:
54-
# - 1
55-
- 0
5636

5737
- qwen: #自定义的模型名称
5838
alias: gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k # 别名 例如 gpt4,gpt3
5939
enable: false # false true
60-
model_name_or_path: /home/dev/model/qwen/Qwen2___5-7B-Instruct/
40+
model_config:
41+
model_name_or_path: /home/dev/model/qwen/Qwen2___5-7B-Instruct/
42+
enable_prefix_caching: true
43+
dtype: auto
44+
max_model_len: 65536
45+
# lora:
46+
# test_lora: /home/dev/project/LLaMA-Factory/saves/Qwen1.5-14B-Chat/lora/train_2024-03-22-09-01-32/checkpoint-100
47+
6148
model_type: qwen # qwen yi internlm
6249
work_mode: lmdeploy-turbomind # vllm hf lmdeploy-turbomind lmdeploy-pytorch
63-
# lora:
64-
# test_lora: /home/dev/project/LLaMA-Factory/saves/Qwen1.5-14B-Chat/lora/train_2024-03-22-09-01-32/checkpoint-100
6550

6651
device: gpu # gpu / cpu
6752
workers:
6853
- gpus:
6954
- 1
7055
# - gpus:
7156
# - 3
72-
- qwen-72b: #自定义的模型名称
73-
alias: qwen,gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k # 别名 例如 gpt4,gpt3
74-
enable: true # false true
75-
model_name_or_path: /home/dev/model/qwen/Qwen2___5-72B-Instruct-AWQ/
76-
model_type: qwen # qwen yi internlm
77-
work_mode: lmdeploy-turbomind # vllm hf lmdeploy-turbomind lmdeploy-pytorch
78-
enable_prefix_caching: true # false true
79-
device: gpu # gpu / cpu
80-
workers:
81-
- gpus:
82-
- 3
83-
- 1
84-
# - gpus:
85-
# - 1
86-
87-
- mixtral: #自定义的模型名称
88-
alias: null # 别名 例如 gpt4,gpt3
89-
enable: false # false true
90-
model_name_or_path: /home/dev/model/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT/
91-
model_type: qwen # qwen yi internlm
92-
work_mode: vllm # vllm hf lmdeploy-turbomind lmdeploy-pytorch
93-
device: gpu # gpu / cpu
94-
workers:
95-
- gpus:
96-
- 3
97-
- 0
98-
99-
100-
- llama3: #自定义的模型名称
101-
alias: null # 别名 例如 gpt4,gpt3
102-
enable: false # false true
103-
model_name_or_path: /home/dev/model/unsloth/unsloth/llama-3-8b-Instruct/
104-
model_type: llama # qwen yi internlm
105-
work_mode: hf # vllm hf lmdeploy-turbomind lmdeploy-pytorch
106-
device: gpu # gpu / cpu
107-
workers:
108-
- gpus:
109-
- 0
110-
111-
- yi: #自定义的模型名称
112-
alias: null # 别名 例如 gpt4,gpt3
113-
enable: false # false true
114-
model_name_or_path: /home/dev/model/01ai/Yi-34B-Chat/
115-
model_type: yi # qwen yi internlm
116-
work_mode: hf # vllm hf lmdeploy-turbomind lmdeploy-pytorch
117-
device: gpu # gpu / cpu
118-
workers:
119-
- gpus:
120-
- 2
121-
# - 0
122-
123-
- internlm2: #自定义的模型名称
124-
alias: null # 别名 例如 gpt4,gpt3
125-
enable: false # false true
126-
model_name_or_path: /home/dev/model/Shanghai_AI_Laboratory/internlm2_5-7b-chat/
127-
model_type: internlm # qwen yi internlm
128-
work_mode: hf # vllm hf lmdeploy-turbomind lmdeploy-pytorch
129-
device: gpu # gpu / cpu
130-
workers:
131-
- gpus:
132-
- 0
133-
134-
# Embedding 模型
135-
- piccolo-base-zh:
136-
alias: null # 别名
137-
enable: true # false true
138-
model_name_or_path: /home/dev/model/assets/embeddings/sensenova/piccolo-base-zh/
139-
model_type: embedding_infinity # embedding_infinity
140-
work_mode: hf
141-
device: gpu # gpu / cpu
142-
workers:
143-
- gpus:
144-
- 2
145-
146-
- bce-embedding-base_v1:
147-
alias: text-embedding-ada-002 # 别名
148-
enable: true # false true
149-
model_name_or_path: /home/dev/model/maidalun1020/bce-embedding-base_v1/
150-
model_type: embedding_infinity # embedding_infinity
151-
work_mode: hf
152-
device: gpu # gpu / cpu
153-
workers:
154-
- gpus:
155-
- 2
156-
157-
- conan:
158-
alias: null # 别名
159-
enable: true # false true
160-
model_name_or_path: /home/dev/model/model1001/Conan/
161-
model_type: embedding_infinity # embedding_infinity
162-
work_mode: hf
163-
device: gpu # gpu / cpu
164-
workers:
165-
- gpus:
166-
- 2
16757

16858
- bge-reranker-base:
16959
alias: null # 别名
17060
enable: true # false true
171-
model_name_or_path: /home/dev/model/Xorbits/bge-reranker-base/
61+
model_config:
62+
model_name_or_path: /home/dev/model/Xorbits/bge-reranker-base/
17263
model_type: embedding_infinity # embedding_infinity
17364
work_mode: hf
17465
device: gpu # gpu / cpu
17566
workers:
17667
- gpus:
17768
- 2
178-
- puff:
179-
alias: null # 别名
180-
enable: true # false true
181-
model_name_or_path: /home/dev/model/infgrad/puff-large-v1/
182-
model_type: embedding_infinity # embedding_infinity
183-
work_mode: hf
184-
device: gpu # gpu / cpu
185-
workers:
186-
- gpus:
187-
- 2
188-
18969
- acge_text_embedding:
19070
alias: text-embedding-ada-002 # 别名
19171
enable: true # false true
192-
model_name_or_path: /home/dev/model/aspire/acge_text_embedding
193-
model_type: embedding_infinity # embedding_infinity
194-
work_mode: hf
195-
device: gpu # gpu / cpu
196-
workers:
197-
- gpus:
198-
- 2
199-
- yinka:
200-
alias: null # 别名
201-
enable: false # false true
202-
model_name_or_path: /home/dev/model/Classical/Yinka/
203-
model_type: embedding_infinity # embedding_infinity
204-
work_mode: hf
205-
device: gpu # gpu / cpu
206-
workers:
207-
- gpus:
208-
- 2
209-
- xiaobu-embedding:
210-
alias: null # 别名
211-
enable: true # false true
212-
model_name_or_path: /home/dev/model/lier007/xiaobu-embedding-v2/
72+
model_config:
73+
model_name_or_path: /home/dev/model/aspire/acge_text_embedding
21374
model_type: embedding_infinity # embedding_infinity
21475
work_mode: hf
21576
device: gpu # gpu / cpu
@@ -223,4 +84,3 @@ models:
22384

22485

22586

226-

gpt_server/serving/server_ui.py

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -142,14 +142,16 @@ def on_change():
142142
st.session_state[f"model_name_{i}"]: {
143143
"alias": st.session_state[f"alias_{i}"],
144144
"enable": st.session_state[f"enable_{i}"],
145-
"model_name_or_path": st.session_state[
146-
f"model_name_or_path_{i}"
147-
],
145+
"model_config": {
146+
"model_name_or_path": st.session_state[
147+
f"model_name_or_path_{i}"
148+
],
149+
"enable_prefix_caching": st.session_state[
150+
f"enable_prefix_caching_{i}"
151+
],
152+
},
148153
"model_type": st.session_state[f"model_type_{i}"],
149154
"work_mode": st.session_state[f"work_mode_{i}"],
150-
"enable_prefix_caching": st.session_state[
151-
f"enable_prefix_caching_{i}"
152-
],
153155
"device": st.session_state[f"device_{i}"],
154156
"workers": yaml.safe_load(
155157
st.session_state[f"workers_{i}"]
@@ -180,16 +182,18 @@ def on_change():
180182
"new_model_name": {
181183
"alias": st.session_state[f"alias_{i}"],
182184
"enable": False,
183-
"model_name_or_path": st.session_state[
184-
f"model_name_or_path_{i}"
185-
],
185+
"model_config": {
186+
"model_name_or_path": st.session_state[
187+
f"model_name_or_path_{i}"
188+
],
189+
"enable_prefix_caching": st.session_state[
190+
f"enable_prefix_caching_{i}"
191+
],
192+
},
186193
"model_type": st.session_state[
187194
f"model_type_{i}"
188195
],
189196
"work_mode": st.session_state[f"work_mode_{i}"],
190-
"enable_prefix_caching": st.session_state[
191-
f"enable_prefix_caching_{i}"
192-
],
193197
"device": st.session_state[f"device_{i}"],
194198
"workers": yaml.safe_load(
195199
st.session_state[f"workers_{i}"]
@@ -299,10 +303,12 @@ def on_change():
299303
model_name_input: {
300304
"alias": model_alias,
301305
"enable": enable,
302-
"model_name_or_path": model_name_or_path,
306+
"model_config": {
307+
"model_name_or_path": model_name_or_path,
308+
"enable_prefix_caching": enable_prefix_caching,
309+
},
303310
"model_type": model_type,
304311
"work_mode": work_mode,
305-
"enable_prefix_caching": enable_prefix_caching,
306312
"device": device,
307313
"workers": workers_value_dict,
308314
}

0 commit comments

Comments
 (0)