Skip to content

Commit 4a65448

Browse files
committed
更新 vllm==0.7.2 lmdeploy=0.7.0 transformers==4.48.2 pynvml==12.0.0
1 parent ddcfa70 commit 4a65448

File tree

5 files changed

+179
-187
lines changed

5 files changed

+179
-187
lines changed

docker-compose.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ services:
55
build:
66
context: .
77
dockerfile: Dockerfile.copy
8-
image: gpt_server:v0.4.0
8+
image: gpt_server:_latest
99
shm_size: '4g' # 设置共享内存为4GB
1010
container_name: gpt_server
1111
restart: always

gpt_server/script/config.yaml

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,20 @@
11
serve_args:
2+
# openai 服务的 host 和 port
3+
enable: true
24
host: 0.0.0.0
35
port: 8082
4-
controller_address: http://localhost:21001
5-
api_keys: null
6+
controller_address: http://localhost:21001 # 控制器的ip地址
7+
api_keys: null # api_keys: 111,222 # 用来设置 openai 密钥
68
controller_args:
9+
# 控制器的配置参数
10+
enable: true
711
host: 0.0.0.0
812
port: 21001
9-
dispatch_method: shortest_queue
13+
dispatch_method: shortest_queue # lottery、shortest_queue # 现有两种请求分发策略,随机(lottery) 和 最短队列(shortest_queue),最短队列方法更推荐。
14+
1015
model_worker_args:
16+
# 模型的配置参数,这里port 不能设置,程序自动分配,并注册到 控制器中。
17+
# model worker 的配置参数
1118
host: 0.0.0.0
1219
controller_address: http://localhost:21001
1320
models:
@@ -47,21 +54,26 @@ models:
4754
workers:
4855
- gpus:
4956
- 3
50-
- qwen-72b:
51-
alias: qwen,gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k
57+
58+
- qwen-32b:
59+
alias: qwen,gpt-4,gpt-4o,gpt-3.5-turbo,gpt-3.5-turbo-16k
5260
enable: true
5361
model_config:
54-
model_name_or_path: /home/dev/model/qwen/Qwen2___5-72B-Instruct-AWQ/
62+
model_name_or_path: /home/dev/model/Qwen/Qwen2___5-32B-Instruct-AWQ/
5563
enable_prefix_caching: true
5664
dtype: auto
5765
max_model_len: 65536
66+
kv_cache_quant_policy: 8
5867
model_type: qwen
5968
work_mode: lmdeploy-turbomind
6069
device: gpu
6170
workers:
6271
- gpus:
6372
- 0
6473
- 1
74+
# - gpus:
75+
# - 3
76+
# - 2
6577
- piccolo-base-zh:
6678
alias: null
6779
enable: true
@@ -73,11 +85,11 @@ models:
7385
workers:
7486
- gpus:
7587
- 2
76-
- bce-embedding-base_v1:
77-
alias: text-embedding-ada-002
88+
- injection:
89+
alias: null
7890
enable: true
7991
model_config:
80-
model_name_or_path: /home/dev/model/maidalun1020/bce-embedding-base_v1/
92+
model_name_or_path: /home/dev/model/protectai/deberta-v3-base-prompt-injection-v2
8193
model_type: embedding_infinity
8294
work_mode: hf
8395
device: gpu
@@ -95,11 +107,11 @@ models:
95107
workers:
96108
- gpus:
97109
- 2
98-
- acge_text_embedding:
99-
alias: text-embedding-ada-002
100-
enable: true
110+
- MiniCPM-Embedding:
111+
alias: null
112+
enable: false
101113
model_config:
102-
model_name_or_path: /home/dev/model/aspire/acge_text_embedding
114+
model_name_or_path: /home/dev/model/openbmb/MiniCPM-Embedding
103115
model_type: embedding_infinity
104116
work_mode: hf
105117
device: gpu

pyproject.toml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,15 @@ dependencies = [
1313
"fschat==0.2.36",
1414
"gradio==4.26.0",
1515
"infinity-emb[all]==0.0.73",
16-
"lmdeploy==0.6.2",
16+
"lmdeploy==0.7.0",
1717
"loguru>=0.7.2",
1818
"openai==1.55.3",
1919
"setuptools==75.2.0",
2020
"streamlit==1.39.0",
2121
"torch==2.5.1",
2222
"torchvision==0.20.1",
23-
"transformers==4.45.2",
24-
"vllm==0.6.6.post1",
23+
"transformers==4.48.2",
24+
"vllm==0.7.2",
2525
"qwen_vl_utils",
2626
"evalscope[perf]==0.7.0",
2727
"modelscope==1.20.1",
@@ -35,6 +35,7 @@ override-dependencies = [
3535
"torch==2.5.1",
3636
"triton",
3737
"outlines==0.1.11",
38+
"pynvml==12.0.0" # 解决vllm==0.7.2的bug https://github.com/vllm-project/vllm/issues/12847,后面可去掉
3839

3940
]
4041

0 commit comments

Comments
 (0)