
Commit 4a40bf6

Update infinity==0.0.73, vllm==0.6.5
1 parent 1dba906 · commit 4a40bf6

File tree

5 files changed: +409 −154 lines

docker-compose.yml

Lines changed: 5 additions & 3 deletions
@@ -6,14 +6,16 @@ services:
       context: .
       dockerfile: Dockerfile.copy
     image: gpt_server:v0.4.0
-    shm_size: '4g' # set shared memory to 4 GB
+    shm_size: '4g' # set shared memory to 4 GB
     container_name: gpt_server
     restart: always
     # network_mode: host
     ports:
-      - "8082:8082"
+      - 8082:8082
+    environment:
+      - TZ:Asia/Shanghai # set China time zone
     volumes:
-      - "/home/dev/model/:/home/dev/model/"
+      - /home/dev/model/:/home/dev/model/
     deploy:
       resources:
         reservations:
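
As a side note on the new environment entry: the snippet below is only a rough illustration, not part of the commit, of what the TZ variable controls inside the container. With TZ pointing at Asia/Shanghai, POSIX time formatting resolves to CST (UTC+8); time.tzset() is POSIX-only, so it works inside the Linux container but not on Windows.

# Illustration only: effect of the TZ environment variable added above.
import os
import time

os.environ["TZ"] = "Asia/Shanghai"  # same value the compose file injects
time.tzset()  # re-read TZ from the environment (POSIX-only)
print(time.strftime("%Y-%m-%d %H:%M:%S %Z"))  # local time rendered in CST (UTC+8)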

gpt_server/model_worker/embedding_infinity.py

Lines changed: 14 additions & 3 deletions
@@ -60,7 +60,7 @@ def __init__(
 
     async def astart(self):
         await self.engine.astart()
-
+
     def generate_stream_gate(self, params):
        pass
 
@@ -76,10 +76,21 @@ async def get_embeddings(self, params):
             embedding = [embedding.tolist() for embedding in embeddings]
         elif self.mode == "rerank":
             query = params.get("query", None)
-            scores, usage = await self.engine.rerank(
+            ranking, usage = await self.engine.rerank(
                 query=query, docs=texts, raw_scores=False
             )
-            embedding = [[float(score)] for score in scores]
+            ranking = [
+                {
+                    "index": i.index,
+                    "relevance_score": i.relevance_score,
+                    "document": i.document,
+                }
+                for i in ranking
+            ]
+            ranking.sort(key=lambda x: x["index"])
+            embedding = [
+                [round(float(score["relevance_score"]), 6)] for score in ranking
+            ]
         ret["embedding"] = embedding
         ret["token_num"] = usage
         return ret
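
For readers without the surrounding worker code at hand, here is a minimal, self-contained sketch of the reshaping the new rerank branch performs. RankItem is a hypothetical stand-in for the result objects returned by the engine.rerank() call in the diff, which (as the diff shows) expose index, relevance_score and document attributes instead of a bare score list.

# Minimal sketch (not the worker itself) of the score reshaping added above.
from dataclasses import dataclass


@dataclass
class RankItem:  # stand-in for the rerank result objects
    index: int
    relevance_score: float
    document: str


def scores_in_input_order(ranking):
    rows = [
        {"index": r.index, "relevance_score": r.relevance_score, "document": r.document}
        for r in ranking
    ]
    rows.sort(key=lambda x: x["index"])  # restore the original document order
    # one single-element score list per document, rounded to 6 decimals
    return [[round(float(r["relevance_score"]), 6)] for r in rows]


if __name__ == "__main__":
    demo = [RankItem(1, 0.12345678, "doc b"), RankItem(0, 0.87654321, "doc a")]
    print(scores_in_input_order(demo))  # [[0.876543], [0.123457]]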

pyproject.toml

Lines changed: 4 additions & 3 deletions
@@ -1,6 +1,6 @@
 [project]
 name = "gpt_server"
-version = "0.3.2"
+version = "0.3.5"
 description = "gpt_server是一个用于生产级部署LLMs或Embedding的开源框架。"
 readme = "README.md"
 license = { text = "Apache 2.0" }
@@ -12,7 +12,7 @@ dependencies = [
     "ffmpy",
     "fschat==0.2.36",
     "gradio==4.26.0",
-    "infinity-emb[all]==0.0.53",
+    "infinity-emb[all]==0.0.73",
     "lmdeploy==0.6.2",
     "loguru>=0.7.2",
     "openai==1.55.3",
@@ -21,7 +21,7 @@ dependencies = [
     "torch==2.5.1",
     "torchvision==0.20.1",
     "transformers==4.45.2",
-    "vllm==0.6.4.post1",
+    "vllm==0.6.5",
     "qwen_vl_utils",
     "evalscope[perf]==0.7.0",
     "modelscope==1.20.1",
@@ -33,6 +33,7 @@ override-dependencies = [
     "torchvision==0.20.1",
     "torch==2.5.1",
     "triton",
+    "outlines==0.1.11",
 
 ]
 
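
After syncing the environment against the updated pyproject.toml, a quick sanity check, again not part of the commit, is to read the installed versions back from package metadata with the standard library:

# Convenience check: confirm the bumped pins are what actually got installed.
from importlib.metadata import PackageNotFoundError, version

for pkg in ("infinity-emb", "vllm", "outlines"):
    try:
        print(f"{pkg}=={version(pkg)}")  # expect 0.0.73 / 0.6.5 / 0.1.11
    except PackageNotFoundError:
        print(f"{pkg} is not installed")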
