shell-nlp
diff --git a/gpt_server/model_backend/vllm_backend.py b/gpt_server/model_backend/vllm_backend.py
Lines changed: 1 addition & 1 deletion
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 13 additions & 20 deletions
@@ -87,7 +87,7 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
         if multimodal:  # 多模态模型
             # ----------------------------------------------------------------
             model_config = await self.engine.get_model_config()
-            conversation, mm_data_future = parse_chat_messages_futures(
+            conversation, mm_data_future, _ = parse_chat_messages_futures(
                 messages, model_config, tokenizer, content_format="string"
             )
 
 
@@ -1,6 +1,6 @@
 [project]
 name = "gpt_server"
-version = "0.6.3"
+version = "0.6.4"
 description = "gpt_server是一个用于生产级部署LLMs、Embedding、Reranker、ASR和TTS的开源框架。"
 readme = "README.md"
 license = { text = "Apache 2.0" }
@@ -11,57 +11,50 @@ dependencies = [
     "fastapi==0.115.0",
     "ffmpy",
     "fschat==0.2.36",
-    "infinity-emb[all]==0.0.76",
+    "infinity-emb[all]==0.0.77",
     "lmdeploy==0.10.0",
     "loguru>=0.7.2",
     "openai==1.99.1",
     "setuptools==75.2.0",
     "streamlit==1.39.0",
-    "torch==2.6.0",
+    "torch==2.8.0",
     "torchvision==0.20.1",
-    "vllm",
+    "vllm==0.10.2",
     "qwen_vl_utils",
     "evalscope[perf,rag]==0.16.1",
     "modelscope==1.26.0",
     "edge-tts>=7.0.0",
     "funasr>=1.2.6",
-    "sglang[all]>=0.5.1.post3",
+    "sglang[all]>=0.5.2",
     "flashinfer-python",
     "flashtts>=0.1.7",
     "diffusers>=0.35.1",
     #"sqlmodel>=0.0.24",
     "autoawq>=0.2.9",
+    "flash-attn",
 ]
 
 [tool.uv]
 default-groups = [] # 默认只安装dependencies中的库
 override-dependencies = [
     "setuptools==77.0.3",
     "torchvision==0.23.0",
-    "torchaudio==2.8.0",
-    "torch==2.8.0",
-    "triton==3.4.0",
-    "transformers==4.56.1",      #  infinity-emb
-    "soundfile==0.13.1",         # infinity
-    "xgrammar==0.1.23",          #  sglang[all]==0.4.5 depends on xgrammar==0.1.17
-    "flashinfer-python==0.2.10",
-    "outlines-core==0.2.10",     # sglang 和 vllm 的冲突
-    "peft>=0.17.0",              # 和 lmdeloy 冲突
+    "transformers==4.56.1",  #  infinity-emb
+    "soundfile==0.13.1",     # infinity
+    "xgrammar==0.1.24",      #  sglang[all]==0.4.5 depends on xgrammar==0.1.17
+    "outlines-core==0.2.11", # sglang 和 vllm 的冲突
+    "peft>=0.17.0",          # 和 lmdeploy 冲突
 ]
 
 [project.scripts]
 gpt_server = "gpt_server.cli:main"
 
-[tool.uv.sources]
-vllm = { index = "vllm-custom" }
+[tool.uv.extra-build-dependencies]
+flash-attn = ["torch"]
 
 [[tool.uv.index]]
 url = "https://pypi.tuna.tsinghua.edu.cn/simple"
-default = true
 
-[[tool.uv.index]]
-name = "vllm-custom"
-url = "https://wheels.vllm.ai/006e7a34aeb3e905ca4131a3251fe079f0511e2f"
 
 [build-system]
 requires = ["setuptools", "wheel"]
Original file line number	Diff line number	Diff line change
`@@ -87,7 +87,7 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:`
`87`	`87`	`if multimodal: # 多模态模型`
`88`	`88`	`# ----------------------------------------------------------------`
`89`	`89`	`model_config = await self.engine.get_model_config()`
`90`		`- conversation, mm_data_future = parse_chat_messages_futures(`
	`90`	`+ conversation, mm_data_future, _ = parse_chat_messages_futures(`
`91`	`91`	`messages, model_config, tokenizer, content_format="string"`
`92`	`92`	`)`
`93`	`93`