Commit 84624d4
fix:merge error (#75)
## Description

Summary: (summary)

Fix: #(issue)

Reviewer: @(reviewer)

## Checklist:

- [ ] I have performed a self-review of my own code
- [ ] I have commented my code in hard-to-understand areas
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] I have linked the issue to this PR (if applicable)
- [ ] I have mentioned the person who will review this PR
2 parents 0dfe4cc + 0f6c286 commit 84624d4

File tree: 2 files changed (+0, −34 lines)


examples/mem_os/simple_vllm_memos.py

Lines changed: 0 additions & 5 deletions
@@ -2,8 +2,6 @@
 """
 Simple example demonstrating how to use VLLMLLM with an existing vLLM server.
 Requires a vLLM server to be running.
-Simple example demonstrating how to use VLLMLLM with an existing vLLM server.
-Requires a vLLM server to be running.
 """
 
 from memos.configs.llm import VLLMLLMConfig
@@ -15,11 +13,9 @@ def main():
 
     # Configuration for connecting to existing vLLM server
     config = VLLMLLMConfig(
-        model_name_or_path="/mnt/afs/models/hf_models/Qwen2.5-7B",  # MUST MATCH the --model arg of vLLM server
         model_name_or_path="/mnt/afs/models/hf_models/Qwen2.5-7B",  # MUST MATCH the --model arg of vLLM server
         api_key="",  # Not needed for local server
         api_base="http://localhost:8088/v1",  # vLLM server address with /v1
-        api_base="http://localhost:8088/v1",  # vLLM server address with /v1
         temperature=0.7,
         max_tokens=512,
         top_p=0.9,
@@ -39,7 +35,6 @@ def main():
     try:
         prompt = llm.build_vllm_kv_cache(system_messages)
         print(f"✓ KV cache built successfully for prompt: '{prompt[:100]}...'")
-        print(f"✓ KV cache built successfully for prompt: '{prompt[:100]}...'")
     except Exception as e:
         print(f"✗ Failed to build KV cache: {e}")
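For context, once the duplicates above are removed the example reduces to roughly the flow below. This is a sketch rather than the file itself: the `VLLMLLM` import path, the direct `VLLMLLM(config)` construction, and the `system_messages` content are assumptions inferred from the diff, not lines shown in it.

```python
# Sketch of the de-duplicated example flow (assumed import path and messages).
from memos.configs.llm import VLLMLLMConfig
from memos.llms.vllm import VLLMLLM  # assumed path, mirroring src/memos/llms/vllm.py

config = VLLMLLMConfig(
    model_name_or_path="/mnt/afs/models/hf_models/Qwen2.5-7B",  # must match the vLLM server's --model
    api_key="",  # not needed for a local server
    api_base="http://localhost:8088/v1",  # vLLM server address, including /v1
    temperature=0.7,
    max_tokens=512,
    top_p=0.9,
)

llm = VLLMLLM(config)  # __init__(self, config: VLLMLLMConfig) per the diff below
system_messages = [{"role": "system", "content": "You are a helpful assistant."}]  # hypothetical content

try:
    prompt = llm.build_vllm_kv_cache(system_messages)
    print(f"✓ KV cache built successfully for prompt: '{prompt[:100]}...'")
except Exception as e:
    print(f"✗ Failed to build KV cache: {e}")
```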

src/memos/llms/vllm.py

Lines changed: 0 additions & 29 deletions
@@ -33,17 +33,7 @@ def __init__(self, config: VLLMLLMConfig):
             api_key=api_key,
             base_url=getattr(self.config, "api_base", "http://localhost:8088/v1")
         )
-        api_key = getattr(self.config, "api_key", "dummy")
-        if not api_key:
-            api_key = "dummy"
-
-        import openai
-        self.client = openai.Client(
-            api_key=api_key,
-            base_url=getattr(self.config, "api_base", "http://localhost:8088/v1")
-        )
 
-    def build_vllm_kv_cache(self, messages: Any) -> str:
     def build_vllm_kv_cache(self, messages: Any) -> str:
         """
         Build a KV cache from chat messages via one vLLM request.
@@ -67,21 +57,12 @@ def build_vllm_kv_cache(self, messages: Any) -> str:
 
         if not prompt.strip():
             raise ValueError("Prompt is empty, cannot build KV cache.")
-            raise ValueError("Prompt is empty, cannot build KV cache.")
 
         # 3. Send request to vLLM server to preload the KV cache
-        if self.client:
-            try:
-                # Use the processed messages for the API call
-        # 3. Send request to vLLM server to preload the KV cache
         if self.client:
             try:
                 # Use the processed messages for the API call
                 prefill_kwargs = {
-                    "model": self.config.model_name_or_path,
-                    "messages": processed_messages,
-                    "max_tokens": 2,
-                    "temperature": 0.0,
                     "model": self.config.model_name_or_path,
                     "messages": processed_messages,
                     "max_tokens": 2,
@@ -90,8 +71,6 @@ def build_vllm_kv_cache(self, messages: Any) -> str:
                 }
                 self.client.chat.completions.create(**prefill_kwargs)
                 logger.info(f"vLLM KV cache prefill completed for prompt: '{prompt[:100]}...'")
-                self.client.chat.completions.create(**prefill_kwargs)
-                logger.info(f"vLLM KV cache prefill completed for prompt: '{prompt[:100]}...'")
             except Exception as e:
                 logger.warning(f"Failed to prefill vLLM KV cache: {e}")
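The hunks above leave a single copy of the prefill request that `build_vllm_kv_cache` sends. As a standalone illustration of that pattern, here is a minimal sketch against the plain `openai` client; the concrete values are hypothetical, and actual reuse of the cached prefix depends on how the vLLM server is configured (e.g. prefix caching).

```python
# Minimal sketch of the KV-cache "prefill" pattern kept by this commit: issue a
# near-zero-output chat completion so the server runs the prefill pass for the
# prompt. All concrete values below are hypothetical stand-ins.
import openai

api_key = ""  # the real __init__ falls back to "dummy" when the configured key is empty
client = openai.Client(
    api_key=api_key or "dummy",
    base_url="http://localhost:8088/v1",
)

prefill_kwargs = {
    "model": "/mnt/afs/models/hf_models/Qwen2.5-7B",  # must match the server's --model
    "messages": [{"role": "system", "content": "You are a helpful assistant."}],
    "max_tokens": 2,  # generate almost nothing; the point is the prefill pass
    "temperature": 0.0,
}
client.chat.completions.create(**prefill_kwargs)
```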

@@ -101,7 +80,6 @@ def generate(self, messages: list[MessageDict]) -> str:
         """
         Generate a response from the model.
         """
-        if self.client:
         if self.client:
             return self._generate_with_api_client(messages)
         else:
@@ -111,11 +89,8 @@ def _generate_with_api_client(self, messages: list[MessageDict]) -> str:
         """
         Generate response using vLLM API client.
         """
-        if self.client:
         if self.client:
             completion_kwargs = {
-                "model": self.config.model_name_or_path,
-                "messages": messages,
                 "model": self.config.model_name_or_path,
                 "messages": messages,
                 "temperature": float(getattr(self.config, "temperature", 0.8)),
@@ -127,9 +102,6 @@ def _generate_with_api_client(self, messages: list[MessageDict]) -> str:
             response_text = response.choices[0].message.content or ""
             logger.info(f"VLLM API response: {response_text}")
             return remove_thinking_tags(response_text) if getattr(self.config, "remove_think_prefix", False) else response_text
-            response_text = response.choices[0].message.content or ""
-            logger.info(f"VLLM API response: {response_text}")
-            return remove_thinking_tags(response_text) if getattr(self.config, "remove_think_prefix", False) else response_text
         else:
             raise RuntimeError("API client is not available")
 
@@ -142,7 +114,6 @@ def _messages_to_prompt(self, messages: list[MessageDict]) -> str:
             role = msg["role"]
             content = msg["content"]
             prompt_parts.append(f"{role.capitalize()}: {content}")
-            prompt_parts.append(f"{role.capitalize()}: {content}")
         return "\n".join(prompt_parts)
 
     def generate_stream(self, messages: list[MessageDict]):
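After the cleanup, the generation path is a single OpenAI-compatible chat call. The sketch below shows what `_generate_with_api_client` is left doing; the client setup and message content are hypothetical, and only the kwargs and response handling mirror the diff above.

```python
# Sketch of the surviving generation path: build OpenAI-compatible kwargs, make
# one chat call, and read choices[0].message.content. Client/config values are
# hypothetical; the real code reads them from self.config via getattr.
import openai

client = openai.Client(api_key="dummy", base_url="http://localhost:8088/v1")

completion_kwargs = {
    "model": "/mnt/afs/models/hf_models/Qwen2.5-7B",
    "messages": [{"role": "user", "content": "Hello!"}],  # hypothetical messages
    "temperature": 0.8,  # vllm.py uses float(getattr(self.config, "temperature", 0.8))
}
response = client.chat.completions.create(**completion_kwargs)

response_text = response.choices[0].message.content or ""
# vllm.py additionally strips thinking tags via remove_thinking_tags() when
# config.remove_think_prefix is set; that helper is not reproduced here.
print(response_text)
```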
