
Commit c103b32

fix:merge error
1 parent e072baa commit c103b32

File tree

2 files changed: 4 additions & 38 deletions


examples/mem_os/simple_vllm_memos.py

Lines changed: 4 additions & 9 deletions
@@ -2,24 +2,20 @@
 """
 Simple example demonstrating how to use VLLMLLM with an existing vLLM server.
 Requires a vLLM server to be running.
-Simple example demonstrating how to use VLLMLLM with an existing vLLM server.
-Requires a vLLM server to be running.
 """

 from memos.configs.llm import VLLMLLMConfig
 from memos.llms.vllm import VLLMLLM
-from memos.types import MessageDict
+from memos.types import MessageList

 def main():
     """Main function demonstrating VLLMLLM usage."""

     # Configuration for connecting to existing vLLM server
     config = VLLMLLMConfig(
-        model_name_or_path="/mnt/afs/models/hf_models/Qwen2.5-7B", # MUST MATCH the --model arg of vLLM server
         model_name_or_path="/mnt/afs/models/hf_models/Qwen2.5-7B", # MUST MATCH the --model arg of vLLM server
         api_key="", # Not needed for local server
         api_base="http://localhost:8088/v1", # vLLM server address with /v1
-        api_base="http://localhost:8088/v1", # vLLM server address with /v1
         temperature=0.7,
         max_tokens=512,
         top_p=0.9,
@@ -32,21 +28,20 @@ def main():

     # Test messages for KV cache building
     print("\nBuilding KV cache for system messages...")
-    system_messages: list[MessageDict] = [
+    system_messages: MessageList = [
         {"role": "system", "content": "You are a helpful AI assistant."},
         {"role": "user", "content": "Hello! Can you tell me about vLLM?"}
     ]
     try:
         prompt = llm.build_vllm_kv_cache(system_messages)
         print(f"✓ KV cache built successfully for prompt: '{prompt[:100]}...'")
-        print(f"✓ KV cache built successfully for prompt: '{prompt[:100]}...'")
     except Exception as e:
         print(f"✗ Failed to build KV cache: {e}")

     # Test with different messages for generation
     print("\nGenerating response...")
-    user_messages: list[MessageDict] = [
-        {"role": "system", "content": "You are a helpful AI assistant. Please Introduce yourself "},
+    user_messages: MessageList = [
+        {"role": "system", "content": "You are a helpful AI assistant."},
         {"role": "user", "content": "What are the benefits of using vLLM?"}
     ]
     try:
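
Read as a whole, the cleaned-up example configures one VLLMLLMConfig, builds MessageList messages, and exercises the KV-cache and generation paths once each. Below is a minimal end-to-end sketch of that flow against a running vLLM server; the VLLMLLM(config) construction and the final generate call fall outside the hunks shown above, so treat them as assumptions based on the rest of the example rather than lines from this diff.

from memos.configs.llm import VLLMLLMConfig
from memos.llms.vllm import VLLMLLM
from memos.types import MessageList

# Configuration for an already-running vLLM server (OpenAI-compatible endpoint).
config = VLLMLLMConfig(
    model_name_or_path="/mnt/afs/models/hf_models/Qwen2.5-7B",  # must match the server's --model
    api_key="",                           # not needed for a local server
    api_base="http://localhost:8088/v1",  # server address including /v1
    temperature=0.7,
    max_tokens=512,
    top_p=0.9,
)
llm = VLLMLLM(config)  # assumed construction; this line is not part of the hunks above

system_messages: MessageList = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "Hello! Can you tell me about vLLM?"},
]
prompt = llm.build_vllm_kv_cache(system_messages)  # warm the server-side KV cache
print(f"KV cache built for prompt: '{prompt[:100]}...'")

user_messages: MessageList = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "What are the benefits of using vLLM?"},
]
print(llm.generate(user_messages))  # assumed to mirror the example's generation step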

src/memos/llms/vllm.py

Lines changed: 0 additions & 29 deletions
@@ -33,17 +33,7 @@ def __init__(self, config: VLLMLLMConfig):
             api_key=api_key,
             base_url=getattr(self.config, "api_base", "http://localhost:8088/v1")
         )
-        api_key = getattr(self.config, "api_key", "dummy")
-        if not api_key:
-            api_key = "dummy"
-
-        import openai
-        self.client = openai.Client(
-            api_key=api_key,
-            base_url=getattr(self.config, "api_base", "http://localhost:8088/v1")
-        )

-    def build_vllm_kv_cache(self, messages: Any) -> str:
     def build_vllm_kv_cache(self, messages: Any) -> str:
         """
         Build a KV cache from chat messages via one vLLM request.
@@ -67,21 +57,12 @@ def build_vllm_kv_cache(self, messages: Any) -> str:

         if not prompt.strip():
             raise ValueError("Prompt is empty, cannot build KV cache.")
-            raise ValueError("Prompt is empty, cannot build KV cache.")

         # 3. Send request to vLLM server to preload the KV cache
-        if self.client:
-            try:
-                # Use the processed messages for the API call
-                # 3. Send request to vLLM server to preload the KV cache
         if self.client:
             try:
                 # Use the processed messages for the API call
                 prefill_kwargs = {
-                    "model": self.config.model_name_or_path,
-                    "messages": processed_messages,
-                    "max_tokens": 2,
-                    "temperature": 0.0,
                     "model": self.config.model_name_or_path,
                     "messages": processed_messages,
                     "max_tokens": 2,
@@ -90,8 +71,6 @@ def build_vllm_kv_cache(self, messages: Any) -> str:
                 }
                 self.client.chat.completions.create(**prefill_kwargs)
                 logger.info(f"vLLM KV cache prefill completed for prompt: '{prompt[:100]}...'")
-                self.client.chat.completions.create(**prefill_kwargs)
-                logger.info(f"vLLM KV cache prefill completed for prompt: '{prompt[:100]}...'")
             except Exception as e:
                 logger.warning(f"Failed to prefill vLLM KV cache: {e}")
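
The duplicated request dropped in this hunk also shows the mechanism build_vllm_kv_cache relies on: a throwaway chat completion with max_tokens=2 and temperature=0.0 forces the server to process (prefill) the prompt, the idea being that later requests sharing that message prefix can reuse the cached KV blocks when the server runs with prefix caching enabled (vLLM's --enable-prefix-caching). A stand-alone sketch of the same trick, reusing the endpoint and model path from the example above:

import openai

# Stand-alone version of the prefill request kept by this commit: a tiny
# 2-token completion makes the server compute the prompt's KV blocks so that
# later requests with the same prefix can hit the server's prefix cache
# (assumes the server was started with --enable-prefix-caching).
client = openai.Client(
    api_key="dummy",                      # local vLLM servers ignore the key
    base_url="http://localhost:8088/v1",
)
client.chat.completions.create(
    model="/mnt/afs/models/hf_models/Qwen2.5-7B",  # must match the server's --model
    messages=[
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": "Hello! Can you tell me about vLLM?"},
    ],
    max_tokens=2,     # minimal decode; the prefill pass is what matters here
    temperature=0.0,
)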

@@ -101,7 +80,6 @@ def generate(self, messages: list[MessageDict]) -> str:
         """
         Generate a response from the model.
         """
-        if self.client:
         if self.client:
             return self._generate_with_api_client(messages)
         else:
@@ -111,11 +89,8 @@ def _generate_with_api_client(self, messages: list[MessageDict]) -> str:
         """
         Generate response using vLLM API client.
         """
-        if self.client:
         if self.client:
             completion_kwargs = {
-                "model": self.config.model_name_or_path,
-                "messages": messages,
                 "model": self.config.model_name_or_path,
                 "messages": messages,
                 "temperature": float(getattr(self.config, "temperature", 0.8)),
@@ -127,9 +102,6 @@ def _generate_with_api_client(self, messages: list[MessageDict]) -> str:
             response_text = response.choices[0].message.content or ""
             logger.info(f"VLLM API response: {response_text}")
             return remove_thinking_tags(response_text) if getattr(self.config, "remove_think_prefix", False) else response_text
-            response_text = response.choices[0].message.content or ""
-            logger.info(f"VLLM API response: {response_text}")
-            return remove_thinking_tags(response_text) if getattr(self.config, "remove_think_prefix", False) else response_text
         else:
             raise RuntimeError("API client is not available")

@@ -142,7 +114,6 @@ def _messages_to_prompt(self, messages: list[MessageDict]) -> str:
             role = msg["role"]
             content = msg["content"]
             prompt_parts.append(f"{role.capitalize()}: {content}")
-            prompt_parts.append(f"{role.capitalize()}: {content}")
         return "\n".join(prompt_parts)

     def generate_stream(self, messages: list[MessageDict]):
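
The _messages_to_prompt helper touched in this last hunk simply flattens a chat message list into "Role: content" lines joined by newlines. A stand-alone sketch of that behavior, using illustrative messages taken from the example script:

# Illustrative flattening equivalent to the deduplicated _messages_to_prompt loop.
messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "What are the benefits of using vLLM?"},
]
prompt_parts = []
for msg in messages:
    role = msg["role"]
    content = msg["content"]
    prompt_parts.append(f"{role.capitalize()}: {content}")
prompt = "\n".join(prompt_parts)
# prompt is now:
# System: You are a helpful AI assistant.
# User: What are the benefits of using vLLM?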
