
Commit 4572d30

CaralHsi and Copilot authored
Feat: More Llm API (#113)
* fix: language bug
* feat: add qwen api
* feat: modify qwen llm
* feat: add deepseek llm
* feat: add stream output for openai
* test: add unit test for llms
* Update src/memos/llms/deepseek.py
  Co-authored-by: Copilot <[email protected]>
* Apply suggestion from @Copilot
  Co-authored-by: Copilot <[email protected]>
* Apply suggestion from @Copilot
  Co-authored-by: Copilot <[email protected]>
* fix: multi llm test bug

---------

Co-authored-by: Copilot <[email protected]>
1 parent 8eb0738 commit 4572d30

File tree: 12 files changed, +539 -1 lines changed


examples/basic_modules/llm.py

Lines changed: 91 additions & 0 deletions
@@ -69,6 +69,11 @@
 print("Scenario 3:", response)
 print("==" * 20)
 
+print("Scenario 3:\n")
+for chunk in llm.generate_stream(messages):
+    print(chunk, end="")
+print("==" * 20)
+
 
 # Scenario 4: Using LLMFactory with Huggingface Models
 
@@ -91,3 +96,89 @@
 response = llm.generate(messages)
 print("Scenario 4:", response)
 print("==" * 20)
+
+
+# Scenario 5: Using LLMFactory with Qwen (DashScope Compatible API)
+# Note:
+# This example works for any model that supports the OpenAI-compatible Chat Completion API,
+# including but not limited to:
+# - Qwen models: qwen-plus, qwen-max-2025-01-25
+# - DeepSeek models: deepseek-chat, deepseek-coder, deepseek-v3
+# - Other compatible providers: MiniMax, Fireworks, Groq, OpenRouter, etc.
+#
+# Just set the correct `api_key`, `api_base`, and `model_name_or_path`.
+
+config = LLMConfigFactory.model_validate(
+    {
+        "backend": "qwen",
+        "config": {
+            "model_name_or_path": "qwen-plus",  # or qwen-max-2025-01-25
+            "temperature": 0.7,
+            "max_tokens": 1024,
+            "top_p": 0.9,
+            "top_k": 50,
+            "api_key": "sk-xxx",
+            "api_base": "https://dashscope.aliyuncs.com/compatible-mode/v1",
+        },
+    }
+)
+llm = LLMFactory.from_config(config)
+messages = [
+    {"role": "user", "content": "Hello, who are you"},
+]
+response = llm.generate(messages)
+print("Scenario 5:", response)
+print("==" * 20)
+
+print("Scenario 5:\n")
+for chunk in llm.generate_stream(messages):
+    print(chunk, end="")
+print("==" * 20)
+
+# Scenario 6: Using LLMFactory with Deepseek-chat
+
+cfg = LLMConfigFactory.model_validate(
+    {
+        "backend": "deepseek",
+        "config": {
+            "model_name_or_path": "deepseek-chat",
+            "api_key": "sk-xxx",
+            "api_base": "https://api.deepseek.com",
+            "temperature": 0.6,
+            "max_tokens": 512,
+            "remove_think_prefix": False,
+        },
+    }
+)
+llm = LLMFactory.from_config(cfg)
+messages = [{"role": "user", "content": "Hello, who are you"}]
+resp = llm.generate(messages)
+print("Scenario 6:", resp)
+
+
+# Scenario 7: Using LLMFactory with Deepseek-chat + reasoning + CoT + streaming
+
+cfg2 = LLMConfigFactory.model_validate(
+    {
+        "backend": "deepseek",
+        "config": {
+            "model_name_or_path": "deepseek-reasoner",
+            "api_key": "sk-xxx",
+            "api_base": "https://api.deepseek.com",
+            "temperature": 0.2,
+            "max_tokens": 1024,
+            "remove_think_prefix": False,
+        },
+    }
+)
+llm = LLMFactory.from_config(cfg2)
+messages = [
+    {
+        "role": "user",
+        "content": "Explain how to solve this problem step-by-step. Be explicit in your thinking process. Question: If a train travels from city A to city B at 60 mph and returns at 40 mph, what is its average speed for the entire trip? Let's think step by step.",
+    },
+]
+print("Scenario 7:\n")
+for chunk in llm.generate_stream(messages):
+    print(chunk, end="")
+print("==" * 20)

src/memos/configs/llm.py

Lines changed: 24 additions & 0 deletions
@@ -27,6 +27,28 @@ class OpenAILLMConfig(BaseLLMConfig):
     extra_body: Any = Field(default=None, description="extra body")
 
 
+class QwenLLMConfig(BaseLLMConfig):
+    api_key: str = Field(..., description="API key for DashScope (Qwen)")
+    api_base: str = Field(
+        default="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
+        description="Base URL for Qwen OpenAI-compatible API",
+    )
+    extra_body: Any = Field(default=None, description="extra body")
+    model_name_or_path: str = Field(..., description="Model name for Qwen, e.g., 'qwen-plus'")
+
+
+class DeepSeekLLMConfig(BaseLLMConfig):
+    api_key: str = Field(..., description="API key for DeepSeek")
+    api_base: str = Field(
+        default="https://api.deepseek.com",
+        description="Base URL for DeepSeek OpenAI-compatible API",
+    )
+    extra_body: Any = Field(default=None, description="Extra options for API")
+    model_name_or_path: str = Field(
+        ..., description="Model name: 'deepseek-chat' or 'deepseek-reasoner'"
+    )
+
+
 class AzureLLMConfig(BaseLLMConfig):
     base_url: str = Field(
         default="https://api.openai.azure.com/",
@@ -78,6 +100,8 @@ class LLMConfigFactory(BaseConfig):
         "huggingface": HFLLMConfig,
         "vllm": VLLMLLMConfig,
         "huggingface_singleton": HFLLMConfig,  # Add singleton support
+        "qwen": QwenLLMConfig,
+        "deepseek": DeepSeekLLMConfig,
     }
 
     @field_validator("backend")

src/memos/llms/base.py

Lines changed: 9 additions & 0 deletions
@@ -1,4 +1,5 @@
 from abc import ABC, abstractmethod
+from collections.abc import Generator
 
 from memos.configs.llm import BaseLLMConfig
 from memos.types import MessageList
@@ -14,3 +15,11 @@ def __init__(self, config: BaseLLMConfig):
     @abstractmethod
     def generate(self, messages: MessageList, **kwargs) -> str:
         """Generate a response from the LLM."""
+
+    @abstractmethod
+    def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, None, None]:
+        """
+        (Optional) Generate a streaming response from the LLM.
+        Subclasses should override this if they support streaming.
+        By default, this raises NotImplementedError.
+        """

src/memos/llms/deepseek.py

Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
+from collections.abc import Generator
+
+from memos.configs.llm import DeepSeekLLMConfig
+from memos.llms.openai import OpenAILLM
+from memos.llms.utils import remove_thinking_tags
+from memos.log import get_logger
+from memos.types import MessageList
+
+
+logger = get_logger(__name__)
+
+
+class DeepSeekLLM(OpenAILLM):
+    """DeepSeek LLM via OpenAI-compatible API."""
+
+    def __init__(self, config: DeepSeekLLMConfig):
+        super().__init__(config)
+
+    def generate(self, messages: MessageList) -> str:
+        """Generate a response from DeepSeek."""
+        response = self.client.chat.completions.create(
+            model=self.config.model_name_or_path,
+            messages=messages,
+            temperature=self.config.temperature,
+            max_tokens=self.config.max_tokens,
+            top_p=self.config.top_p,
+            extra_body=self.config.extra_body,
+        )
+        logger.info(f"Response from DeepSeek: {response.model_dump_json()}")
+        response_content = response.choices[0].message.content
+        if self.config.remove_think_prefix:
+            return remove_thinking_tags(response_content)
+        else:
+            return response_content
+
+    def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, None, None]:
+        """Stream response from DeepSeek."""
+        response = self.client.chat.completions.create(
+            model=self.config.model_name_or_path,
+            messages=messages,
+            stream=True,
+            temperature=self.config.temperature,
+            max_tokens=self.config.max_tokens,
+            top_p=self.config.top_p,
+            extra_body=self.config.extra_body,
+        )
+        # Streaming chunks of text
+        for chunk in response:
+            delta = chunk.choices[0].delta
+            if hasattr(delta, "reasoning_content") and delta.reasoning_content:
+                yield delta.reasoning_content
+
+            if hasattr(delta, "content") and delta.content:
+                yield delta.content

src/memos/llms/factory.py

Lines changed: 4 additions & 0 deletions
@@ -2,10 +2,12 @@
 
 from memos.configs.llm import LLMConfigFactory
 from memos.llms.base import BaseLLM
+from memos.llms.deepseek import DeepSeekLLM
 from memos.llms.hf import HFLLM
 from memos.llms.hf_singleton import HFSingletonLLM
 from memos.llms.ollama import OllamaLLM
 from memos.llms.openai import AzureLLM, OpenAILLM
+from memos.llms.qwen import QwenLLM
 from memos.llms.vllm import VLLMLLM
 
 
@@ -19,6 +21,8 @@ class LLMFactory(BaseLLM):
         "huggingface": HFLLM,
         "huggingface_singleton": HFSingletonLLM,  # Add singleton version
         "vllm": VLLMLLM,
+        "qwen": QwenLLM,
+        "deepseek": DeepSeekLLM,
     }
 
     @classmethod

src/memos/llms/ollama.py

Lines changed: 4 additions & 0 deletions
@@ -1,3 +1,4 @@
+from collections.abc import Generator
 from typing import Any
 
 from ollama import Client
@@ -80,3 +81,6 @@ def generate(self, messages: MessageList) -> Any:
             return remove_thinking_tags(str_response)
         else:
             return str_response
+
+    def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, None, None]:
+        raise NotImplementedError
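Because the Ollama backend (like AzureLLM further down) only stubs generate_stream with NotImplementedError, callers that want to treat every backend uniformly have to be ready for that. The helper below is a minimal caller-side sketch, not part of this commit, assuming only the generate/generate_stream interface defined in base.py.

# Hypothetical caller-side fallback: stream when the backend supports it,
# otherwise yield the blocking response as a single chunk.
from collections.abc import Generator


def stream_or_generate(llm, messages) -> Generator[str, None, None]:
    """Yield chunks from generate_stream(), falling back to generate()."""
    try:
        yield from llm.generate_stream(messages)
    except NotImplementedError:
        # Backends such as OllamaLLM and AzureLLM do not stream yet.
        yield llm.generate(messages)

With such a guard, the streaming loops in the example script above could be reused unchanged for every backend.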

src/memos/llms/openai.py

Lines changed: 38 additions & 0 deletions
@@ -1,3 +1,5 @@
+from collections.abc import Generator
+
 import openai
 
 from memos.configs.llm import AzureLLMConfig, OpenAILLMConfig
@@ -34,6 +36,39 @@ def generate(self, messages: MessageList) -> str:
         else:
             return response_content
 
+    def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, None, None]:
+        """Stream response from OpenAI LLM with optional reasoning support."""
+        response = self.client.chat.completions.create(
+            model=self.config.model_name_or_path,
+            messages=messages,
+            stream=True,
+            temperature=self.config.temperature,
+            max_tokens=self.config.max_tokens,
+            top_p=self.config.top_p,
+            extra_body=self.config.extra_body,
+        )
+
+        reasoning_started = False
+
+        for chunk in response:
+            delta = chunk.choices[0].delta
+
+            # Support for custom 'reasoning_content' (if present in OpenAI-compatible models like Qwen)
+            if hasattr(delta, "reasoning_content") and delta.reasoning_content:
+                if not reasoning_started and not self.config.remove_think_prefix:
+                    yield "<think>"
+                    reasoning_started = True
+                yield delta.reasoning_content
+            elif hasattr(delta, "content") and delta.content:
+                if reasoning_started and not self.config.remove_think_prefix:
+                    yield "</think>"
+                    reasoning_started = False
+                yield delta.content
+
+        # Ensure we close the <think> block if not already done
+        if reasoning_started and not self.config.remove_think_prefix:
+            yield "</think>"
+
 
 class AzureLLM(BaseLLM):
     """Azure OpenAI LLM class."""
@@ -61,3 +96,6 @@ def generate(self, messages: MessageList) -> str:
             return remove_thinking_tags(response_content)
         else:
             return response_content
+
+    def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, None, None]:
+        raise NotImplementedError
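The chunks yielded by OpenAILLM.generate_stream wrap any reasoning tokens between literal "<think>" and "</think>" markers unless remove_think_prefix is set. A consumer that wants the reasoning trace and the final answer separately can split on those markers; the helper below is a hypothetical sketch, not part of this commit, and it buffers the whole stream rather than printing incrementally.

# Hypothetical helper: separate the reasoning trace from the final answer
# in the chunks yielded by generate_stream() above.
def split_think_stream(chunks):
    thinking, answer, in_think = [], [], False
    for chunk in chunks:
        if chunk == "<think>":
            in_think = True
        elif chunk == "</think>":
            in_think = False
        elif in_think:
            thinking.append(chunk)
        else:
            answer.append(chunk)
    return "".join(thinking), "".join(answer)


# Usage sketch: reasoning, reply = split_think_stream(llm.generate_stream(messages))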

src/memos/llms/qwen.py

Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
+from collections.abc import Generator
+
+from memos.configs.llm import QwenLLMConfig
+from memos.llms.openai import OpenAILLM
+from memos.llms.utils import remove_thinking_tags
+from memos.log import get_logger
+from memos.types import MessageList
+
+
+logger = get_logger(__name__)
+
+
+class QwenLLM(OpenAILLM):
+    """Qwen (DashScope) LLM class via OpenAI-compatible API."""
+
+    def __init__(self, config: QwenLLMConfig):
+        super().__init__(config)
+
+    def generate(self, messages: MessageList) -> str:
+        """Generate a response from Qwen LLM."""
+        response = self.client.chat.completions.create(
+            model=self.config.model_name_or_path,
+            messages=messages,
+            extra_body=self.config.extra_body,
+            temperature=self.config.temperature,
+            max_tokens=self.config.max_tokens,
+            top_p=self.config.top_p,
+        )
+        logger.info(f"Response from Qwen: {response.model_dump_json()}")
+        response_content = response.choices[0].message.content
+        if self.config.remove_think_prefix:
+            return remove_thinking_tags(response_content)
+        else:
+            return response_content
+
+    def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, None, None]:
+        """Stream response from Qwen LLM."""
+        response = self.client.chat.completions.create(
+            model=self.config.model_name_or_path,
+            messages=messages,
+            stream=True,
+            temperature=self.config.temperature,
+            max_tokens=self.config.max_tokens,
+            top_p=self.config.top_p,
+            extra_body=self.config.extra_body,
+        )
+
+        reasoning_started = False
+        for chunk in response:
+            delta = chunk.choices[0].delta
+
+            # Some models may have separate `reasoning_content` vs `content`
+            # For Qwen (DashScope), likely only `content` is used
+            if hasattr(delta, "reasoning_content") and delta.reasoning_content:
+                if not reasoning_started and not self.config.remove_think_prefix:
+                    yield "<think>"
+                    reasoning_started = True
+                yield delta.reasoning_content
+            elif hasattr(delta, "content") and delta.content:
+                if reasoning_started and not self.config.remove_think_prefix:
+                    yield "</think>"
+                    reasoning_started = False
+                yield delta.content

src/memos/templates/mem_reader_prompts.py

Lines changed: 3 additions & 1 deletion
@@ -71,7 +71,7 @@
 "summary": "Tom is currently focused on managing a new project with a tight schedule. After a team meeting on June 25, 2025, he realized the original deadline of December 15 might not be feasible due to backend delays. Concerned about insufficient testing time, he welcomed Jerry’s suggestion of proposing an extension. Tom plans to raise the idea of shifting the deadline to January 5, 2026 in the next morning’s meeting. His actions reflect both stress about timelines and a proactive, team-oriented problem-solving approach."
 }
 
-Another Example in Chinese (注意: 你的输出必须和输入的user语言一致):
+Another Example in Chinese (注意: 当user的语言为中文时,你就需要也输出中文):
 {
 "memory list": [
 {
@@ -85,6 +85,8 @@
 "summary": "Tom 目前专注于管理一个进度紧张的新项目..."
 }
 
+Always respond in the same language as the conversation.
+
 Conversation:
 ${conversation}
 