src/memos/llms/openai.py (3 changes: 2 additions & 1 deletion)

@@ -56,7 +56,7 @@ def clear_cache(cls):
         cls._instances.clear()
         logger.info("OpenAI LLM instance cache cleared")
 
-    def generate(self, messages: MessageList) -> str:
+    def generate(self, messages: MessageList, **kwargs) -> str:
         """Generate a response from OpenAI LLM."""
         response = self.client.chat.completions.create(
             model=self.config.model_name_or_path,
@@ -65,6 +65,7 @@ def generate(self, messages: MessageList) -> str:
             temperature=self.config.temperature,
             max_tokens=self.config.max_tokens,
             top_p=self.config.top_p,
+            **kwargs,
         )
         logger.info(f"Response from OpenAI: {response.model_dump_json()}")
         response_content = response.choices[0].message.content
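With `**kwargs` forwarded straight into `client.chat.completions.create`, callers can opt into per-call parameters without changing the wrapper's signature again. A minimal usage sketch follows; the `OpenAILLM` class name is inferred from the module path, and the config construction is elided since it depends on the project's config classes:

from memos.llms.openai import OpenAILLM  # class name assumed from the module path

llm = OpenAILLM(config)  # config construction elided

messages = [{"role": "user", "content": "Reply with a JSON object containing a 'greeting' key."}]

# Extra keyword arguments now flow through unchanged to chat.completions.create,
# e.g. OpenAI-style JSON mode:
text = llm.generate(messages, response_format={"type": "json_object"})
print(text)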
src/memos/llms/vllm.py (7 changes: 4 additions & 3 deletions)

@@ -85,16 +85,16 @@ def build_vllm_kv_cache(self, messages: Any) -> str:
 
         return prompt
 
-    def generate(self, messages: list[MessageDict]) -> str:
+    def generate(self, messages: list[MessageDict], **kwargs) -> str:
         """
         Generate a response from the model.
         """
         if self.client:
-            return self._generate_with_api_client(messages)
+            return self._generate_with_api_client(messages, **kwargs)
         else:
             raise RuntimeError("API client is not available")
 
-    def _generate_with_api_client(self, messages: list[MessageDict]) -> str:
+    def _generate_with_api_client(self, messages: list[MessageDict], **kwargs) -> str:
         """
         Generate response using vLLM API client.
         """
@@ -106,6 +106,7 @@ def _generate_with_api_client(self, messages: list[MessageDict]) -> str:
             "max_tokens": int(getattr(self.config, "max_tokens", 1024)),
             "top_p": float(getattr(self.config, "top_p", 0.9)),
             "extra_body": {"chat_template_kwargs": {"enable_thinking": False}},
+            **kwargs,
         }
 
         response = self.client.chat.completions.create(**completion_kwargs)
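One subtlety of spreading `**kwargs` after the defaults: duplicate keys in a dict literal resolve last-wins, so a caller-supplied `extra_body` replaces the default wholesale rather than deep-merging with it. A small sketch of that behavior (the guided-decoding value is illustrative only):

defaults = {
    "top_p": 0.9,
    "extra_body": {"chat_template_kwargs": {"enable_thinking": False}},
}

# Later keys win, exactly as in _generate_with_api_client's completion_kwargs.
merged = {**defaults, "extra_body": {"guided_json": {"type": "object"}}}

print(merged["extra_body"])
# {'guided_json': {'type': 'object'}}: the default chat_template_kwargs entry is gone,
# so callers overriding extra_body must re-send any defaults they still need.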
src/memos/mem_reader/simple_struct.py (5 changes: 4 additions & 1 deletion)

@@ -27,6 +27,7 @@
     SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH,
     SIMPLE_STRUCT_MEM_READER_PROMPT,
     SIMPLE_STRUCT_MEM_READER_PROMPT_ZH,
+    reader_output_schema,
 )
 from memos.utils import timed
 
@@ -200,7 +201,9 @@ def _get_llm_response(self, mem_str: str) -> dict:
         prompt = prompt.replace(examples, "")
         messages = [{"role": "user", "content": prompt}]
         try:
-            response_text = self.llm.generate(messages)
+            response_text = self.llm.generate(
+                messages, response_format={"type": "json_object", "schema": reader_output_schema}
+            )
             response_json = self.parse_json_result(response_text)
         except Exception as e:
             logger.error(f"[LLM] Exception during chat generation: {e}")
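Because `response_format` only asks the backend to constrain decoding, and a `schema` field inside a `"json_object"` response format is not honored by every OpenAI-compatible server, a defensive validation step after parsing can catch non-conforming replies early. A minimal sketch using the third-party `jsonschema` package, which is not part of this PR:

import json

from jsonschema import ValidationError, validate  # pip install jsonschema

from memos.templates.mem_reader_prompts import reader_output_schema

def parse_and_validate(response_text: str) -> dict:
    """Parse the LLM reply and check it against the expected structure."""
    parsed = json.loads(response_text)
    try:
        validate(instance=parsed, schema=reader_output_schema)
    except ValidationError as e:
        # Retry or fall back here; the model ignored the schema constraint.
        raise ValueError(f"LLM output does not match reader_output_schema: {e.message}") from e
    return parsed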
src/memos/templates/mem_reader_prompts.py (43 changes: 43 additions & 0 deletions)

@@ -417,3 +417,46 @@
 }
 
 """
+
+reader_output_schema = {
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
+    "type": "object",
+    "properties": {
+        "memory list": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "key": {
+                        "type": "string",
+                        "description": "A brief title or identifier for the memory.",
+                    },
+                    "memory_type": {
+                        "type": "string",
+                        "enum": ["LongTermMemory", "ShortTermMemory", "WorkingMemory"],
+                        "description": "The type of memory, expected to be 'LongTermMemory' in this context.",
+                    },
+                    "value": {
+                        "type": "string",
+                        "description": "Detailed description of the memory, including viewpoint, time, and content.",
+                    },
+                    "tags": {
+                        "type": "array",
+                        "items": {"type": "string"},
+                        "description": "List of keywords or categories associated with the memory.",
+                    },
+                },
+                "required": ["key", "memory_type", "value", "tags"],
+                "additionalProperties": False,
+            },
+            "description": "List of memory entries.",
+        },
+        "summary": {
+            "type": "string",
+            "description": "A synthesized summary of the overall situation based on all memories.",
+        },
+    },
+    "required": ["memory list", "summary"],
+    "additionalProperties": False,
+    "description": "Structured output containing a list of memories and a summary.",
+}
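For reference, here is a minimal document that satisfies this schema; the memory content itself is invented for illustration. Note that the top-level key is literally "memory list", with a space:

# A minimal instance conforming to reader_output_schema (contents are illustrative).
example_output = {
    "memory list": [
        {
            "key": "Morning coffee habit",
            "memory_type": "LongTermMemory",
            "value": "On 2024-05-01 the user said they drink an oat-milk latte every morning.",
            "tags": ["preferences", "routine"],
        }
    ],
    "summary": "The user keeps a consistent morning coffee routine.",
}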