Skip to content

Commit cc3ed0c

Browse files
committed
feat: use schema to structure mem_reader output
1 parent e069928 commit cc3ed0c

File tree

4 files changed

+55
-5
lines changed

4 files changed

+55
-5
lines changed

src/memos/llms/openai.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def clear_cache(cls):
5656
cls._instances.clear()
5757
logger.info("OpenAI LLM instance cache cleared")
5858

59-
def generate(self, messages: MessageList) -> str:
59+
def generate(self, messages: MessageList, **kwargs) -> str:
6060
"""Generate a response from OpenAI LLM."""
6161
response = self.client.chat.completions.create(
6262
model=self.config.model_name_or_path,
@@ -65,6 +65,7 @@ def generate(self, messages: MessageList) -> str:
6565
temperature=self.config.temperature,
6666
max_tokens=self.config.max_tokens,
6767
top_p=self.config.top_p,
68+
**kwargs
6869
)
6970
logger.info(f"Response from OpenAI: {response.model_dump_json()}")
7071
response_content = response.choices[0].message.content

src/memos/llms/vllm.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,16 +85,16 @@ def build_vllm_kv_cache(self, messages: Any) -> str:
8585

8686
return prompt
8787

88-
def generate(self, messages: list[MessageDict]) -> str:
88+
def generate(self, messages: list[MessageDict], **kwargs) -> str:
8989
"""
9090
Generate a response from the model.
9191
"""
9292
if self.client:
93-
return self._generate_with_api_client(messages)
93+
return self._generate_with_api_client(messages, **kwargs)
9494
else:
9595
raise RuntimeError("API client is not available")
9696

97-
def _generate_with_api_client(self, messages: list[MessageDict]) -> str:
97+
def _generate_with_api_client(self, messages: list[MessageDict], **kwargs) -> str:
9898
"""
9999
Generate response using vLLM API client.
100100
"""
@@ -106,6 +106,7 @@ def _generate_with_api_client(self, messages: list[MessageDict]) -> str:
106106
"max_tokens": int(getattr(self.config, "max_tokens", 1024)),
107107
"top_p": float(getattr(self.config, "top_p", 0.9)),
108108
"extra_body": {"chat_template_kwargs": {"enable_thinking": False}},
109+
**kwargs
109110
}
110111

111112
response = self.client.chat.completions.create(**completion_kwargs)

src/memos/mem_reader/simple_struct.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH,
2828
SIMPLE_STRUCT_MEM_READER_PROMPT,
2929
SIMPLE_STRUCT_MEM_READER_PROMPT_ZH,
30+
reader_output_schema
3031
)
3132
from memos.utils import timed
3233

@@ -200,7 +201,9 @@ def _get_llm_response(self, mem_str: str) -> dict:
200201
prompt = prompt.replace(examples, "")
201202
messages = [{"role": "user", "content": prompt}]
202203
try:
203-
response_text = self.llm.generate(messages)
204+
response_text = self.llm.generate(messages,
205+
response_format={"type": "json_object",
206+
"schema": reader_output_schema})
204207
response_json = self.parse_json_result(response_text)
205208
except Exception as e:
206209
logger.error(f"[LLM] Exception during chat generation: {e}")

src/memos/templates/mem_reader_prompts.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,3 +417,48 @@
417417
}
418418
419419
"""
420+
421+
# JSON Schema (draft 2020-12) describing the structured output expected from
# the mem_reader LLM call: a list of memory entries plus an overall summary.
# Passed as the ``schema`` of a ``response_format`` argument so the LLM is
# constrained to emit parseable, well-shaped JSON.
# NOTE(review): the top-level property name "memory list" (with a space) must
# match the key produced by the reader prompt — do not "normalize" it.
reader_output_schema = {
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "type": "object",
    "properties": {
        "memory list": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "key": {
                        "type": "string",
                        "description": "A brief title or identifier for the memory.",
                    },
                    "memory_type": {
                        "type": "string",
                        "enum": ["LongTermMemory", "ShortTermMemory", "WorkingMemory"],
                        "description": "The type of memory, expected to be 'LongTermMemory' in this context.",
                    },
                    "value": {
                        "type": "string",
                        "description": "Detailed description of the memory, including viewpoint, time, and content.",
                    },
                    "tags": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "List of keywords or categories associated with the memory.",
                    },
                },
                "required": ["key", "memory_type", "value", "tags"],
                "additionalProperties": False,
            },
            "description": "List of memory entries.",
        },
        "summary": {
            "type": "string",
            "description": "A synthesized summary of the overall situation based on all memories.",
        },
    },
    "required": ["memory list", "summary"],
    "additionalProperties": False,
    "description": "Structured output containing a list of memories and a summary.",
}

0 commit comments

Comments
 (0)