Feat: update chatbot for postprocessing memory (#267)

fridayL · web-flow · commit 1b195a55f51e · 2025-09-04T11:15:25.000+08:00
feat: update post-processing memory for chatbot
diff --git a/src/memos/api/product_models.py b/src/memos/api/product_models.py
@@ -97,6 +97,8 @@ class ChatCompleteRequest(BaseRequest):
     internet_search: bool = Field(False, description="Whether to use internet search")
     moscube: bool = Field(False, description="Whether to use MemOSCube")
     base_prompt: str | None = Field(None, description="Base prompt to use for chat")
+    top_k: int = Field(10, description="Number of results to return")
+    threshold: float = Field(0.5, description="Threshold for filtering references")
 
 
 class UserCreate(BaseRequest):
diff --git a/src/memos/api/routers/product_router.py b/src/memos/api/routers/product_router.py
@@ -284,18 +284,23 @@ def chat_complete(chat_req: ChatCompleteRequest):
         mos_product = get_mos_product_instance()
 
         # Collect all responses from the generator
-        content = mos_product.chat(
+        content, references = mos_product.chat(
             query=chat_req.query,
             user_id=chat_req.user_id,
             cube_id=chat_req.mem_cube_id,
             history=chat_req.history,
             internet_search=chat_req.internet_search,
             moscube=chat_req.moscube,
             base_prompt=chat_req.base_prompt,
+            top_k=chat_req.top_k,
+            threshold=chat_req.threshold,
         )
 
         # Return the complete response
-        return {"message": "Chat completed successfully", "data": {"response": content}}
+        return {
+            "message": "Chat completed successfully",
+            "data": {"response": content, "references": references},
+        }
 
     except ValueError as err:
         raise HTTPException(status_code=404, detail=str(traceback.format_exc())) from err
diff --git a/src/memos/mem_os/product.py b/src/memos/mem_os/product.py
@@ -889,11 +889,13 @@ def chat(
         internet_search: bool = False,
         moscube: bool = False,
         top_k: int = 10,
+        threshold: float = 0.5,
     ) -> str:
         """
         Chat with LLM with memory references and complete response.
         """
         self._load_user_cubes(user_id, self.default_cube_config)
+        time_start = time.time()
         memories_result = super().search(
             query,
             user_id,
@@ -905,14 +907,30 @@ def chat(
         )["text_mem"]
         if memories_result:
             memories_list = memories_result[0]["memories"]
-            memories_list = self._filter_memories_by_threshold(memories_list)
+            memories_list = self._filter_memories_by_threshold(memories_list, threshold)
         system_prompt = super()._build_system_prompt(memories_list, base_prompt)
+        history_info = []
+        if history:
+            history_info = history[-20:]
         current_messages = [
             {"role": "system", "content": system_prompt},
+            *history_info,
             {"role": "user", "content": query},
         ]
         response = self.chat_llm.generate(current_messages)
-        return response
+        time_end = time.time()
+        self._start_post_chat_processing(
+            user_id=user_id,
+            cube_id=cube_id,
+            query=query,
+            full_response=response,
+            system_prompt=system_prompt,
+            time_start=time_start,
+            time_end=time_end,
+            speed_improvement=0.0,
+            current_messages=current_messages,
+        )
+        return response, memories_list
 
     def chat_with_references(
         self,
@@ -973,7 +991,7 @@ def chat_with_references(
 
         chat_history = self.chat_history_manager[user_id]
         if history:
-            chat_history.chat_history = history[-10:]
+            chat_history.chat_history = history[-20:]
         current_messages = [
             {"role": "system", "content": system_prompt},
             *chat_history.chat_history,