fix: product and format utils and change prompt

fridayL · fridayL · commit 621ebb070a09 · 2025-07-28T11:04:51.000Z
diff --git a/src/memos/mem_os/product.py b/src/memos/mem_os/product.py
@@ -22,6 +22,7 @@
     filter_nodes_by_tree_ids,
     remove_embedding_recursive,
     sort_children_by_memory_type,
+    split_continuous_references,
 )
 from memos.mem_scheduler.schemas.general_schemas import (
     ANSWER_LABEL,
@@ -33,6 +34,7 @@
 from memos.memories.textual.item import (
     TextualMemoryItem,
 )
+from memos.templates.mos_prompts import MEMOS_PRODUCT_BASE_PROMPT, MEMOS_PRODUCT_ENHANCE_PROMPT
 from memos.types import MessageList
 
 
@@ -358,28 +360,49 @@ def _build_system_prompt(self, user_id: str, memories_all: list[TextualMemoryIte
         """
 
         # Build base prompt
-        base_prompt = (
-            "You are a knowledgeable and helpful AI assistant with access to user memories. "
-            "When responding to user queries, you should reference relevant memories using the provided memory IDs. "
-            "Use the reference format: [1-n:memoriesID] "
-            "where refid is a sequential number starting from 1 and increments for each reference in your response, "
-            "and memoriesID is the specific memory ID provided in the available memories list. "
-            "For example: [1:abc123], [2:def456], [3:ghi789], [4:jkl101], [5:mno112] "
-            "Only reference memories that are directly relevant to the user's question. "
-            "Make your responses natural and conversational while incorporating memory references when appropriate."
-        )
-
         # Add memory context if available
         if memories_all:
             memory_context = "\n\n## Available ID Memories:\n"
             for i, memory in enumerate(memories_all, 1):
                 # Format: [memory_id]: memory_content
                 memory_id = f"{memory.id.split('-')[0]}" if hasattr(memory, "id") else f"mem_{i}"
                 memory_content = memory.memory[:500] if hasattr(memory, "memory") else str(memory)
+                memory_content = memory_content.replace("\n", " ")
                 memory_context += f"{memory_id}: {memory_content}\n"
-            return base_prompt + memory_context
+            return MEMOS_PRODUCT_BASE_PROMPT + memory_context
 
-        return base_prompt
+        return MEMOS_PRODUCT_BASE_PROMPT
+
+    def _build_enhance_system_prompt(
+        self, user_id: str, memories_all: list[TextualMemoryItem]
+    ) -> str:
+        """
+        Build the enhance system prompt with PersonalMemory and OuterMemory sections.
+
+        Args:
+            user_id (str): ID of the requesting user (not used when building the prompt).
+            memories_all (list[TextualMemoryItem]): Memories to list in the prompt.
+
+        Returns:
+            str: MEMOS_PRODUCT_ENHANCE_PROMPT, followed by both memory sections
+                when memories_all is non-empty.
+        """
+        if not memories_all:
+            return MEMOS_PRODUCT_ENHANCE_PROMPT
+        personal_memory_context = "\n\n## Available ID and PersonalMemory Memories:\n"
+        outer_memory_context = "\n\n## Available ID and OuterMemory Memories:\n"
+        for i, memory in enumerate(memories_all, 1):
+            # Format: [memory_id]: memory_content
+            memory_id = f"{memory.id.split('-')[0]}" if hasattr(memory, "id") else f"mem_{i}"
+            memory_content = memory.memory[:500] if hasattr(memory, "memory") else str(memory)
+            # Keep every memory on one line so the "id: content" listing stays unambiguous
+            memory_content = memory_content.replace("\n", " ")
+            # Anything that is not explicitly OuterMemory is listed as personal memory
+            if memory.metadata.memory_type != "OuterMemory":
+                personal_memory_context += f"{memory_id}: {memory_content}\n"
+            else:
+                outer_memory_context += f"{memory_id}: {memory_content}\n"
+        return MEMOS_PRODUCT_ENHANCE_PROMPT + personal_memory_context + outer_memory_context
 
     def _process_streaming_references_complete(self, text_buffer: str) -> tuple[str, str]:
         """
@@ -404,9 +427,13 @@ def _process_streaming_references_complete(self, text_buffer: str) -> tuple[str,
             last_match = complete_matches[-1]
             end_pos = last_match.end()
 
-            # Return text up to the end of the last complete tag
+            # Get text up to the end of the last complete tag
             processed_text = text_buffer[:end_pos]
             remaining_buffer = text_buffer[end_pos:]
+
+            # Apply reference splitting to the processed text
+            processed_text = split_continuous_references(processed_text)
+
             return processed_text, remaining_buffer
 
         # Check for incomplete reference tags
@@ -425,15 +452,22 @@ def _process_streaming_references_complete(self, text_buffer: str) -> tuple[str,
                 return "", text_buffer
             else:
                 # Incomplete opening pattern, return text before it
-                return text_buffer[:opening_start], text_buffer[opening_start:]
+                processed_text = text_buffer[:opening_start]
+                # Apply reference splitting to the processed text
+                processed_text = split_continuous_references(processed_text)
+                return processed_text, text_buffer[opening_start:]
 
         # Check for partial opening pattern (starts with [ but not complete)
         if "[" in text_buffer:
             ref_start = text_buffer.find("[")
-            return text_buffer[:ref_start], text_buffer[ref_start:]
+            processed_text = text_buffer[:ref_start]
+            # Apply reference splitting to the processed text
+            processed_text = split_continuous_references(processed_text)
+            return processed_text, text_buffer[ref_start:]
 
-        # No reference tags found, return all text
-        return text_buffer, ""
+        # No reference tags found, apply reference splitting and return all text
+        processed_text = split_continuous_references(text_buffer)
+        return processed_text, ""
 
     def _extract_references_from_response(self, response: str) -> tuple[str, list[dict]]:
         """
@@ -758,9 +792,8 @@ def chat_with_references(
         if memories_result:
             memories_list = memories_result[0]["memories"]
             memories_list = self._filter_memories_by_threshold(memories_list)
-        # Build custom system prompt with relevant memories
-        system_prompt = self._build_system_prompt(user_id, memories_list)
-
+        # Build custom system prompt with relevant memories
+        system_prompt = self._build_enhance_system_prompt(user_id, memories_list)
         # Get chat history
         if user_id not in self.chat_history_manager:
             self._register_chat_history(user_id)
@@ -773,6 +806,9 @@ def chat_with_references(
             *chat_history.chat_history,
             {"role": "user", "content": query},
         ]
+        logger.info(
+            f"user_id: {user_id}, cube_id: {cube_id}, current_system_prompt: {system_prompt}"
+        )
         yield f"data: {json.dumps({'type': 'status', 'data': '2'})}\n\n"
         # Generate response with custom prompt
         past_key_values = None
diff --git a/src/memos/mem_os/utils/format_utils.py b/src/memos/mem_os/utils/format_utils.py
@@ -1355,3 +1355,47 @@ def clean_json_response(response: str) -> str:
         str: Clean JSON string without markdown formatting
     """
     return response.replace("```json", "").replace("```", "").strip()
+
+
+def split_continuous_references(text: str) -> str:
+    """
+    Split a combined reference tag into individual reference tags.
+
+    Converts patterns like [1:92ff35fb, 4:bfe6f044] to [1:92ff35fb][4:bfe6f044]
+    (the tags are emitted back to back, with no separator between them).
+
+    Only processes text if:
+    1. '[' appears exactly once
+    2. ']' appears exactly once, after the '['
+    3. Contains commas between '[' and ']'
+
+    Args:
+        text (str): Text containing reference tags
+
+    Returns:
+        str: Text with split reference tags, or original text if conditions not met
+    """
+    # Early return if text is empty
+    if not text:
+        return text
+    # Require exactly one '[' and exactly one ']'
+    if text.count("[") != 1 or text.count("]") != 1:
+        return text
+    # Find positions of brackets
+    open_bracket_pos = text.find("[")
+    close_bracket_pos = text.find("]")
+    # Check if brackets are in correct order
+    if open_bracket_pos >= close_bracket_pos:
+        return text
+    # Extract content between brackets
+    content_between_brackets = text[open_bracket_pos + 1 : close_bracket_pos]
+    # Check if there's a comma between brackets
+    if "," not in content_between_brackets:
+        return text
+    # Split on every comma, tolerate any spacing around it, drop empty entries
+    parts = [part.strip() for part in content_between_brackets.split(",")]
+    parts = [part for part in parts if part]
+    if not parts:
+        return text
+    # Rebuild by slicing so identical text outside the brackets is left untouched
+    return text[: open_bracket_pos + 1] + "][".join(parts) + text[close_bracket_pos:]
diff --git a/src/memos/templates/mos_prompts.py b/src/memos/templates/mos_prompts.py
@@ -61,3 +61,55 @@
 3. Provides clear reasoning and connections
 4. Is well-structured and easy to understand
 5. Maintains a natural conversational tone"""
+
+MEMOS_PRODUCT_BASE_PROMPT = (  # base system prompt; callers may append a memory listing
+    "You are a knowledgeable and helpful AI assistant with access to user memories. "
+    "When responding to user queries, you should reference relevant memories using the provided memory IDs. "
+    "Use the reference format: [1-n:memoriesID] "
+    "where refid is a sequential number starting from 1 and increments for each reference in your response, "
+    "and memoriesID is the specific memory ID provided in the available memories list. "
+    "For example: [1:abc123], [2:def456], [3:ghi789], [4:jkl101], [5:mno112] "
+    "Do not use connect format like [1:abc123,2:def456]. "  # adjacent literals are joined as-is
+    "Only reference memories that are directly relevant to the user's question. "
+    "Make your responses natural and conversational while incorporating memory references when appropriate."
+)
+
+MEMOS_PRODUCT_ENHANCE_PROMPT = """
+# Memory-Enhanced AI Assistant Prompt
+
+You are a knowledgeable and helpful AI assistant with access to two types of memory sources:
+
+## Memory Types
+- **PersonalMemory**: User-specific memories and information stored from previous interactions
+- **OuterMemory**: External information retrieved from the internet and other sources
+
+## Memory Reference Guidelines
+
+### Reference Format
+When citing memories in your responses, use the following format:
+- `[refid:memoriesID]` where:
+  - `refid` is a sequential number starting from 1 and incrementing for each reference
+  - `memoriesID` is the specific memory ID from the available memories list
+
+### Reference Examples
+- Correct: `[1:abc123]`, `[2:def456]`, `[3:ghi789]`, `[4:jkl101]`, `[5:mno112]`
+- Incorrect: `[1:abc123,2:def456]` (do not use connected format)
+
+## Response Guidelines
+
+### Memory Selection
+- Intelligently choose which memories (PersonalMemory or OuterMemory) are most relevant to the user's query
+- Only reference memories that are directly relevant to the user's question
+- Prioritize the most appropriate memory type based on the context and nature of the query
+
+### Response Style
+- Make your responses natural and conversational
+- Seamlessly incorporate memory references when appropriate
+- Ensure the flow of conversation remains smooth despite memory citations
+- Balance factual accuracy with engaging dialogue
+
+## Key Principles
+- Reference only relevant memories to avoid information overload
+- Maintain conversational tone while being informative
+- Use memory references to enhance, not disrupt, the user experience
+"""  # System prompt used by _build_enhance_system_prompt, which appends the memory listings.