Skip to content

Commit 621ebb0

Browse files
committed
fix: product and format utils and change prompt
1 parent 93cbf2c commit 621ebb0

File tree

3 files changed

+153
-21
lines changed

3 files changed

+153
-21
lines changed

src/memos/mem_os/product.py

Lines changed: 57 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
filter_nodes_by_tree_ids,
2323
remove_embedding_recursive,
2424
sort_children_by_memory_type,
25+
split_continuous_references,
2526
)
2627
from memos.mem_scheduler.schemas.general_schemas import (
2728
ANSWER_LABEL,
@@ -33,6 +34,7 @@
3334
from memos.memories.textual.item import (
3435
TextualMemoryItem,
3536
)
37+
from memos.templates.mos_prompts import MEMOS_PRODUCT_BASE_PROMPT, MEMOS_PRODUCT_ENHANCE_PROMPT
3638
from memos.types import MessageList
3739

3840

@@ -358,28 +360,49 @@ def _build_system_prompt(self, user_id: str, memories_all: list[TextualMemoryIte
358360
"""
359361

360362
# Build base prompt
361-
base_prompt = (
362-
"You are a knowledgeable and helpful AI assistant with access to user memories. "
363-
"When responding to user queries, you should reference relevant memories using the provided memory IDs. "
364-
"Use the reference format: [1-n:memoriesID] "
365-
"where refid is a sequential number starting from 1 and increments for each reference in your response, "
366-
"and memoriesID is the specific memory ID provided in the available memories list. "
367-
"For example: [1:abc123], [2:def456], [3:ghi789], [4:jkl101], [5:mno112] "
368-
"Only reference memories that are directly relevant to the user's question. "
369-
"Make your responses natural and conversational while incorporating memory references when appropriate."
370-
)
371-
372363
# Add memory context if available
373364
if memories_all:
374365
memory_context = "\n\n## Available ID Memories:\n"
375366
for i, memory in enumerate(memories_all, 1):
376367
# Format: [memory_id]: memory_content
377368
memory_id = f"{memory.id.split('-')[0]}" if hasattr(memory, "id") else f"mem_{i}"
378369
memory_content = memory.memory[:500] if hasattr(memory, "memory") else str(memory)
370+
memory_content = memory_content.replace("\n", " ")
379371
memory_context += f"{memory_id}: {memory_content}\n"
380-
return base_prompt + memory_context
372+
return MEMOS_PRODUCT_BASE_PROMPT + memory_context
381373

382-
return base_prompt
374+
return MEMOS_PRODUCT_BASE_PROMPT
375+
376+
def _build_enhance_system_prompt(
    self, user_id: str, memories_all: list[TextualMemoryItem]
) -> str:
    """
    Build the enhanced system prompt, listing memories grouped by source.

    Memories whose ``metadata.memory_type`` is ``"OuterMemory"`` are listed
    under the OuterMemory heading; every other memory is listed under the
    PersonalMemory heading. Each entry is rendered as ``<id>: <content>``
    where ``<id>`` is the part of ``memory.id`` before the first ``-`` and
    ``<content>`` is the first 500 characters of ``memory.memory``.

    Args:
        user_id (str): Not used by this method.
        memories_all (list[TextualMemoryItem]): Memories to expose to the model.

    Returns:
        str: ``MEMOS_PRODUCT_ENHANCE_PROMPT`` alone when ``memories_all`` is
        empty, otherwise the prompt followed by both memory sections.
    """
    if not memories_all:
        return MEMOS_PRODUCT_ENHANCE_PROMPT

    personal_entries = ["\n\n## Available ID and PersonalMemory Memories:\n"]
    outer_entries = ["\n\n## Available ID and OuterMemory Memories:\n"]

    for position, item in enumerate(memories_all, 1):
        # Classify first so attribute access happens in the same order as before.
        is_outer = not (item.metadata.memory_type != "OuterMemory")

        # Fall back to a positional id / string form when attributes are missing.
        if hasattr(item, "id"):
            short_id = f"{item.id.split('-')[0]}"
        else:
            short_id = f"mem_{position}"
        if hasattr(item, "memory"):
            snippet = item.memory[:500]
        else:
            snippet = str(item)

        if is_outer:
            # NOTE(review): only OuterMemory content has newlines flattened;
            # PersonalMemory content is emitted as-is. Confirm this is intended.
            snippet = snippet.replace("\n", " ")
            outer_entries.append(f"{short_id}: {snippet}\n")
        else:
            personal_entries.append(f"{short_id}: {snippet}\n")

    return MEMOS_PRODUCT_ENHANCE_PROMPT + "".join(personal_entries) + "".join(outer_entries)
383406

384407
def _process_streaming_references_complete(self, text_buffer: str) -> tuple[str, str]:
385408
"""
@@ -404,9 +427,13 @@ def _process_streaming_references_complete(self, text_buffer: str) -> tuple[str,
404427
last_match = complete_matches[-1]
405428
end_pos = last_match.end()
406429

407-
# Return text up to the end of the last complete tag
430+
# Get text up to the end of the last complete tag
408431
processed_text = text_buffer[:end_pos]
409432
remaining_buffer = text_buffer[end_pos:]
433+
434+
# Apply reference splitting to the processed text
435+
processed_text = split_continuous_references(processed_text)
436+
410437
return processed_text, remaining_buffer
411438

412439
# Check for incomplete reference tags
@@ -425,15 +452,22 @@ def _process_streaming_references_complete(self, text_buffer: str) -> tuple[str,
425452
return "", text_buffer
426453
else:
427454
# Incomplete opening pattern, return text before it
428-
return text_buffer[:opening_start], text_buffer[opening_start:]
455+
processed_text = text_buffer[:opening_start]
456+
# Apply reference splitting to the processed text
457+
processed_text = split_continuous_references(processed_text)
458+
return processed_text, text_buffer[opening_start:]
429459

430460
# Check for partial opening pattern (starts with [ but not complete)
431461
if "[" in text_buffer:
432462
ref_start = text_buffer.find("[")
433-
return text_buffer[:ref_start], text_buffer[ref_start:]
463+
processed_text = text_buffer[:ref_start]
464+
# Apply reference splitting to the processed text
465+
processed_text = split_continuous_references(processed_text)
466+
return processed_text, text_buffer[ref_start:]
434467

435-
# No reference tags found, return all text
436-
return text_buffer, ""
468+
# No reference tags found, apply reference splitting and return all text
469+
processed_text = split_continuous_references(text_buffer)
470+
return processed_text, ""
437471

438472
def _extract_references_from_response(self, response: str) -> tuple[str, list[dict]]:
439473
"""
@@ -758,9 +792,8 @@ def chat_with_references(
758792
if memories_result:
759793
memories_list = memories_result[0]["memories"]
760794
memories_list = self._filter_memories_by_threshold(memories_list)
761-
# Build custom system prompt with relevant memories
762-
system_prompt = self._build_system_prompt(user_id, memories_list)
763-
795+
# Build custom system prompt with relevant memories
796+
system_prompt = self._build_enhance_system_prompt(user_id, memories_list)
764797
# Get chat history
765798
if user_id not in self.chat_history_manager:
766799
self._register_chat_history(user_id)
@@ -773,6 +806,9 @@ def chat_with_references(
773806
*chat_history.chat_history,
774807
{"role": "user", "content": query},
775808
]
809+
logger.info(
810+
f"user_id: {user_id}, cube_id: {cube_id}, current_system_prompt: {system_prompt}"
811+
)
776812
yield f"data: {json.dumps({'type': 'status', 'data': '2'})}\n\n"
777813
# Generate response with custom prompt
778814
past_key_values = None

src/memos/mem_os/utils/format_utils.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1355,3 +1355,47 @@ def clean_json_response(response: str) -> str:
13551355
str: Clean JSON string without markdown formatting
13561356
"""
13571357
return response.replace("```json", "").replace("```", "").strip()
1358+
1359+
1360+
def split_continuous_references(text: str) -> str:
    """
    Split a combined reference tag into individual reference tags.

    Converts a tag such as ``[1:92ff35fb, 4:bfe6f044]`` into
    ``[1:92ff35fb][4:bfe6f044]``. Every comma inside the tag, together with
    any whitespace directly after it, becomes a ``][`` boundary, so mixed
    separators like ``[1:a, 2:b,3:c]`` are split completely.

    The text is rewritten only when all of these hold; otherwise it is
    returned unchanged:
        1. '[' appears exactly once
        2. ']' appears exactly once
        3. '[' appears before ']'
        4. There is at least one comma between '[' and ']'

    Only the span between the brackets is modified; identical text that
    happens to appear outside the brackets is left alone.

    Args:
        text (str): Text that may contain a single reference tag

    Returns:
        str: Text with the reference tag split, or the original text if the
        conditions above are not met
    """
    # Early return if text is empty
    if not text:
        return text

    # Only handle the simple case of exactly one bracket pair
    if text.count("[") != 1 or text.count("]") != 1:
        return text

    open_bracket_pos = text.find("[")
    close_bracket_pos = text.find("]")

    # Brackets must be in the correct order
    if open_bracket_pos >= close_bracket_pos:
        return text

    content_between_brackets = text[open_bracket_pos + 1 : close_bracket_pos]
    if "," not in content_between_brackets:
        return text

    # Split on every comma and drop the whitespace that followed it, so both
    # "a, b" and "a,b" (and any mix of the two) become "a][b".
    first_part, *other_parts = content_between_brackets.split(",")
    split_content = "][".join([first_part, *(part.lstrip() for part in other_parts)])

    # Rebuild by position instead of str.replace so that only the bracketed
    # span changes, even if the same characters occur elsewhere in the text.
    return text[: open_bracket_pos + 1] + split_content + text[close_bracket_pos:]

src/memos/templates/mos_prompts.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,55 @@
6161
3. Provides clear reasoning and connections
6262
4. Is well-structured and easy to understand
6363
5. Maintains a natural conversational tone"""
64+
65+
# Base system prompt for product chat: tells the model how to cite memories
# with [refid:memoriesID] tags. The adjacent string literals are joined by
# implicit concatenation, so each fragment must end with its own separator.
MEMOS_PRODUCT_BASE_PROMPT = (
    "You are a knowledgeable and helpful AI assistant with access to user memories. "
    "When responding to user queries, you should reference relevant memories using the provided memory IDs. "
    "Use the reference format: [1-n:memoriesID] "
    "where refid is a sequential number starting from 1 and increments for each reference in your response, "
    "and memoriesID is the specific memory ID provided in the available memories list. "
    "For example: [1:abc123], [2:def456], [3:ghi789], [4:jkl101], [5:mno112] "
    "Do not use connect format like [1:abc123,2:def456]. "
    "Only reference memories that are directly relevant to the user's question. "
    "Make your responses natural and conversational while incorporating memory references when appropriate."
)
76+
77+
MEMOS_PRODUCT_ENHANCE_PROMPT = """
78+
# Memory-Enhanced AI Assistant Prompt
79+
80+
You are a knowledgeable and helpful AI assistant with access to two types of memory sources:
81+
82+
## Memory Types
83+
- **PersonalMemory**: User-specific memories and information stored from previous interactions
84+
- **OuterMemory**: External information retrieved from the internet and other sources
85+
86+
## Memory Reference Guidelines
87+
88+
### Reference Format
89+
When citing memories in your responses, use the following format:
90+
- `[refid:memoriesID]` where:
91+
- `refid` is a sequential number starting from 1 and incrementing for each reference
92+
- `memoriesID` is the specific memory ID from the available memories list
93+
94+
### Reference Examples
95+
- Correct: `[1:abc123]`, `[2:def456]`, `[3:ghi789]`, `[4:jkl101]`, `[5:mno112]`
96+
- Incorrect: `[1:abc123,2:def456]` (do not use connected format)
97+
98+
## Response Guidelines
99+
100+
### Memory Selection
101+
- Intelligently choose which memories (PersonalMemory or OuterMemory) are most relevant to the user's query
102+
- Only reference memories that are directly relevant to the user's question
103+
- Prioritize the most appropriate memory type based on the context and nature of the query
104+
105+
### Response Style
106+
- Make your responses natural and conversational
107+
- Seamlessly incorporate memory references when appropriate
108+
- Ensure the flow of conversation remains smooth despite memory citations
109+
- Balance factual accuracy with engaging dialogue
110+
111+
## Key Principles
112+
- Reference only relevant memories to avoid information overload
113+
- Maintain conversational tone while being informative
114+
- Use memory references to enhance, not disrupt, the user experience
115+
"""

0 commit comments

Comments
 (0)