Skip to content

Commit 64cc9af

Browse files
authored
feat: update dim env (#186)
* feat: add user manager factory pattern and product API enhancements - Add user manager factory pattern with SQLite and MySQL backends - Add user manager configuration to MOSConfig - Add product API router and configuration - Add DingDing notification integration - Add notification service utilities - Update OpenAPI documentation * fix: change user manager default * fix: update config * fix: remove db_name for nebular * fix:host * feat:update db * fix:test * fix: remove dup file * feat: add logs * feat: add users profile field * fix: product and format utils and change prompt * fix: bugs for chat * feat: add dim for config
1 parent 80df31c commit 64cc9af

File tree

6 files changed

+161
-28
lines changed

6 files changed

+161
-28
lines changed

src/memos/api/config.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -169,14 +169,14 @@ def get_neo4j_community_config(user_id: str | None = None) -> dict[str, Any]:
169169
"user_name": f"memos{user_id.replace('-', '')}",
170170
"auto_create": True,
171171
"use_multi_db": False,
172-
"embedding_dimension": 3072,
172+
"embedding_dimension": int(os.getenv("EMBEDDING_DIMENSION", 3072)),
173173
"vec_config": {
174174
# Pass nested config to initialize external vector DB
175175
# If you use qdrant, please use Server instead of local mode.
176176
"backend": "qdrant",
177177
"config": {
178178
"collection_name": "neo4j_vec_db",
179-
"vector_dimension": 3072,
179+
"vector_dimension": int(os.getenv("EMBEDDING_DIMENSION", 3072)),
180180
"distance_metric": "cosine",
181181
"host": "localhost",
182182
"port": 6333,
@@ -202,7 +202,7 @@ def get_noshared_neo4j_config(user_id) -> dict[str, Any]:
202202
"password": os.getenv("NEO4J_PASSWORD", "12345678"),
203203
"auto_create": True,
204204
"use_multi_db": True,
205-
"embedding_dimension": 3072,
205+
"embedding_dimension": int(os.getenv("EMBEDDING_DIMENSION", 3072)),
206206
}
207207

208208
@staticmethod
@@ -216,7 +216,7 @@ def get_neo4j_shared_config(user_id: str | None = None) -> dict[str, Any]:
216216
"user_name": f"memos{user_id.replace('-', '')}",
217217
"auto_create": True,
218218
"use_multi_db": False,
219-
"embedding_dimension": 3072,
219+
"embedding_dimension": int(os.getenv("EMBEDDING_DIMENSION", 3072)),
220220
}
221221

222222
@staticmethod
@@ -230,7 +230,7 @@ def get_nebular_config(user_id: str | None = None) -> dict[str, Any]:
230230
"user_name": f"memos{user_id.replace('-', '')}",
231231
"use_multi_db": False,
232232
"auto_create": True,
233-
"embedding_dimension": 3072,
233+
"embedding_dimension": int(os.getenv("EMBEDDING_DIMENSION", 3072)),
234234
}
235235

236236
@staticmethod

src/memos/mem_os/core.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import json
22
import os
33
import time
4-
import uuid
54

65
from datetime import datetime
76
from pathlib import Path
@@ -565,7 +564,8 @@ def search(
565564
logger.info(
566565
f"User {target_user_id} has access to {len(user_cube_ids)} cubes: {user_cube_ids}"
567566
)
568-
567+
if target_user_id not in self.chat_history_manager:
568+
self._register_chat_history(target_user_id)
569569
chat_history = self.chat_history_manager[target_user_id]
570570

571571
result: MOSSearchResult = {

src/memos/mem_os/product.py

Lines changed: 57 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
filter_nodes_by_tree_ids,
2323
remove_embedding_recursive,
2424
sort_children_by_memory_type,
25+
split_continuous_references,
2526
)
2627
from memos.mem_scheduler.schemas.general_schemas import (
2728
ANSWER_LABEL,
@@ -33,6 +34,7 @@
3334
from memos.memories.textual.item import (
3435
TextualMemoryItem,
3536
)
37+
from memos.templates.mos_prompts import MEMOS_PRODUCT_BASE_PROMPT, MEMOS_PRODUCT_ENHANCE_PROMPT
3638
from memos.types import MessageList
3739

3840

@@ -360,28 +362,49 @@ def _build_system_prompt(
360362
"""
361363

362364
# Build base prompt
363-
base_prompt = (
364-
"You are a knowledgeable and helpful AI assistant with access to user memories. "
365-
"When responding to user queries, you should reference relevant memories using the provided memory IDs. "
366-
"Use the reference format: [1-n:memoriesID] "
367-
"where refid is a sequential number starting from 1 and increments for each reference in your response, "
368-
"and memoriesID is the specific memory ID provided in the available memories list. "
369-
"For example: [1:abc123], [2:def456], [3:ghi789], [4:jkl101], [5:mno112] "
370-
"Only reference memories that are directly relevant to the user's question. "
371-
"Make your responses natural and conversational while incorporating memory references when appropriate."
372-
)
373-
374365
# Add memory context if available
375366
if memories_all:
376367
memory_context = "\n\n## Available ID Memories:\n"
377368
for i, memory in enumerate(memories_all, 1):
378369
# Format: [memory_id]: memory_content
379370
memory_id = f"{memory.id.split('-')[0]}" if hasattr(memory, "id") else f"mem_{i}"
380371
memory_content = memory.memory[:500] if hasattr(memory, "memory") else str(memory)
372+
memory_content = memory_content.replace("\n", " ")
381373
memory_context += f"{memory_id}: {memory_content}\n"
382-
return base_prompt + memory_context
374+
return MEMOS_PRODUCT_BASE_PROMPT + memory_context
375+
376+
return MEMOS_PRODUCT_BASE_PROMPT
383377

384-
return base_prompt
378+
def _build_enhance_system_prompt(
    self, user_id: str, memories_all: list[TextualMemoryItem]
) -> str:
    """
    Build the enhanced system prompt, listing memories by memory type.

    Memories whose ``metadata.memory_type`` is ``"OuterMemory"`` are listed
    under the OuterMemory heading; every other memory is listed under the
    PersonalMemory heading. Each memory is rendered as a single
    ``<short id>: <content>`` line, where the short id is the part of
    ``memory.id`` before the first ``-`` and the content is truncated to
    500 characters.

    Args:
        user_id: Not used when building the prompt; kept so the call
            signature stays unchanged.
        memories_all: Memories to expose to the model.

    Returns:
        ``MEMOS_PRODUCT_ENHANCE_PROMPT`` alone when ``memories_all`` is
        empty, otherwise that prompt followed by both memory sections.
    """
    if not memories_all:
        return MEMOS_PRODUCT_ENHANCE_PROMPT

    personal_memory_context = "\n\n## Available ID and PersonalMemory Memories:\n"
    outer_memory_context = "\n\n## Available ID and OuterMemory Memories:\n"
    for i, memory in enumerate(memories_all, 1):
        is_outer = memory.metadata.memory_type == "OuterMemory"
        memory_id = f"{memory.id.split('-')[0]}" if hasattr(memory, "id") else f"mem_{i}"
        memory_content = memory.memory[:500] if hasattr(memory, "memory") else str(memory)
        # Each memory must stay on one line: the listing is newline-delimited,
        # so embedded newlines are flattened for BOTH memory types.
        memory_content = memory_content.replace("\n", " ")
        entry = f"{memory_id}: {memory_content}\n"
        if is_outer:
            outer_memory_context += entry
        else:
            personal_memory_context += entry
    return MEMOS_PRODUCT_ENHANCE_PROMPT + personal_memory_context + outer_memory_context
385408

386409
def _process_streaming_references_complete(self, text_buffer: str) -> tuple[str, str]:
387410
"""
@@ -406,9 +429,13 @@ def _process_streaming_references_complete(self, text_buffer: str) -> tuple[str,
406429
last_match = complete_matches[-1]
407430
end_pos = last_match.end()
408431

409-
# Return text up to the end of the last complete tag
432+
# Get text up to the end of the last complete tag
410433
processed_text = text_buffer[:end_pos]
411434
remaining_buffer = text_buffer[end_pos:]
435+
436+
# Apply reference splitting to the processed text
437+
processed_text = split_continuous_references(processed_text)
438+
412439
return processed_text, remaining_buffer
413440

414441
# Check for incomplete reference tags
@@ -427,15 +454,22 @@ def _process_streaming_references_complete(self, text_buffer: str) -> tuple[str,
427454
return "", text_buffer
428455
else:
429456
# Incomplete opening pattern, return text before it
430-
return text_buffer[:opening_start], text_buffer[opening_start:]
457+
processed_text = text_buffer[:opening_start]
458+
# Apply reference splitting to the processed text
459+
processed_text = split_continuous_references(processed_text)
460+
return processed_text, text_buffer[opening_start:]
431461

432462
# Check for partial opening pattern (starts with [ but not complete)
433463
if "[" in text_buffer:
434464
ref_start = text_buffer.find("[")
435-
return text_buffer[:ref_start], text_buffer[ref_start:]
465+
processed_text = text_buffer[:ref_start]
466+
# Apply reference splitting to the processed text
467+
processed_text = split_continuous_references(processed_text)
468+
return processed_text, text_buffer[ref_start:]
436469

437-
# No reference tags found, return all text
438-
return text_buffer, ""
470+
# No reference tags found, apply reference splitting and return all text
471+
processed_text = split_continuous_references(text_buffer)
472+
return processed_text, ""
439473

440474
def _extract_references_from_response(self, response: str) -> tuple[str, list[dict]]:
441475
"""
@@ -760,9 +794,8 @@ def chat_with_references(
760794
if memories_result:
761795
memories_list = memories_result[0]["memories"]
762796
memories_list = self._filter_memories_by_threshold(memories_list)
763-
# Build custom system prompt with relevant memories
764-
system_prompt = self._build_system_prompt(memories_list, base_prompt=None)
765-
797+
# Build custom system prompt with relevant memories
798+
system_prompt = self._build_enhance_system_prompt(user_id, memories_list)
766799
# Get chat history
767800
if user_id not in self.chat_history_manager:
768801
self._register_chat_history(user_id)
@@ -775,6 +808,9 @@ def chat_with_references(
775808
*chat_history.chat_history,
776809
{"role": "user", "content": query},
777810
]
811+
logger.info(
812+
f"user_id: {user_id}, cube_id: {cube_id}, current_system_prompt: {system_prompt}"
813+
)
778814
yield f"data: {json.dumps({'type': 'status', 'data': '2'})}\n\n"
779815
# Generate response with custom prompt
780816
past_key_values = None

src/memos/mem_os/utils/format_utils.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1355,3 +1355,47 @@ def clean_json_response(response: str) -> str:
13551355
str: Clean JSON string without markdown formatting
13561356
"""
13571357
return response.replace("```json", "").replace("```", "").strip()
1358+
1359+
1360+
def split_continuous_references(text: str) -> str:
    """
    Split one comma-joined reference tag into individual reference tags.

    Converts patterns like [1:92ff35fb, 4:bfe6f044] to [1:92ff35fb][4:bfe6f044]
    (no space is inserted between the resulting tags).

    Only processes text if:
    1. '[' appears exactly once
    2. ']' appears exactly once
    3. '[' comes before ']'
    4. There is a comma between '[' and ']'

    Only the span between the brackets is rewritten; identical text that
    happens to appear outside the brackets is left untouched.

    Args:
        text (str): Text containing reference tags

    Returns:
        str: Text with split reference tags, or original text if conditions not met
    """
    if not text:
        return text
    # Anything other than exactly one bracket pair is returned unchanged.
    if text.count("[") != 1 or text.count("]") != 1:
        return text

    open_bracket_pos = text.find("[")
    close_bracket_pos = text.find("]")
    if open_bracket_pos >= close_bracket_pos:
        return text

    content_between_brackets = text[open_bracket_pos + 1 : close_bracket_pos]
    if "," not in content_between_brackets:
        return text

    # Handle ", " first so the separator's trailing space is consumed, then
    # any remaining bare ",". Both passes run on the extracted content, so
    # mixed separators such as "1:a, 2:b,3:c" are fully split.
    split_content = content_between_brackets.replace(", ", "][").replace(",", "][")

    # Rebuild by position instead of str.replace on the whole text, which
    # would also rewrite matching text outside the brackets.
    return text[: open_bracket_pos + 1] + split_content + text[close_bracket_pos:]

src/memos/memories/textual/tree_text_memory/retrieve/searcher.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,8 @@ def retrieve_from_internet():
215215

216216
# Step 5: Update usage history with current timestamp
217217
now_time = datetime.now().isoformat()
218+
if "chat_history" in info:
219+
info.pop("chat_history")
218220
usage_record = json.dumps(
219221
{"time": now_time, "info": info}
220222
) # `info` should be a serializable dict or string

src/memos/templates/mos_prompts.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,57 @@
6262
4. Is well-structured and easy to understand
6363
5. Maintains a natural conversational tone"""
6464

65+
# Base system prompt for memory-backed chat: instructs the model to cite
# memories with one bracketed tag per reference.
# NOTE: adjacent string literals are concatenated with no separator, so every
# fragment except the last must end with a space.
MEMOS_PRODUCT_BASE_PROMPT = (
    "You are a knowledgeable and helpful AI assistant with access to user memories. "
    "When responding to user queries, you should reference relevant memories using the provided memory IDs. "
    "Use the reference format: [1-n:memoriesID] "
    "where refid is a sequential number starting from 1 and increments for each reference in your response, "
    "and memoriesID is the specific memory ID provided in the available memories list. "
    "For example: [1:abc123], [2:def456], [3:ghi789], [4:jkl101], [5:mno112] "
    "Do not use connect format like [1:abc123,2:def456] "
    "Only reference memories that are directly relevant to the user's question. "
    "Make your responses natural and conversational while incorporating memory references when appropriate."
)
76+
77+
MEMOS_PRODUCT_ENHANCE_PROMPT = """
78+
# Memory-Enhanced AI Assistant Prompt
79+
80+
You are a knowledgeable and helpful AI assistant with access to two types of memory sources:
81+
82+
## Memory Types
83+
- **PersonalMemory**: User-specific memories and information stored from previous interactions
84+
- **OuterMemory**: External information retrieved from the internet and other sources
85+
86+
## Memory Reference Guidelines
87+
88+
### Reference Format
89+
When citing memories in your responses, use the following format:
90+
- `[refid:memoriesID]` where:
91+
- `refid` is a sequential number starting from 1 and incrementing for each reference
92+
- `memoriesID` is the specific memory ID from the available memories list
93+
94+
### Reference Examples
95+
- Correct: `[1:abc123]`, `[2:def456]`, `[3:ghi789]`, `[4:jkl101]`, `[5:mno112]`
96+
- Incorrect: `[1:abc123,2:def456]` (do not use connected format)
97+
98+
## Response Guidelines
99+
100+
### Memory Selection
101+
- Intelligently choose which memories (PersonalMemory or OuterMemory) are most relevant to the user's query
102+
- Only reference memories that are directly relevant to the user's question
103+
- Prioritize the most appropriate memory type based on the context and nature of the query
104+
105+
### Response Style
106+
- Make your responses natural and conversational
107+
- Seamlessly incorporate memory references when appropriate
108+
- Ensure the flow of conversation remains smooth despite memory citations
109+
- Balance factual accuracy with engaging dialogue
110+
111+
## Key Principles
112+
- Reference only relevant memories to avoid information overload
113+
- Maintain conversational tone while being informative
114+
- Use memory references to enhance, not disrupt, the user experience
115+
"""
65116
QUERY_REWRITING_PROMPT = """
66117
I'm in discussion with my friend about a question, and we have already talked about something before that. Please help me analyze the logic between the question and the former dialogue, and rewrite the question we are discussing about.
67118

0 commit comments

Comments
 (0)