Skip to content

Commit 90fbbfa

Browse files
authored
feat: add multi language (#92)
1 parent ba74bf1 commit 90fbbfa

File tree

4 files changed

+72
-50
lines changed

4 files changed

+72
-50
lines changed

src/memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -174,7 +174,7 @@ def _detect_aggregate_node_for_group(
174174
combined_nodes = [node, *nearest_nodes]
175175

176176
joined = "\n".join(f"- {n.memory}" for n in combined_nodes)
177-
prompt = AGGREGATE_PROMPT.format(joined=joined)
177+
prompt = AGGREGATE_PROMPT.replace("{joined}", joined)
178178
response_text = self._call_llm(prompt)
179179
response_json = self._parse_json_result(response_text)
180180
if not response_json:

src/memos/memories/textual/tree_text_memory/organize/reorganizer.py

Lines changed: 8 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -358,7 +358,7 @@ def _local_subcluster(self, cluster_nodes: list[GraphDBNode]) -> list[list[Graph
358358
scene_lines.append(line)
359359

360360
joined_scene = "\n".join(scene_lines)
361-
prompt = LOCAL_SUBCLUSTER_PROMPT.format(joined_scene=joined_scene)
361+
prompt = LOCAL_SUBCLUSTER_PROMPT.replace("{joined_scene}", joined_scene)
362362

363363
messages = [{"role": "user", "content": prompt}]
364364
response_text = self.llm.generate(messages)
@@ -484,18 +484,15 @@ def _summarize_cluster(self, cluster_nodes: list[GraphDBNode], scope: str) -> Gr
484484
if not cluster_nodes:
485485
raise ValueError("Cluster nodes cannot be empty.")
486486

487-
joined_keys = "\n".join(f"- {n.metadata.key}" for n in cluster_nodes if n.metadata.key)
488-
joined_values = "\n".join(f"- {n.memory}" for n in cluster_nodes)
489-
joined_backgrounds = "\n".join(
490-
f"- {n.metadata.background}" for n in cluster_nodes if n.metadata.background
487+
memories_items_text = "\n\n".join(
488+
[
489+
f"{i}. key: {n.metadata.key}\nvalue: {n.memory}\nsummary:{n.metadata.background}"
490+
for i, n in enumerate(cluster_nodes)
491+
]
491492
)
492493

493494
# Build prompt
494-
prompt = REORGANIZE_PROMPT.format(
495-
joined_keys=joined_keys,
496-
joined_values=joined_values,
497-
joined_backgrounds=joined_backgrounds,
498-
)
495+
prompt = REORGANIZE_PROMPT.replace("{memory_items_text}", memories_items_text)
499496

500497
messages = [{"role": "user", "content": prompt}]
501498
response_text = self.llm.generate(messages)
@@ -505,7 +502,7 @@ def _summarize_cluster(self, cluster_nodes: list[GraphDBNode], scope: str) -> Gr
505502
parent_key = response_json.get("key", "").strip()
506503
parent_value = response_json.get("value", "").strip()
507504
parent_tags = response_json.get("tags", [])
508-
parent_background = response_json.get("background", "").strip()
505+
parent_background = response_json.get("summary", "").strip()
509506

510507
embedding = self.embedder.embed([parent_value])[0]
511508

src/memos/templates/mem_reader_prompts.py

Lines changed: 21 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
1-
SIMPLE_STRUCT_MEM_READER_PROMPT = """
2-
You are a memory extraction expert.
1+
SIMPLE_STRUCT_MEM_READER_PROMPT = """You are a memory extraction expert.
32
Always respond in the same language as the conversation. If the conversation is in Chinese, respond in Chinese.
43
54
Your task is to extract memories from the perspective of ${user_a}, based on a conversation between ${user_a} and ${user_b}. This means identifying what ${user_a} would plausibly remember — including their own experiences, thoughts, plans, or relevant statements and actions made by others (such as ${user_b}) that impacted or were acknowledged by ${user_a}.
@@ -72,8 +71,7 @@
7271
"summary": "Tom is currently focused on managing a new project with a tight schedule. After a team meeting on June 25, 2025, he realized the original deadline of December 15 might not be feasible due to backend delays. Concerned about insufficient testing time, he welcomed Jerry’s suggestion of proposing an extension. Tom plans to raise the idea of shifting the deadline to January 5, 2026 in the next morning’s meeting. His actions reflect both stress about timelines and a proactive, team-oriented problem-solving approach."
7372
}
7473
75-
Another Example in Chinese(Note: You MUST output the SAME language as the
76-
input conversation!!):
74+
Another Example in Chinese (注意: 你的输出必须和输入的user语言一致):
7775
{
7876
"memory list": [
7977
{
@@ -93,9 +91,12 @@
9391
Your Output:"""
9492

9593
SIMPLE_STRUCT_DOC_READER_PROMPT = """
94+
**ABSOLUTE, NON-NEGOTIABLE, CRITICAL RULE: The language of your entire JSON output's string values (specifically `summary` and `tags`) MUST be identical to the language of the input `[DOCUMENT_CHUNK]`. There are absolutely no exceptions. Do not translate. If the input is Chinese, the output must be Chinese. If English, the output must be English. Any deviation from this rule constitutes a failure to follow instructions.**
95+
9696
You are an expert text analyst for a search and retrieval system. Your task is to process a document chunk and generate a single, structured JSON object.
97-
Always respond in the same language as the conversation. If the conversation
98-
is in Chinese, MUST respond in Chinese.
97+
Written in English if the input conversation is in English, or in Chinese if
98+
the conversation is in Chinese, or any language which align with the
99+
conversation language. 如果输入语言是中文,请务必输出中文。
99100
100101
The input is a single piece of text: `[DOCUMENT_CHUNK]`.
101102
You must generate a single JSON object with two top-level keys: `summary` and `tags`.
@@ -149,4 +150,18 @@
149150
"summary": "Tom is currently focused on managing a new project with a tight schedule. After a team meeting on June 25, 2025, he realized the original deadline of December 15 might not be feasible due to backend delays. Concerned about insufficient testing time, he welcomed Jerry’s suggestion of proposing an extension. Tom plans to raise the idea of shifting the deadline to January 5, 2026 in the next morning’s meeting. His actions reflect both stress about timelines and a proactive, team-oriented problem-solving approach."
150151
}
151152
153+
Another Example in Chinese (注意: 你的输出必须和输入的user语言一致):
154+
{
155+
"memory list": [
156+
{
157+
"key": "项目会议",
158+
"memory_type": "LongTermMemory",
159+
"value": "在2025年6月25日下午3点,Tom与团队开会讨论了新项目,涉及时间表,并提出了对12月15日截止日期可行性的担忧。",
160+
"tags": ["项目", "时间表", "会议", "截止日期"]
161+
},
162+
...
163+
],
164+
"summary": "Tom 目前专注于管理一个进度紧张的新项目..."
165+
}
166+
152167
"""

src/memos/templates/tree_reorganize_prompts.py

Lines changed: 42 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -2,35 +2,44 @@
22
33
Given the following child memory items:
44
5-
Keys:
6-
{joined_keys}
5+
{memory_items_text}
6+
7+
Please perform:
8+
1. Identify information that reflects user's experiences, beliefs, concerns, decisions, plans, or reactions — including meaningful input from assistant that user acknowledged or responded to.
9+
2. Resolve all time, person, and event references clearly:
10+
- Convert relative time expressions (e.g., “yesterday,” “next Friday”) into absolute dates using the message timestamp if possible.
11+
- Clearly distinguish between event time and message time.
12+
- If uncertainty exists, state it explicitly (e.g., “around June 2025,” “exact date unclear”).
13+
- Include specific locations if mentioned.
14+
- Resolve all pronouns, aliases, and ambiguous references into full names or identities.
15+
- Disambiguate people with the same name if applicable.
16+
3. Always write from a third-person perspective, referring to user as
17+
"The user" or by name if name mentioned, rather than using first-person ("I", "me", "my").
18+
For example, write "The user felt exhausted..." instead of "I felt exhausted...".
19+
4. Do not omit any information that user is likely to remember.
20+
- Include all key experiences, thoughts, emotional responses, and plans — even if they seem minor.
21+
- Prioritize completeness and fidelity over conciseness.
22+
- Do not generalize or skip details that could be personally meaningful to user.
23+
5. Summarize all child memory items into one memory item.
724
8-
Values:
9-
{joined_values}
10-
11-
Backgrounds:
12-
{joined_backgrounds}
13-
14-
Your task:
15-
- Generate a single clear English `key` (5–10 words max).
16-
- Write a detailed `value` that merges the key points into a single, complete, well-structured text. This must stand alone and convey what the user should remember.
17-
- Provide a list of 5–10 relevant English `tags`.
18-
- Write a short `background` note (50–100 words) covering any extra context, sources, or traceability info.
25+
Language rules:
26+
- The `key`, `value`, `tags`, `summary` fields must match the mostly used language of the input memory items. **如果输入是中文,请输出中文**
27+
- Keep `memory_type` in English.
1928
2029
Language rules:
2130
- The `key`, `value`, `tags`, `background` fields must match the language of the input conversation.
2231
2332
Return valid JSON:
24-
{{
25-
"key": "<concise topic>",
26-
"value": "<full memory text>",
27-
"tags": ["tag1", "tag2", ...],
28-
"background": "<extra context>"
29-
}}
33+
{
34+
"key": <string, a unique, concise memory title>,
35+
"memory_type": <string, Either "LongTermMemory" or "UserMemory">,
36+
"value": <A detailed, self-contained, and unambiguous memory statement — written in English if the input memory items are in English, or in Chinese if the input is in Chinese>,
37+
"tags": <A list of relevant thematic keywords (e.g., ["deadline", "team", "planning"])>,
38+
"summary": <a natural paragraph summarizing the above memories from user's perspective, 120–200 words, same language as the input>
39+
}
3040
"""
3141

32-
LOCAL_SUBCLUSTER_PROMPT = """
33-
You are a memory organization expert.
42+
LOCAL_SUBCLUSTER_PROMPT = """You are a memory organization expert.
3443
3544
You are given a cluster of memory items, each with an ID and content.
3645
Your task is to divide these into smaller, semantically meaningful sub-clusters.
@@ -39,24 +48,25 @@
3948
- Identify natural topics by analyzing common time, place, people, and event elements.
4049
- Each sub-cluster must reflect a coherent theme that helps retrieval.
4150
- Each sub-cluster should have 2–10 items. Discard singletons.
42-
- Each item ID must appear in exactly one sub-cluster.
51+
- Each item ID must appear in exactly one sub-cluster or be discarded. No duplicates are allowed.
52+
- All IDs in the output must be from the provided Memory items.
4353
- Return strictly valid JSON only.
4454
4555
Example: If you have items about a project across multiple phases, group them by milestone, team, or event.
4656
4757
Language rules:
48-
- The `theme` fields must match the language of the input conversation.
58+
- The `key` fields must match the mostly used language of the clustered memories. **如果输入是中文,请输出中文**
4959
5060
Return valid JSON:
51-
{{
61+
{
5262
"clusters": [
53-
{{
54-
"ids": ["id1", "id2", ...],
55-
"theme": "<short label>"
56-
}},
63+
{
64+
"ids": ["<id1>", "<id2>", ...],
65+
"key": "<string, a unique, concise memory title>"
66+
},
5767
...
5868
]
59-
}}
69+
}
6070
6171
Memory items:
6272
{joined_scene}
@@ -90,7 +100,7 @@
90100
- Node 2: "The venue was booked for a wedding in August."
91101
Answer: CONFLICT
92102
93-
Always respond with ONE word, no matter what language is for the input nodes: [CAUSE | CONDITION | RELATE_TO | CONFLICT | NONE]
103+
Always respond with ONE word, no matter what language is for the input nodes: [CAUSE | CONDITION | RELATE | CONFLICT | NONE]
94104
"""
95105

96106
INFER_FACT_PROMPT = """
@@ -135,12 +145,12 @@
135145
- The `key`, `value`, `tags`, `background` fields must match the language of the input.
136146
137147
Good Aggregate:
138-
{{
148+
{
139149
"key": "Mary's Sustainability Summit Role",
140150
"value": "Mary organized and spoke at the 2023 sustainability summit in Berlin, highlighting renewable energy initiatives.",
141151
"tags": ["Mary", "summit", "Berlin", "2023"],
142152
"background": "Combined from multiple memories about Mary's activities at the summit."
143-
}}
153+
}
144154
145155
If you find NO useful higher-level concept, reply exactly: "None".
146156
"""

0 commit comments

Comments
 (0)