feat: add multi language (#92)

CaralHsi · web-flow · commit 90fbbfaf6b74 · 2025-07-16T18:34:26.000+08:00
diff --git a/src/memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py b/src/memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py
@@ -174,7 +174,7 @@ def _detect_aggregate_node_for_group(
         combined_nodes = [node, *nearest_nodes]
 
         joined = "\n".join(f"- {n.memory}" for n in combined_nodes)
-        prompt = AGGREGATE_PROMPT.format(joined=joined)
+        prompt = AGGREGATE_PROMPT.replace("{joined}", joined)
         response_text = self._call_llm(prompt)
         response_json = self._parse_json_result(response_text)
         if not response_json:
diff --git a/src/memos/memories/textual/tree_text_memory/organize/reorganizer.py b/src/memos/memories/textual/tree_text_memory/organize/reorganizer.py
@@ -358,7 +358,7 @@ def _local_subcluster(self, cluster_nodes: list[GraphDBNode]) -> list[list[Graph
             scene_lines.append(line)
 
         joined_scene = "\n".join(scene_lines)
-        prompt = LOCAL_SUBCLUSTER_PROMPT.format(joined_scene=joined_scene)
+        prompt = LOCAL_SUBCLUSTER_PROMPT.replace("{joined_scene}", joined_scene)
 
         messages = [{"role": "user", "content": prompt}]
         response_text = self.llm.generate(messages)
@@ -484,18 +484,15 @@ def _summarize_cluster(self, cluster_nodes: list[GraphDBNode], scope: str) -> Gr
         if not cluster_nodes:
             raise ValueError("Cluster nodes cannot be empty.")
 
-        joined_keys = "\n".join(f"- {n.metadata.key}" for n in cluster_nodes if n.metadata.key)
-        joined_values = "\n".join(f"- {n.memory}" for n in cluster_nodes)
-        joined_backgrounds = "\n".join(
-            f"- {n.metadata.background}" for n in cluster_nodes if n.metadata.background
+        memories_items_text = "\n\n".join(
+            [
+                f"{i}. key: {n.metadata.key}\nvalue: {n.memory}\nsummary:{n.metadata.background}"
+                for i, n in enumerate(cluster_nodes)
+            ]
         )
 
         # Build prompt
-        prompt = REORGANIZE_PROMPT.format(
-            joined_keys=joined_keys,
-            joined_values=joined_values,
-            joined_backgrounds=joined_backgrounds,
-        )
+        prompt = REORGANIZE_PROMPT.replace("{memory_items_text}", memories_items_text)
 
         messages = [{"role": "user", "content": prompt}]
         response_text = self.llm.generate(messages)
@@ -505,7 +502,7 @@ def _summarize_cluster(self, cluster_nodes: list[GraphDBNode], scope: str) -> Gr
         parent_key = response_json.get("key", "").strip()
         parent_value = response_json.get("value", "").strip()
         parent_tags = response_json.get("tags", [])
-        parent_background = response_json.get("background", "").strip()
+        parent_background = response_json.get("summary", "").strip()
 
         embedding = self.embedder.embed([parent_value])[0]
 
diff --git a/src/memos/templates/mem_reader_prompts.py b/src/memos/templates/mem_reader_prompts.py
@@ -1,5 +1,4 @@
-SIMPLE_STRUCT_MEM_READER_PROMPT = """
-You are a memory extraction expert.
+SIMPLE_STRUCT_MEM_READER_PROMPT = """You are a memory extraction expert.
 Always respond in the same language as the conversation. If the conversation is in Chinese, respond in Chinese.
 
 Your task is to extract memories from the perspective of ${user_a}, based on a conversation between ${user_a} and ${user_b}. This means identifying what ${user_a} would plausibly remember — including their own experiences, thoughts, plans, or relevant statements and actions made by others (such as ${user_b}) that impacted or were acknowledged by ${user_a}.
@@ -72,8 +71,7 @@
   "summary": "Tom is currently focused on managing a new project with a tight schedule. After a team meeting on June 25, 2025, he realized the original deadline of December 15 might not be feasible due to backend delays. Concerned about insufficient testing time, he welcomed Jerry’s suggestion of proposing an extension. Tom plans to raise the idea of shifting the deadline to January 5, 2026 in the next morning’s meeting. His actions reflect both stress about timelines and a proactive, team-oriented problem-solving approach."
 }
 
-Another Example in Chinese(Note: You MUST output the SAME language as the
-input conversation!!)：
+Another Example in Chinese (注意: 你的输出必须和输入的user语言一致)：
 {
   "memory list": [
     {
@@ -93,9 +91,12 @@
 Your Output:"""
 
 SIMPLE_STRUCT_DOC_READER_PROMPT = """
+**ABSOLUTE, NON-NEGOTIABLE, CRITICAL RULE: The language of your entire JSON output's string values (specifically `summary` and `tags`) MUST be identical to the language of the input `[DOCUMENT_CHUNK]`. There are absolutely no exceptions. Do not translate. If the input is Chinese, the output must be Chinese. If English, the output must be English. Any deviation from this rule constitutes a failure to follow instructions.**
+
 You are an expert text analyst for a search and retrieval system. Your task is to process a document chunk and generate a single, structured JSON object.
-Always respond in the same language as the conversation. If the conversation
-is in Chinese, MUST respond in Chinese.
+Write the output in English if the input conversation is in English, in Chinese if
+the conversation is in Chinese, or in whichever language matches the
+conversation language. 如果输入语言是中文，请务必输出中文。
 
 The input is a single piece of text: `[DOCUMENT_CHUNK]`.
 You must generate a single JSON object with two top-level keys: `summary` and `tags`.
@@ -149,4 +150,18 @@
   "summary": "Tom is currently focused on managing a new project with a tight schedule. After a team meeting on June 25, 2025, he realized the original deadline of December 15 might not be feasible due to backend delays. Concerned about insufficient testing time, he welcomed Jerry’s suggestion of proposing an extension. Tom plans to raise the idea of shifting the deadline to January 5, 2026 in the next morning’s meeting. His actions reflect both stress about timelines and a proactive, team-oriented problem-solving approach."
 }
 
+Another Example in Chinese (注意: 你的输出必须和输入的user语言一致)：
+{
+  "memory list": [
+    {
+      "key": "项目会议",
+      "memory_type": "LongTermMemory",
+      "value": "在2025年6月25日下午3点，Tom与团队开会讨论了新项目，涉及时间表，并提出了对12月15日截止日期可行性的担忧。",
+      "tags": ["项目", "时间表", "会议", "截止日期"]
+    },
+    ...
+  ],
+  "summary": "Tom 目前专注于管理一个进度紧张的新项目..."
+}
+
 """
diff --git a/src/memos/templates/tree_reorganize_prompts.py b/src/memos/templates/tree_reorganize_prompts.py
@@ -2,35 +2,44 @@
 
 Given the following child memory items:
 
-Keys:
-{joined_keys}
+{memory_items_text}
+
+Please perform:
+1. Identify information that reflects user's experiences, beliefs, concerns, decisions, plans, or reactions — including meaningful input from assistant that user acknowledged or responded to.
+2. Resolve all time, person, and event references clearly:
+   - Convert relative time expressions (e.g., “yesterday,” “next Friday”) into absolute dates using the message timestamp if possible.
+   - Clearly distinguish between event time and message time.
+   - If uncertainty exists, state it explicitly (e.g., “around June 2025,” “exact date unclear”).
+   - Include specific locations if mentioned.
+   - Resolve all pronouns, aliases, and ambiguous references into full names or identities.
+   - Disambiguate people with the same name if applicable.
+3. Always write from a third-person perspective, referring to the user as
+"The user" or by name if a name is mentioned, rather than using first-person ("I", "me", "my").
+For example, write "The user felt exhausted..." instead of "I felt exhausted...".
+4. Do not omit any information that user is likely to remember.
+   - Include all key experiences, thoughts, emotional responses, and plans — even if they seem minor.
+   - Prioritize completeness and fidelity over conciseness.
+   - Do not generalize or skip details that could be personally meaningful to user.
+5. Summarize all child memory items into one memory item.
 
-Values:
-{joined_values}
-
-Backgrounds:
-{joined_backgrounds}
-
-Your task:
-- Generate a single clear English `key` (5–10 words max).
-- Write a detailed `value` that merges the key points into a single, complete, well-structured text. This must stand alone and convey what the user should remember.
-- Provide a list of 5–10 relevant English `tags`.
-- Write a short `background` note (50–100 words) covering any extra context, sources, or traceability info.
+Language rules:
+- The `key`, `value`, `tags`, `summary` fields must match the predominant language of the input memory items. **如果输入是中文，请输出中文**
+- Keep `memory_type` in English.
 
 Language rules:
 - The `key`, `value`, `tags`, `background` fields must match the language of the input conversation.
 
 Return valid JSON:
-{{
-  "key": "<concise topic>",
-  "value": "<full memory text>",
-  "tags": ["tag1", "tag2", ...],
-  "background": "<extra context>"
-}}
+{
+  "key": <string, a unique, concise memory title>,
+  "memory_type": <string, Either "LongTermMemory" or "UserMemory">,
+  "value": <A detailed, self-contained, and unambiguous memory statement — written in English if the input memory items are in English, or in Chinese if the input is in Chinese>,
+  "tags": <A list of relevant thematic keywords (e.g., ["deadline", "team", "planning"])>,
+  "summary": <a natural paragraph summarizing the above memories from user's perspective, 120–200 words, same language as the input>
+}
 """
 
-LOCAL_SUBCLUSTER_PROMPT = """
-You are a memory organization expert.
+LOCAL_SUBCLUSTER_PROMPT = """You are a memory organization expert.
 
 You are given a cluster of memory items, each with an ID and content.
 Your task is to divide these into smaller, semantically meaningful sub-clusters.
@@ -39,24 +48,25 @@
 - Identify natural topics by analyzing common time, place, people, and event elements.
 - Each sub-cluster must reflect a coherent theme that helps retrieval.
 - Each sub-cluster should have 2–10 items. Discard singletons.
-- Each item ID must appear in exactly one sub-cluster.
+- Each item ID must appear in exactly one sub-cluster or be discarded. No duplicates are allowed.
+- All IDs in the output must be from the provided Memory items.
 - Return strictly valid JSON only.
 
 Example: If you have items about a project across multiple phases, group them by milestone, team, or event.
 
 Language rules:
-- The `theme` fields must match the language of the input conversation.
+- The `key` fields must match the predominant language of the clustered memories. **如果输入是中文，请输出中文**
 
 Return valid JSON:
-{{
+{
   "clusters": [
-    {{
-      "ids": ["id1", "id2", ...],
-      "theme": "<short label>"
-    }},
+    {
+      "ids": ["<id1>", "<id2>", ...],
+      "key": "<string, a unique, concise memory title>"
+    },
     ...
   ]
-}}
+}
 
 Memory items:
 {joined_scene}
@@ -90,7 +100,7 @@
 - Node 2: "The venue was booked for a wedding in August."
 Answer: CONFLICT
 
-Always respond with ONE word, no matter what language is for the input nodes: [CAUSE | CONDITION | RELATE_TO | CONFLICT | NONE]
+Always respond with ONE word, regardless of the language of the input nodes: [CAUSE | CONDITION | RELATE | CONFLICT | NONE]
 """
 
 INFER_FACT_PROMPT = """
@@ -135,12 +145,12 @@
 - The `key`, `value`, `tags`, `background` fields must match the language of the input.
 
 Good Aggregate:
-{{
+{
   "key": "Mary's Sustainability Summit Role",
   "value": "Mary organized and spoke at the 2023 sustainability summit in Berlin, highlighting renewable energy initiatives.",
   "tags": ["Mary", "summit", "Berlin", "2023"],
   "background": "Combined from multiple memories about Mary's activities at the summit."
-}}
+}
 
 If you find NO useful higher-level concept, reply exactly: "None".
 """