Feat/fix playground bug (#718)

Wang-Daoji · yuan.wang · fridayL · web-flow · commit a82149fae9c5 · 2025-12-17T15:32:30.000+08:00
* fix playground bug, internet search judge

* fix playground internet bug

* modify delete mem

* modify tool resp bug in multi cube

* fix bug in playground chat handle and search inter

* modify prompt

* fix bug in playground

* fix bug in playground

* fix bug

* fix code

* fix model bug in playground

* modify plan b

* llm param modify

* add logger in playground

* modify code

* fix bug

* modify code

* modify code

* fix bug

* fix search bug in playground

* fix bug

* move scheduler to back

* modify pref location

* modify fast net search

* add tags and new package

* modify prompt fix bug

* remove nltk due to image problem

* prompt modify

* modify bug remove redundant field

* modify bug

* fix playground bug

* fix bug

* boost internet topk

* boost to 50

* fix bug cite

* modify search

* remote query add in playground

* modify bug

* modify pref bug

* move add position

* modify chat prompt

* modify overthinking

* add logger in playground chat

---------

Co-authored-by: yuan.wang <yuan.wang@yuanwangdebijibendiannao.local>
Co-authored-by: chunyu li <78344051+fridayL@users.noreply.github.com>
Co-authored-by: CaralHsi <caralhsi@gmail.com>
diff --git a/src/memos/api/handlers/chat_handler.py b/src/memos/api/handlers/chat_handler.py
@@ -37,6 +37,7 @@
     ANSWER_TASK_LABEL,
     QUERY_TASK_LABEL,
 )
+from memos.templates.cloud_service_prompt import get_cloud_chat_prompt
 from memos.templates.mos_prompts import (
     FURTHER_SUGGESTION_PROMPT,
     get_memos_prompt,
@@ -145,9 +146,10 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, An
 
             # Step 2: Build system prompt
             system_prompt = self._build_system_prompt(
-                filtered_memories,
-                search_response.data.get("pref_string", ""),
-                chat_req.system_prompt,
+                query=chat_req.query,
+                memories=filtered_memories,
+                pref_string=search_response.data.get("pref_string", ""),
+                base_prompt=chat_req.system_prompt,
             )
 
             # Prepare message history
@@ -263,9 +265,10 @@ def generate_chat_response() -> Generator[str, None, None]:
 
                     # Step 2: Build system prompt with memories
                     system_prompt = self._build_system_prompt(
-                        filtered_memories,
-                        search_response.data.get("pref_string", ""),
-                        chat_req.system_prompt,
+                        query=chat_req.query,
+                        memories=filtered_memories,
+                        pref_string=search_response.data.get("pref_string", ""),
+                        base_prompt=chat_req.system_prompt,
                     )
 
                     # Prepare messages
@@ -462,6 +465,7 @@ def generate_chat_response() -> Generator[str, None, None]:
                         conversation=chat_req.history,
                         mode="fine",
                     )
+                    self.logger.info(f"[PLAYGROUND chat parsed_goal]: {parsed_goal}")
 
                     if chat_req.beginner_guide_step == "first":
                         chat_req.internet_search = False
@@ -476,8 +480,8 @@ def generate_chat_response() -> Generator[str, None, None]:
 
                     # ======  second deep search  ======
                     search_req = APISearchRequest(
-                        query=parsed_goal.rephrased_query
-                        or chat_req.query + (f"{parsed_goal.tags}" if parsed_goal.tags else ""),
+                        query=(parsed_goal.rephrased_query or chat_req.query)
+                        + (f"{parsed_goal.tags}" if parsed_goal.tags else ""),
                         user_id=chat_req.user_id,
                         readable_cube_ids=readable_cube_ids,
                         mode="fast",
@@ -491,6 +495,9 @@ def generate_chat_response() -> Generator[str, None, None]:
                         search_memory_type="All",
                         search_tool_memory=False,
                     )
+
+                    self.logger.info(f"[PLAYGROUND second search query]: {search_req.query}")
+
                     start_time = time.time()
                     search_response = self.search_handler.handle_search_memories(search_req)
                     end_time = time.time()
@@ -762,19 +769,16 @@ def _build_pref_md_string_for_playground(self, pref_mem_list: list[any]) -> str:
 
     def _build_system_prompt(
         self,
+        query: str,
         memories: list | None = None,
         pref_string: str | None = None,
         base_prompt: str | None = None,
         **kwargs,
     ) -> str:
         """Build system prompt with optional memories context."""
         if base_prompt is None:
-            base_prompt = (
-                "You are a knowledgeable and helpful AI assistant. "
-                "You have access to conversation memories that help you provide more personalized responses. "
-                "Use the memories to understand the user's context, preferences, and past interactions. "
-                "If memories are provided, reference them naturally when relevant, but don't explicitly mention having memories."
-            )
+            lang = detect_lang(query)
+            base_prompt = get_cloud_chat_prompt(lang=lang)
 
         memory_context = ""
         if memories:
@@ -790,7 +794,7 @@ def _build_system_prompt(
             return base_prompt.format(memories=memory_context)
         elif base_prompt and memories:
             # For backward compatibility, append memories if no placeholder is found
-            memory_context_with_header = "\n\n## Memories:\n" + memory_context
+            memory_context_with_header = "\n\n## Fact Memories:\n" + memory_context
             return base_prompt + memory_context_with_header
         return base_prompt
 
diff --git a/src/memos/templates/cloud_service_prompt.py b/src/memos/templates/cloud_service_prompt.py
@@ -0,0 +1,107 @@
+from datetime import datetime
+
+
+CLOUD_CHAT_PROMPT_ZH = """
+# Role
+你是一个拥有长期记忆能力的智能助手 (MemOS Assistant)。你的目标是结合检索到的记忆片段，为用户提供高度个性化、准确且逻辑严密的回答。
+
+# System Context
+- 当前时间: {current_time} (请以此作为判断记忆时效性的基准)
+
+# Memory Data
+以下是 MemOS 检索到的相关信息，分为“事实”和“偏好”。
+- **事实 (Facts)**：可能包含用户属性、历史对话记录或第三方信息。
+  - **特别注意**：其中标记为 `[assistant观点]`、`[模型总结]` 的内容代表 **AI 过去的推断**，**并非**用户的原话。
+- **偏好 (Preferences)**：用户对回答风格、格式或逻辑的显式/隐式要求。
+
+<memories>
+{memories}
+</memories>
+
+# Critical Protocol: Memory Safety (记忆安全协议)
+检索到的记忆可能包含**AI 自身的推测**、**无关噪音**或**主体错误**。你必须严格执行以下**“四步判决”**，只要有一步不通过，就**丢弃**该条记忆：
+
+1. **来源真值检查 (Source Verification)**：
+   - **核心**：区分“用户原话”与“AI 推测”。
+   - 如果记忆带有 `[assistant观点]` 等标签，这仅代表AI过去的**假设**，**不可**将其视为用户的绝对事实。
+   - *反例*：记忆显示 `[assistant观点] 用户酷爱芒果`。如果用户没提，不要主动假设用户喜欢芒果，防止循环幻觉。
+   - **原则：AI 的总结仅供参考，权重大幅低于用户的直接陈述。**
+
+2. **主语归因检查 (Attribution Check)**：
+   - 记忆中的行为主体是“用户本人”吗？
+   - 如果记忆描述的是**第三方**（如“候选人”、“面试者”、“虚构角色”、“案例数据”），**严禁**将其属性归因于用户。
+
+3. **强相关性检查 (Relevance Check)**：
+   - 记忆是否直接有助于回答当前的 `Original Query`？
+   - 如果记忆仅仅是关键词匹配（如：都提到了“代码”）但语境完全不同，**必须忽略**。
+
+4. **时效性检查 (Freshness Check)**：
+   - 记忆内容是否与用户的最新意图冲突？以当前的 `Original Query` 为最高事实标准。
+
+# Instructions
+1. **审视**：先阅读 `facts memories`，执行“四步判决”，剔除噪音和不可靠的 AI 观点。
+2. **执行**：
+   - 仅使用通过筛选的记忆补充背景。
+   - 严格遵守 `preferences` 中的风格要求。
+3. **输出**：直接回答问题，**严禁**提及“记忆库”、“检索”或“AI 观点”等系统内部术语。
+4. **语言**：回答语言应与用户查询语言一致。
+"""
+
+
+CLOUD_CHAT_PROMPT_EN = """
+# Role
+You are an intelligent assistant powered by MemOS. Your goal is to provide personalized and accurate responses by leveraging retrieved memory fragments, while strictly avoiding hallucinations caused by past AI inferences.
+
+# System Context
+- Current Time: {current_time} (Baseline for freshness)
+
+# Memory Data
+Below is the information retrieved by MemOS, categorized into "Facts" and "Preferences".
+- **Facts**: May contain user attributes, historical logs, or third-party details.
+  - **Warning**: Content tagged with `[assistant观点]` or `[summary]` represents **past AI inferences**, NOT direct user quotes.
+- **Preferences**: Explicit or implicit user requirements regarding response style and format.
+
+<memories>
+{memories}
+</memories>
+
+# Critical Protocol: Memory Safety
+You must strictly execute the following **"Four-Step Verdict"**. If a memory fails any step, **DISCARD IT**:
+
+1. **Source Verification (CRITICAL)**:
+   - **Core**: Distinguish between "User's Input" and "AI's Inference".
+   - If a memory is tagged as `[assistant观点]`, treat it as a **hypothesis**, not a hard fact.
+   - *Example*: Memory says `[assistant view] User loves mango`. Do not treat this as absolute truth unless reaffirmed.
+   - **Principle: AI summaries have much lower authority than direct user statements.**
+
+2. **Attribution Check**:
+   - Is the "Subject" of the memory definitely the User?
+   - If the memory describes a **Third Party** (e.g., Candidate, Fictional Character), **NEVER** attribute these traits to the User.
+
+3. **Relevance Check**:
+   - Does the memory *directly* help answer the current `Original Query`?
+   - If it is merely a keyword match with different context, **IGNORE IT**.
+
+4. **Freshness Check**:
+   - Does the memory conflict with the user's current intent? The current `Original Query` is always the supreme Source of Truth.
+
+# Instructions
+1. **Filter**: Apply the "Four-Step Verdict" to all `fact memories` to filter out noise and unreliable AI views.
+2. **Synthesize**: Use only validated memories for context.
+3. **Style**: Strictly adhere to `preferences`.
+4. **Output**: Answer directly. **NEVER** mention "retrieved memories," "database," or "AI views" in your response.
+5. **language**: The response language should be the same as the user's query language.
+"""
+
+
+def get_cloud_chat_prompt(lang: str = "en") -> str:
+    if lang == "zh":
+        return CLOUD_CHAT_PROMPT_ZH.replace(
+            "{current_time}", datetime.now().strftime("%Y-%m-%d %H:%M (%A)")
+        )
+    elif lang == "en":
+        return CLOUD_CHAT_PROMPT_EN.replace(
+            "{current_time}", datetime.now().strftime("%Y-%m-%d %H:%M (%A)")
+        )
+    else:
+        raise ValueError(f"Invalid language: {lang}")
diff --git a/src/memos/templates/mos_prompts.py b/src/memos/templates/mos_prompts.py
@@ -158,6 +158,7 @@
 - For preferences, do not mention the source in the response, do not appear `[Explicit preference]`, `[Implicit preference]`, `(Explicit preference)` or `(Implicit preference)` in the response
 - The last part of the response should not contain `(Note: ...)` or `(According to ...)` etc.
 - In the thinking mode (think), also strictly use the citation format `[i:memId]`,`i` is the order in the "Memories" section below (starting at 1). `memId` is the given short memory ID. The same as the response format.
+- Do not repeat the thinking too much, use the correct reasoning
 
 ## Key Principles
 - Reference only relevant memories to avoid information overload
@@ -267,6 +268,7 @@
 - 对于偏好，不要在回答中标注来源，不要出现`[显式偏好]`或`[隐式偏好]`或`(显式偏好)`或`(隐式偏好)`的字样
 - 回复内容的结尾不要出现`(注: ...)`或`(根据...)`等解释
 - 在思考模式下(think),也需要严格采用引用格式`[i:memId]`,`i`是下面"记忆"部分中的顺序（从1开始）。`memId`是给定的短记忆ID。与回答要求一致
+- 不要过度重复的思考，使用正确的推理
 
 ## 核心原则
 - 仅引用相关记忆以避免信息过载