Scheduler: fix bugs (#750)

tangg555 · web-flow · commit bb44553adf9f · 2025-12-18T20:28:21.000+08:00
* fix bugs: try to fix bugs in _submit_web_logs

* fix bugs: try to address bugs

* fix bugs

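* refactor: modify examples (remove the `show_web_logs` helper, its call sites, and the imports only it used from the scheduler example scripts)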

* revise add operation and fix an unbelievable bug
diff --git a/examples/mem_scheduler/memos_w_scheduler.py b/examples/mem_scheduler/memos_w_scheduler.py
@@ -4,15 +4,13 @@
 
 from datetime import datetime
 from pathlib import Path
-from queue import Queue
 
 from memos.configs.mem_cube import GeneralMemCubeConfig
 from memos.configs.mem_os import MOSConfig
 from memos.configs.mem_scheduler import AuthConfig
 from memos.log import get_logger
 from memos.mem_cube.general import GeneralMemCube
 from memos.mem_os.main import MOS
-from memos.mem_scheduler.general_scheduler import GeneralScheduler
 from memos.mem_scheduler.schemas.message_schemas import ScheduleLogForWebItem
 from memos.mem_scheduler.schemas.task_schemas import (
     ADD_TASK_LABEL,
@@ -160,42 +158,6 @@ def _first_content() -> str:
     return title, _truncate_with_rules(_first_content())
 
 
-def show_web_logs(mem_scheduler: GeneralScheduler):
-    """Display all web log entries from the scheduler's log queue.
-
-    Args:
-        mem_scheduler: The scheduler instance containing web logs to display
-    """
-    if mem_scheduler._web_log_message_queue.empty():
-        print("Web log queue is currently empty.")
-        return
-
-    print("\n" + "=" * 50 + " WEB LOGS " + "=" * 50)
-
-    # Create a temporary queue to preserve the original queue contents
-    temp_queue = Queue()
-    collected: list[ScheduleLogForWebItem] = []
-
-    while not mem_scheduler._web_log_message_queue.empty():
-        log_item: ScheduleLogForWebItem = mem_scheduler._web_log_message_queue.get()
-        collected.append(log_item)
-        temp_queue.put(log_item)
-
-    for idx, log_item in enumerate(sorted(collected, key=lambda x: x.timestamp, reverse=True), 1):
-        title, content = _format_entry(log_item)
-        print(f"\nLog Entry #{idx}:")
-        print(title)
-        print(content)
-        print("-" * 50)
-
-    # Restore items back to the original queue
-    while not temp_queue.empty():
-        mem_scheduler._web_log_message_queue.put(temp_queue.get())
-
-    print(f"\nTotal {len(collected)} web log entries displayed.")
-    print("=" * 110 + "\n")
-
-
 def run_with_scheduler_init():
     print("==== run_with_automatic_scheduler_init ====")
     conversations, questions = init_task()
@@ -253,8 +215,6 @@ def run_with_scheduler_init():
         response = mos.chat(query=query, user_id=user_id)
         print(f"Answer:\n {response}\n")
 
-    show_web_logs(mem_scheduler=mos.mem_scheduler)
-
     mos.mem_scheduler.stop()
 
 
diff --git a/examples/mem_scheduler/try_schedule_modules.py b/examples/mem_scheduler/try_schedule_modules.py
@@ -1,8 +1,6 @@
 import sys
 
 from pathlib import Path
-from queue import Queue
-from typing import TYPE_CHECKING
 
 from tqdm import tqdm
 
@@ -11,18 +9,11 @@
 )
 from memos.log import get_logger
 from memos.mem_scheduler.analyzer.api_analyzer import DirectSearchMemoriesAnalyzer
-from memos.mem_scheduler.base_scheduler import BaseScheduler
 from memos.mem_scheduler.optimized_scheduler import OptimizedScheduler
 from memos.mem_scheduler.schemas.message_schemas import ScheduleMessageItem
 from memos.mem_scheduler.schemas.task_schemas import MEM_UPDATE_TASK_LABEL
 
 
-if TYPE_CHECKING:
-    from memos.mem_scheduler.schemas import (
-        ScheduleLogForWebItem,
-    )
-
-
 FILE_PATH = Path(__file__).absolute()
 BASE_DIR = FILE_PATH.parent.parent.parent
 sys.path.insert(0, str(BASE_DIR))  # Enable execution from any working directory
@@ -105,41 +96,6 @@ def init_task():
     return conversations, questions
 
 
-def show_web_logs(mem_scheduler: BaseScheduler):
-    """Display all web log entries from the scheduler's log queue.
-
-    Args:
-        mem_scheduler: The scheduler instance containing web logs to display
-    """
-    if mem_scheduler._web_log_message_queue.empty():
-        print("Web log queue is currently empty.")
-        return
-
-    print("\n" + "=" * 50 + " WEB LOGS " + "=" * 50)
-
-    # Create a temporary queue to preserve the original queue contents
-    temp_queue = Queue()
-    log_count = 0
-
-    while not mem_scheduler._web_log_message_queue.empty():
-        log_item: ScheduleLogForWebItem = mem_scheduler._web_log_message_queue.get()
-        temp_queue.put(log_item)
-        log_count += 1
-
-        # Print log entry details
-        print(f"\nLog Entry #{log_count}:")
-        print(f'- "{log_item.label}" log: {log_item}')
-
-        print("-" * 50)
-
-    # Restore items back to the original queue
-    while not temp_queue.empty():
-        mem_scheduler._web_log_message_queue.put(temp_queue.get())
-
-    print(f"\nTotal {log_count} web log entries displayed.")
-    print("=" * 110 + "\n")
-
-
 class ScheduleModulesRunner(DirectSearchMemoriesAnalyzer):
     def __init__(self):
         super().__init__()
@@ -215,6 +171,3 @@ def add_msgs(
         mem_scheduler._memory_update_consumer(
             messages=[message],
         )
-
-    # Show accumulated web logs
-    show_web_logs(mem_scheduler)
diff --git a/src/memos/mem_reader/simple_struct.py b/src/memos/mem_reader/simple_struct.py
@@ -522,7 +522,7 @@ def filter_hallucination_in_memories(
             raw = self.llm.generate([{"role": "user", "content": prompt}])
             success, parsed = self._parse_hallucination_filter_response(raw)
             logger.info(
-                f"[filter_hallucination_in_memories] Hallucination filter parsed successfully: {success}"
+                f"[filter_hallucination_in_memories] Hallucination filter parsed successfully: {success}；prompt: {prompt}"
             )
             if success:
                 logger.info(f"Hallucination filter result: {parsed}")
diff --git a/src/memos/mem_scheduler/webservice_modules/rabbitmq_service.py b/src/memos/mem_scheduler/webservice_modules/rabbitmq_service.py
@@ -108,8 +108,7 @@ def initialize_rabbitmq(
                 elif Path(config_path).exists():
                     auth_config = AuthConfig.from_local_config(config_path=config_path)
                 else:
-                    logger.error("Fail to initialize auth_config")
-                    return
+                    auth_config = AuthConfig.from_local_env()
                 self.rabbitmq_config = auth_config.rabbitmq
             elif isinstance(config, RabbitMQConfig):
                 self.rabbitmq_config = config
diff --git a/src/memos/templates/mem_reader_prompts.py b/src/memos/templates/mem_reader_prompts.py
@@ -625,21 +625,20 @@
 SIMPLE_STRUCT_HALLUCINATION_FILTER_PROMPT = """
 You are a strict, language-preserving memory validator and rewriter.
 
-Your task is to compare each memory against the provided user messages (the ground truth) and produce a corrected version only when necessary. Always preserve the original language of the memory—do not translate.
+Your task is to eliminate hallucinations and tighten memories by grounding them strictly in the user’s explicit messages. Memories must be factual, unambiguous, and free of any inferred or speculative content.
 
 Rules:
-1. **Language Consistency**: The rewritten memory must be in the exact same language as the original input memory. Never switch languages.
-2. **Strict Grounding**: Only use information explicitly stated in the user messages. Do not introduce external facts, assumptions, or common sense.
-3. **Ambiguity Resolution**:
-   - Replace vague pronouns (e.g., "he", "it", "they") or unclear references with specific, unambiguous entities based solely on the messages.
-   - Convert relative time expressions (e.g., "yesterday", "last week", "in two days") into absolute dates or times **only if the messages provide enough context** (e.g., current date is known or implied).
-4. **Handling Assistant Inferences**:
-   - If a memory contains any content **not directly stated by the user**—such as interpretations, summaries, emotional attributions, predictions, causal claims, or generalizations—this is considered an assistant inference.
-   - In such cases, you **must** set `need_rewrite = true`.
-   - The `rewritten` text **must explicitly indicate that the statement is an inference**, using a clear and natural prefix in the memory’s language. For English memories, use:
-     > "The assistant inferred that [rest of the memory]."
-   - Do **not** present inferred content as factual user statements.
-5. **No Rewrite Needed**: If the memory is factually accurate, fully grounded in the messages, unambiguous, and contains no unsupported content, set `need_rewrite = false` and copy the original memory exactly.
+1. **Language Consistency**: Keep the exact original language of each memory—no translation or language switching.
+2. **Strict Factual Grounding**: Include only what the user explicitly stated. Remove or flag anything not directly present in the messages—no assumptions, interpretations, predictions, emotional labels, summaries, or generalizations.
+3. **Ambiguity Elimination**:
+   - Replace vague pronouns (e.g., “he”, “it”, “they”) with clear, specific entities **only if** the messages identify them.
+   - Convert relative time expressions (e.g., “yesterday”) to absolute dates **only if** the messages provide enough temporal context.
+4. **Hallucination Removal**:
+   - If a memory contains **any content not verbatim or directly implied by the user**, it must be rewritten.
+   - Do **not** rephrase inferences as facts. Instead, either:
+     - Remove the unsupported part and retain only the grounded core, or
+     - If the entire memory is ungrounded, mark it for rewrite and make the lack of user support explicit.
+5. **No Change if Fully Grounded**: If the memory is concise, unambiguous, and fully supported by the user’s messages, keep it unchanged.
 
 Inputs:
 messages:
@@ -649,15 +648,15 @@
 {memories_inline}
 
 Output Format:
-- Return a JSON object with string keys ("0", "1", "2", ...) corresponding to the input memory indices.
+- Return a JSON object with string keys ("0", "1", "2", ...) matching input memory indices.
 - Each value must be: {{ "need_rewrite": boolean, "rewritten": string, "reason": string }}
-- The "reason" should be concise and specific, e.g.:
-  - "contains assistant inference not stated by user"
-  - "pronoun 'it' has no clear referent in messages"
-  - "relative time 'yesterday' converted to 2025-12-16"
-  - "accurate and directly supported by user message"
+- The "reason" must be brief and precise, e.g.:
+  - "contains unsupported inference"
+  - "vague pronoun with no referent in messages"
+  - "relative time resolved to 2025-12-16"
+  - "fully grounded and concise"
 
-Important: Output **only** the JSON. No additional text, explanations, markdown, or fields.
+Important: Output **only** the JSON. No extra text, explanations, markdown, or fields.
 """
 
 

Original file line number	Diff line number	Diff line change
`@@ -522,7 +522,7 @@ def filter_hallucination_in_memories(`
`522`	`522`	`raw = self.llm.generate([{"role": "user", "content": prompt}])`
`523`	`523`	`success, parsed = self._parse_hallucination_filter_response(raw)`
`524`	`524`	`logger.info(`
`525`		`- f"[filter_hallucination_in_memories] Hallucination filter parsed successfully: {success}"`
	`525`	`+ f"[filter_hallucination_in_memories] Hallucination filter parsed successfully: {success}；prompt: {prompt}"`
`526`	`526`	`)`
`527`	`527`	`if success:`
`528`	`528`	`logger.info(f"Hallucination filter result: {parsed}")`