
Commit ccef651

add new feat of thread race, and add a new test case for scheduler dispatcher
1 parent b6834d3 commit ccef651

File tree

9 files changed: +646, -14 lines changed

evaluation/scripts/temporal_locomo/modules/locomo_eval_module.py

Lines changed: 19 additions & 0 deletions
@@ -531,6 +531,25 @@ def process_qa(qa):
             json.dump(dict(search_results), fw, indent=2)
         print(f"Save search results {conv_id}")

+        search_durations = []
+        for result in response_results[conv_id]:
+            if "search_duration_ms" in result:
+                search_durations.append(result["search_duration_ms"])
+
+        if search_durations:
+            avg_search_duration = sum(search_durations) / len(search_durations)
+            with self.stats_lock:
+                if self.stats[self.frame][self.version]["memory_stats"]["avg_search_duration_ms"]:
+                    self.stats[self.frame][self.version]["memory_stats"][
+                        "avg_search_duration_ms"
+                    ] = (
+                        self.stats[self.frame][self.version]["memory_stats"][
+                            "avg_search_duration_ms"
+                        ]
+                        + avg_search_duration
+                    ) / 2
+            print(f"Average search duration: {avg_search_duration:.2f} ms")
+
         # Dump stats after processing each user
         self.save_stats()
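For reference, the stats update above keeps a running estimate by averaging the previously stored value with the new per-conversation average. The same rule, isolated as a standalone helper for readability — a sketch only; the helper name and the seed-on-first-value branch are assumptions, not part of the diff:

def update_avg_search_duration(memory_stats: dict, new_avg_ms: float) -> None:
    """Blend a new per-conversation average into the stored running average."""
    if memory_stats.get("avg_search_duration_ms"):
        # Same rule as the diff: mean of the old estimate and the new batch average.
        memory_stats["avg_search_duration_ms"] = (
            memory_stats["avg_search_duration_ms"] + new_avg_ms
        ) / 2
    else:
        # Assumption: seed the value on first observation (the diff only updates an existing value).
        memory_stats["avg_search_duration_ms"] = new_avg_ms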

Lines changed: 134 additions & 0 deletions
@@ -0,0 +1,134 @@
import random
import threading
import time


class ThreadRace:
    def __init__(self):
        # Variable to store the result
        self.result = None
        # Event to mark if the race is finished
        self.race_finished = threading.Event()
        # Lock to protect the result variable
        self.lock = threading.Lock()
        # Store thread objects for termination
        self.threads = {}
        # Stop flags for each thread
        self.stop_flags = {}

    def task1(self, stop_flag):
        """First task function, can be modified as needed"""
        # Simulate random work time
        sleep_time = random.uniform(0.1, 2.0)

        # Break the sleep into smaller chunks to check stop flag
        chunks = 20
        chunk_time = sleep_time / chunks

        for _ in range(chunks):
            # Check if we should stop
            if stop_flag.is_set():
                return None
            time.sleep(chunk_time)

        return f"Task 1 completed in: {sleep_time:.2f} seconds"

    def task2(self, stop_flag):
        """Second task function, can be modified as needed"""
        # Simulate random work time
        sleep_time = random.uniform(0.1, 2.0)

        # Break the sleep into smaller chunks to check stop flag
        chunks = 20
        chunk_time = sleep_time / chunks

        for _ in range(chunks):
            # Check if we should stop
            if stop_flag.is_set():
                return None
            time.sleep(chunk_time)

        return f"Task 2 completed in: {sleep_time:.2f} seconds"

    def worker(self, task_func, task_name):
        """Worker thread function"""
        # Create a stop flag for this task
        stop_flag = threading.Event()
        self.stop_flags[task_name] = stop_flag

        try:
            # Execute the task with stop flag
            result = task_func(stop_flag)

            # If the race is already finished or we were asked to stop, return immediately
            if self.race_finished.is_set() or stop_flag.is_set():
                return None

            # Try to set the result (if no other thread has set it yet)
            with self.lock:
                if not self.race_finished.is_set():
                    self.result = (task_name, result)
                    # Mark the race as finished
                    self.race_finished.set()
                    print(f"{task_name} won the race!")

                    # Signal other threads to stop
                    for name, flag in self.stop_flags.items():
                        if name != task_name:
                            print(f"Signaling {name} to stop")
                            flag.set()

                    return self.result

        except Exception as e:
            print(f"{task_name} encountered an error: {e}")

        return None

    def run_race(self):
        """Start the competition and return the result of the fastest thread"""
        # Reset state
        self.race_finished.clear()
        self.result = None
        self.threads.clear()
        self.stop_flags.clear()

        # Create threads
        thread1 = threading.Thread(target=self.worker, args=(self.task1, "Thread 1"))
        thread2 = threading.Thread(target=self.worker, args=(self.task2, "Thread 2"))

        # Record thread objects for later joining
        self.threads["Thread 1"] = thread1
        self.threads["Thread 2"] = thread2

        # Start threads
        thread1.start()
        thread2.start()

        # Wait for any thread to complete
        while not self.race_finished.is_set():
            time.sleep(0.01)  # Small delay to avoid high CPU usage

            # If all threads have ended but no result is set, there's a problem
            if (
                not thread1.is_alive()
                and not thread2.is_alive()
                and not self.race_finished.is_set()
            ):
                print("All threads have ended, but there's no winner")
                return None

        # Wait for all threads to end (with timeout to avoid infinite waiting)
        thread1.join(timeout=1.0)
        thread2.join(timeout=1.0)

        # Return the result
        return self.result


# Usage example
if __name__ == "__main__":
    race = ThreadRace()
    result = race.run_race()
    print(f"Winner: {result[0] if result else None}")
    print(f"Result: {result[1] if result else None}")

evaluation/scripts/temporal_locomo/temporal_locomo_eval.py

Lines changed: 29 additions & 7 deletions
@@ -33,7 +33,7 @@ def __init__(self, args):
         self.locomo_evaluator = LocomoEvaluator(args=args)
         self.locomo_metric = LocomoMetric(args=args)

-    def run_eval_pipeline(self, skip_ingestion=True, skip_processing=False):
+    def run_answer_hit_eval_pipeline(self, skip_ingestion=True, skip_processing=False):
         """
         Run the complete evaluation pipeline including dataset conversion,
         data ingestion, and processing.
@@ -99,6 +99,32 @@ def run_eval_pipeline(self, skip_ingestion=True, skip_processing=False):
         print(f" - Statistics: {self.stats_path}")
         print("=" * 80)

+    def run_inference_eval_pipeline(self, skip_ingestion=True, skip_processing=False):
+        """
+        Run the complete evaluation pipeline including dataset conversion,
+        data ingestion, and processing.
+        """
+        print("=" * 80)
+        print("Starting TimeLocomo Evaluation Pipeline")
+        print("=" * 80)
+
+        # Step 1: Check if temporal_locomo dataset exists, if not convert it
+        temporal_locomo_file = self.data_dir / "temporal_locomo" / "temporal_locomo_qa.json"
+        if not temporal_locomo_file.exists():
+            print(f"Temporal locomo dataset not found at {temporal_locomo_file}")
+            print("Converting locomo dataset to temporal_locomo format...")
+            self.convert_locomo_to_temporal_locomo(output_dir=self.data_dir / "temporal_locomo")
+            print("Dataset conversion completed.")
+        else:
+            print(f"Temporal locomo dataset found at {temporal_locomo_file}, skipping conversion.")
+
+        # Step 2: Data ingestion
+        if not skip_ingestion:
+            print("\n" + "=" * 50)
+            print("Step 2: Data Ingestion")
+            print("=" * 50)
+            self.locomo_ingestor.run_ingestion()
+
     def compute_can_answer_count_by_pre_evidences(self, rounds_to_consider):
         """
         Compute can-answer statistics per day for each conversation using the
@@ -120,7 +146,7 @@ def compute_can_answer_count_by_pre_evidences(self, rounds_to_consider):
     parser.add_argument(
         "--frame",
         type=str,
-        default="memos_scheduler",
+        default="memos",
         choices=["zep", "memos", "mem0", "mem0_graph", "memos_scheduler"],
         help="Specify the memory framework (zep or memos or mem0 or mem0_graph)",
     )
@@ -152,8 +178,4 @@ def compute_can_answer_count_by_pre_evidences(self, rounds_to_consider):
     args = parser.parse_args()

     evaluator = TemporalLocomoEval(args=args)
-    evaluator.run_eval_pipeline()
-
-    # rule-based baselines
-    evaluator.compute_can_answer_count_by_pre_evidences(rounds_to_consider=float("inf"))
-    evaluator.compute_can_answer_count_by_pre_evidences(rounds_to_consider=1)
+    evaluator.run_answer_hit_eval_pipeline()

src/memos/configs/mem_scheduler.py

Lines changed: 3 additions & 3 deletions
@@ -11,7 +11,7 @@
     BASE_DIR,
     DEFAULT_ACT_MEM_DUMP_PATH,
     DEFAULT_CONSUME_INTERVAL_SECONDS,
-    DEFAULT_THREAD__POOL_MAX_WORKERS,
+    DEFAULT_THREAD_POOL_MAX_WORKERS,
 )


@@ -25,10 +25,10 @@ class BaseSchedulerConfig(BaseConfig):
         default=True, description="Whether to enable parallel message processing using thread pool"
     )
     thread_pool_max_workers: int = Field(
-        default=DEFAULT_THREAD__POOL_MAX_WORKERS,
+        default=DEFAULT_THREAD_POOL_MAX_WORKERS,
         gt=1,
         lt=20,
-        description=f"Maximum worker threads in pool (default: {DEFAULT_THREAD__POOL_MAX_WORKERS})",
+        description=f"Maximum worker threads in pool (default: {DEFAULT_THREAD_POOL_MAX_WORKERS})",
     )
     consume_interval_seconds: float = Field(
         default=DEFAULT_CONSUME_INTERVAL_SECONDS,

src/memos/mem_scheduler/base_scheduler.py

Lines changed: 2 additions & 2 deletions
@@ -20,7 +20,7 @@
 from memos.mem_scheduler.schemas.general_schemas import (
     DEFAULT_ACT_MEM_DUMP_PATH,
     DEFAULT_CONSUME_INTERVAL_SECONDS,
-    DEFAULT_THREAD__POOL_MAX_WORKERS,
+    DEFAULT_THREAD_POOL_MAX_WORKERS,
     MemCubeID,
     TreeTextMemory_SEARCH_METHOD,
     UserID,
@@ -60,7 +60,7 @@ def __init__(self, config: BaseSchedulerConfig):
         self.search_method = TreeTextMemory_SEARCH_METHOD
         self.enable_parallel_dispatch = self.config.get("enable_parallel_dispatch", False)
         self.thread_pool_max_workers = self.config.get(
-            "thread_pool_max_workers", DEFAULT_THREAD__POOL_MAX_WORKERS
+            "thread_pool_max_workers", DEFAULT_THREAD_POOL_MAX_WORKERS
         )

         self.retriever: SchedulerRetriever | None = None

src/memos/mem_scheduler/general_modules/dispatcher.py

Lines changed: 23 additions & 0 deletions
@@ -1,11 +1,14 @@
 import concurrent
+import threading

 from collections import defaultdict
 from collections.abc import Callable
+from typing import Any

 from memos.context.context import ContextThreadPoolExecutor
 from memos.log import get_logger
 from memos.mem_scheduler.general_modules.base import BaseSchedulerModule
+from memos.mem_scheduler.general_modules.task_threads import ThreadRace
 from memos.mem_scheduler.schemas.message_schemas import ScheduleMessageItem


@@ -22,6 +25,7 @@ class SchedulerDispatcher(BaseSchedulerModule):
     - Batch message processing
     - Graceful shutdown
     - Bulk handler registration
+    - Thread race competition for parallel task execution
     """

     def __init__(self, max_workers=30, enable_parallel_dispatch=False):
@@ -49,6 +53,9 @@ def __init__(self, max_workers=30, enable_parallel_dispatch=False):
         # Set to track active futures for monitoring purposes
         self._futures = set()

+        # Thread race module for competitive task execution
+        self.thread_race = ThreadRace()
+
     def register_handler(self, label: str, handler: Callable[[list[ScheduleMessageItem]], None]):
         """
         Register a handler function for a specific message label.
@@ -177,6 +184,22 @@ def join(self, timeout: float | None = None) -> bool:

         return len(not_done) == 0

+    def run_competitive_tasks(
+        self, tasks: dict[str, Callable[[threading.Event], Any]], timeout: float = 10.0
+    ) -> tuple[str, Any] | None:
+        """
+        Run multiple tasks in a competitive race, returning the result of the first task to complete.
+
+        Args:
+            tasks: Dictionary mapping task names to task functions that accept a stop_flag parameter
+            timeout: Maximum time to wait for any task to complete (in seconds)
+
+        Returns:
+            Tuple of (task_name, result) from the winning task, or None if no task completes
+        """
+        logger.info(f"Starting competitive execution of {len(tasks)} tasks")
+        return self.thread_race.run_race(tasks, timeout)
+
     def shutdown(self) -> None:
         """Gracefully shutdown the dispatcher."""
         self._running = False
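The commit message also mentions a new test case for the scheduler dispatcher; that test file is not shown in this excerpt. Going only by the run_competitive_tasks signature and docstring above, a minimal sketch of how the race API might be exercised (the task functions, timings, and expected winner are illustrative assumptions, not code from the commit):

import threading
import time

from memos.mem_scheduler.general_modules.dispatcher import SchedulerDispatcher


def fast_task(stop_flag: threading.Event):
    # Finishes quickly, so it is expected to win the race.
    time.sleep(0.05)
    return "fast result"


def slow_task(stop_flag: threading.Event):
    # Works in small chunks and honors the stop flag once a winner is declared.
    for _ in range(100):
        if stop_flag.is_set():
            return None
        time.sleep(0.05)
    return "slow result"


dispatcher = SchedulerDispatcher(max_workers=4, enable_parallel_dispatch=True)
winner = dispatcher.run_competitive_tasks({"fast": fast_task, "slow": slow_task}, timeout=5.0)
# Per the docstring: a (task_name, result) tuple from the winner, or None if nothing completes.
print(winner)  # expected: ("fast", "fast result")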
