
Commit 202afd3

Merge branch 'dev' into feat/fix_pref_recommandation
2 parents a1f048a + e07a1b4 commit 202afd3

File tree: 3 files changed, +39 −169 lines

README.md

Lines changed: 14 additions & 16 deletions
```diff
@@ -54,22 +54,20 @@
 
 ## 📈 Performance Benchmark
 
-MemOS demonstrates significant improvements over baseline memory solutions in multiple reasoning tasks.
-
-| Model | Avg. Score | Multi-Hop | Open Domain | Single-Hop | Temporal Reasoning |
-|-------------|------------|-----------|-------------|------------|---------------------|
-| **OpenAI** | 0.5275 | 0.6028 | 0.3299 | 0.6183 | 0.2825 |
-| **MemOS** | **0.7331** | **0.6430** | **0.5521** | **0.7844** | **0.7321** |
-| **Improvement** | **+38.98%** | **+6.67%** | **+67.35%** | **+26.86%** | **+159.15%** |
-
-> 💡 **Temporal reasoning accuracy improved by 159% compared to the OpenAI baseline.**
-
-### Details of End-to-End Evaluation on LOCOMO
-
-> [!NOTE]
-> Comparison of LLM Judge Scores across five major tasks in the LOCOMO benchmark. Each bar shows the mean evaluation score judged by LLMs for a given method-task pair, with standard deviation as error bars. MemOS-0630 consistently outperforms baseline methods (LangMem, Zep, OpenAI, Mem0) across all task types, especially in multi-hop and temporal reasoning scenarios.
-
-<img src="https://statics.memtensor.com.cn/memos/score_all_end2end.jpg" alt="END2END SCORE">
+MemOS demonstrates significant improvements over baseline memory solutions in multiple memory tasks,
+showcasing its capabilities in **information extraction**, **temporal and cross-session reasoning**, and **personalized preference responses**.
+
+| Model | LOCOMO | LongMemEval | PrefEval-10 | PersonaMem |
+|-----------------|-------------|-------------|-------------|-------------|
+| **GPT-4o-mini** | 52.75 | 55.4 | 2.8 | 43.46 |
+| **MemOS** | **75.80** | **77.80** | **71.90** | **61.17** |
+| **Improvement** | **+43.70%** | **+40.43%** | **+2568%** | **+40.75%** |
+
+### Detailed Evaluation Results
+- We use gpt-4o-mini as the processing and judging LLM and bge-m3 as the embedding model in the MemOS evaluation.
+- The evaluation was conducted with settings aligned as closely as possible across methods. Reproduce the results with our scripts at [`evaluation`](./evaluation).
+- The full search and response details are available on Hugging Face: https://huggingface.co/datasets/MemTensor/MemOS_eval_result.
+> 💡 **MemOS outperforms all other methods (Mem0, Zep, Memobase, SuperMemory, et al.) across all benchmarks!**
 
 ## ✨ Key Features
 
```
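For context on the new table: the "Improvement" row reads as the relative gain of MemOS over the GPT-4o-mini baseline. A minimal sketch of that arithmetic, with the formula inferred from the table values rather than stated in the README (the PrefEval-10 row is not reproduced here):

```python
# Relative gain of MemOS over the GPT-4o-mini baseline, in percent.
# The formula is an inference from the table values, shown only as a sanity check.
def relative_gain(baseline: float, memos: float) -> float:
    return (memos - baseline) / baseline * 100

print(f"LOCOMO:      +{relative_gain(52.75, 75.80):.2f}%")  # +43.70%, matches the table
print(f"LongMemEval: +{relative_gain(55.4, 77.80):.2f}%")   # +40.43%, matches the table
print(f"PersonaMem:  +{relative_gain(43.46, 61.17):.2f}%")  # +40.75%, matches the table
```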

evaluation/scripts/locomo/locomo_ingestion.py

Lines changed: 24 additions & 9 deletions
```diff
@@ -88,7 +88,7 @@ def ingest_session(client, session, frame, version, metadata):
     return elapsed_time
 
 
-def process_user(conv_idx, frame, locomo_df, version):
+def process_user(conv_idx, frame, locomo_df, version, success_records, f):
     conversation = locomo_df["conversation"].iloc[conv_idx]
     max_session_count = 35
     start_time = time.time()
@@ -149,11 +149,15 @@ def process_user(conv_idx, frame, locomo_df, version):
 
     print(f"Processing {valid_sessions} sessions for user {conv_idx}")
 
-    for session, metadata in sessions_to_process:
-        session_time = ingest_session(client, session, frame, version, metadata)
-        total_session_time += session_time
-        print(f"User {conv_idx}, {metadata['session_key']} processed in {session_time} seconds")
-
+    for session_idx, (session, metadata) in enumerate(sessions_to_process):
+        if f"{conv_idx}_{session_idx}" not in success_records:
+            session_time = ingest_session(client, session, frame, version, metadata)
+            total_session_time += session_time
+            print(f"User {conv_idx}, {metadata['session_key']} processed in {session_time} seconds")
+            f.write(f"{conv_idx}_{session_idx}\n")
+            f.flush()
+        else:
+            print(f"Session {conv_idx}_{session_idx} already ingested")
     end_time = time.time()
     elapsed_time = round(end_time - start_time, 2)
     print(f"User {conv_idx} processed successfully in {elapsed_time} seconds")
@@ -170,9 +174,20 @@ def main(frame, version="default", num_workers=4):
     print(
         f"Starting processing for {num_users} users in serial mode, each user using {num_workers} workers for sessions..."
     )
-    with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
+    os.makedirs(f"results/locomo/{frame}-{version}/", exist_ok=True)
+    success_records = []
+    record_file = f"results/locomo/{frame}-{version}/success_records.txt"
+    if os.path.exists(record_file):
+        with open(record_file) as f:
+            for i in f.readlines():
+                success_records.append(i.strip())
+
+    with (
+        concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor,
+        open(record_file, "a+") as f,
+    ):
         futures = [
-            executor.submit(process_user, user_id, frame, locomo_df, version)
+            executor.submit(process_user, user_id, frame, locomo_df, version, success_records, f)
             for user_id in range(num_users)
         ]
         for future in concurrent.futures.as_completed(futures):
@@ -216,7 +231,7 @@ def main(frame, version="default", num_workers=4):
        help="Version identifier for saving results (e.g., 1010)",
    )
    parser.add_argument(
-        "--workers", type=int, default=3, help="Number of parallel workers to process users"
+        "--workers", type=int, default=10, help="Number of parallel workers to process users"
    )
    args = parser.parse_args()
    lib = args.lib
```
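The ingestion change above adds a simple checkpoint/resume mechanism: each successfully ingested `{conv_idx}_{session_idx}` pair is appended to `success_records.txt`, and that file is read back on the next run so completed sessions are skipped. A minimal, self-contained sketch of the pattern follows; `RECORD_FILE`, `run`, and `ingest` are placeholders for illustration, not helpers from the script itself.

```python
import os

RECORD_FILE = "results/locomo/demo/success_records.txt"  # placeholder path for this sketch


def load_success_records(path: str) -> set[str]:
    """Return the keys of work items that already completed, one key per line."""
    if not os.path.exists(path):
        return set()
    with open(path) as f:
        return {line.strip() for line in f if line.strip()}


def run(items, ingest):
    """Process (key, payload) pairs, skipping keys already recorded as done."""
    done = load_success_records(RECORD_FILE)
    os.makedirs(os.path.dirname(RECORD_FILE), exist_ok=True)
    with open(RECORD_FILE, "a+") as record:
        for key, payload in items:
            if key in done:
                print(f"{key} already ingested, skipping")
                continue
            ingest(payload)          # e.g. ingest_session(...) in the real script
            record.write(f"{key}\n")
            record.flush()           # flush immediately so progress survives a crash
```

Flushing after every record keeps the checkpoint file current even if the run is interrupted, so only the in-flight session is repeated on restart.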

src/memos/api/middleware/request_context.py

Lines changed: 1 addition & 144 deletions
```diff
@@ -2,8 +2,6 @@
 Request context middleware for automatic trace_id injection.
 """
 
-import json
-import os
 import time
 
 from collections.abc import Callable
@@ -19,9 +17,6 @@
 
 logger = memos.log.get_logger(__name__)
 
-# Maximum body size to read for logging (in bytes) - bodies larger than this will be skipped
-MAX_BODY_LOG_SIZE = os.getenv("MAX_BODY_LOG_SIZE", 10 * 1024)
-
 
 def extract_trace_id_from_headers(request: Request) -> str | None:
     """Extract trace_id from various possible headers with priority: g-trace-id > x-trace-id > trace-id."""
@@ -31,127 +26,6 @@ def extract_trace_id_from_headers(request: Request) -> str | None:
     return None
 
 
-def _is_json_request(request: Request) -> tuple[bool, str]:
-    """
-    Check if request is a JSON request.
-
-    Args:
-        request: The request object
-
-    Returns:
-        Tuple of (is_json, content_type)
-    """
-    if request.method not in ("POST", "PUT", "PATCH", "DELETE"):
-        return False, ""
-
-    content_type = request.headers.get("content-type", "")
-    if not content_type:
-        return False, ""
-
-    is_json = "application/json" in content_type.lower()
-    return is_json, content_type
-
-
-def _should_read_body(content_length: str | None) -> tuple[bool, int | None]:
-    """
-    Check if body should be read based on content-length header.
-
-    Args:
-        content_length: Content-Length header value
-
-    Returns:
-        Tuple of (should_read, body_size). body_size is None if header is invalid.
-    """
-    if not content_length:
-        return True, None
-
-    try:
-        body_size = int(content_length)
-        return body_size <= MAX_BODY_LOG_SIZE, body_size
-    except ValueError:
-        return True, None
-
-
-def _create_body_info(content_type: str, body_size: int) -> dict:
-    """Create body_info dict for large bodies that are skipped."""
-    return {
-        "content_type": content_type,
-        "content_length": body_size,
-        "note": f"body too large ({body_size} bytes), skipping read",
-    }
-
-
-def _parse_json_body(body_bytes: bytes) -> dict | str:
-    """
-    Parse JSON body bytes.
-
-    Args:
-        body_bytes: Raw body bytes
-
-    Returns:
-        Parsed JSON dict, or error message string if parsing fails
-    """
-    try:
-        return json.loads(body_bytes)
-    except (json.JSONDecodeError, UnicodeDecodeError) as e:
-        return f"<unable to parse JSON: {e!s}>"
-
-
-async def get_request_params(request: Request) -> tuple[dict, bytes | None]:
-    """
-    Extract request parameters (query params and body) for logging.
-
-    Only reads body for application/json requests that are within size limits.
-
-    This function is wrapped with exception handling to ensure logging failures
-    don't affect the actual request processing.
-
-    Args:
-        request: The incoming request object
-
-    Returns:
-        Tuple of (params_dict, body_bytes). body_bytes is None if body was not read.
-        Returns empty dict and None on any error.
-    """
-    try:
-        params_log = {}
-
-        # Check if this is a JSON request
-        is_json, content_type = _is_json_request(request)
-        if not is_json:
-            return params_log, None
-
-        # Pre-check body size using content-length header
-        content_length = request.headers.get("content-length")
-        should_read, body_size = _should_read_body(content_length)
-
-        if not should_read and body_size is not None:
-            params_log["body_info"] = _create_body_info(content_type, body_size)
-            return params_log, None
-
-        # Read body
-        body_bytes = await request.body()
-
-        if not body_bytes:
-            return params_log, None
-
-        # Post-check: verify actual size (content-length might be missing or wrong)
-        actual_size = len(body_bytes)
-        if actual_size > MAX_BODY_LOG_SIZE:
-            params_log["body_info"] = _create_body_info(content_type, actual_size)
-            return params_log, None
-
-        # Parse JSON body
-        params_log["body"] = _parse_json_body(body_bytes)
-        return params_log, body_bytes
-
-    except Exception as e:
-        # Catch-all for any unexpected errors
-        logger.error(f"Unexpected error in get_request_params: {e}", exc_info=True)
-        # Return empty dict to ensure request can continue
-        return {}, None
-
-
 class RequestContextMiddleware(BaseHTTPMiddleware):
     """
     Middleware to automatically inject request context for every HTTP request.
@@ -193,26 +67,9 @@ async def dispatch(self, request: Request, call_next: Callable) -> Response:
         )
         set_request_context(context)
 
-        # Get request parameters for logging
-        # Wrap in try-catch to ensure logging failures don't break the request
-        params_log, body_bytes = await get_request_params(request)
-
-        # Re-create the request receive function if body was read
-        # This ensures downstream handlers can still read the body
-        if body_bytes is not None:
-            try:
-
-                async def receive():
-                    return {"type": "http.request", "body": body_bytes, "more_body": False}
-
-                request._receive = receive
-            except Exception as e:
-                logger.error(f"Failed to recreate request receive function: {e}")
-                # Continue without restoring body, downstream handlers will handle it
-
         logger.info(
             f"Request started, source: {self.source}, method: {request.method}, path: {request.url.path}, "
-            f"request params: {params_log}, headers: {request.headers}"
+            f"headers: {request.headers}"
         )
 
         # Process the request
```
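The middleware change removes all request-body reading from logging: after this commit, `dispatch` only records the method, path, and headers before calling the next handler, which is why the JSON-parsing helpers and the `receive`-channel workaround could be deleted wholesale. Below is a minimal, self-contained Starlette sketch of the remaining behavior, using the standard `logging` module in place of `memos.log` and omitting the trace_id / request-context injection of the real middleware.

```python
import logging

from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response

logger = logging.getLogger(__name__)


class RequestLoggingMiddleware(BaseHTTPMiddleware):
    """Log request metadata only; the body is never read, so downstream handlers receive it untouched."""

    async def dispatch(self, request: Request, call_next) -> Response:
        logger.info(
            "Request started, method: %s, path: %s, headers: %s",
            request.method,
            request.url.path,
            dict(request.headers),
        )
        # No body access here: since request.body() is never awaited, there is
        # nothing to restore on the receive channel for downstream handlers.
        return await call_next(request)
```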
