
Commit 202afd3

Merge branch 'dev' into feat/fix_pref_recommandation
2 parents a1f048a + e07a1b4 commit 202afd3

File tree: 3 files changed, +39 −169 lines

README.md

Lines changed: 14 additions & 16 deletions
```diff
@@ -54,22 +54,20 @@
 
 ## 📈 Performance Benchmark
 
-MemOS demonstrates significant improvements over baseline memory solutions in multiple reasoning tasks.
-
-| Model | Avg. Score | Multi-Hop | Open Domain | Single-Hop | Temporal Reasoning |
-|-------------|------------|-----------|-------------|------------|---------------------|
-| **OpenAI** | 0.5275 | 0.6028 | 0.3299 | 0.6183 | 0.2825 |
-| **MemOS** | **0.7331** | **0.6430** | **0.5521** | **0.7844** | **0.7321** |
-| **Improvement** | **+38.98%** | **+6.67%** | **+67.35%** | **+26.86%** | **+159.15%** |
-
-> 💡 **Temporal reasoning accuracy improved by 159% compared to the OpenAI baseline.**
-
-### Details of End-to-End Evaluation on LOCOMO
-
-> [!NOTE]
-> Comparison of LLM Judge Scores across five major tasks in the LOCOMO benchmark. Each bar shows the mean evaluation score judged by LLMs for a given method-task pair, with standard deviation as error bars. MemOS-0630 consistently outperforms baseline methods (LangMem, Zep, OpenAI, Mem0) across all task types, especially in multi-hop and temporal reasoning scenarios.
-
-<img src="https://statics.memtensor.com.cn/memos/score_all_end2end.jpg" alt="END2END SCORE">
+MemOS demonstrates significant improvements over baseline memory solutions in multiple memory tasks,
+showcasing its capabilities in **information extraction**, **temporal and cross-session reasoning**, and **personalized preference responses**.
+
+| Model | LOCOMO | LongMemEval | PrefEval-10 | PersonaMem |
+|-----------------|-------------|-------------|-------------|-------------|
+| **GPT-4o-mini** | 52.75 | 55.4 | 2.8 | 43.46 |
+| **MemOS** | **75.80** | **77.80** | **71.90** | **61.17** |
+| **Improvement** | **+43.70%** | **+40.43%** | **+2568%** | **+40.75%** |
+
+### Detailed Evaluation Results
+- We use gpt-4o-mini as the processing and judging LLM and bge-m3 as the embedding model in the MemOS evaluation.
+- The evaluation was conducted with settings aligned as closely as possible across methods. Reproduce the results with our scripts at [`evaluation`](./evaluation).
+- The full search and response details are available on Hugging Face: https://huggingface.co/datasets/MemTensor/MemOS_eval_result.
+> 💡 **MemOS outperforms all other methods (Mem0, Zep, Memobase, SuperMemory, et al.) across all benchmarks!**
 
 ## ✨ Key Features
 
```
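For context on the new table: the "Improvement" row reads as the relative gain of MemOS over the GPT-4o-mini baseline. A minimal sketch of that arithmetic, with the formula inferred from the table values rather than stated in the README (the PrefEval-10 row is not reproduced here):

```python
# Relative gain of MemOS over the GPT-4o-mini baseline, in percent.
# The formula is an inference from the table values, shown only as a sanity check.
def relative_gain(baseline: float, memos: float) -> float:
    return (memos - baseline) / baseline * 100

print(f"LOCOMO:      +{relative_gain(52.75, 75.80):.2f}%")  # +43.70%, matches the table
print(f"LongMemEval: +{relative_gain(55.4, 77.80):.2f}%")   # +40.43%, matches the table
print(f"PersonaMem:  +{relative_gain(43.46, 61.17):.2f}%")  # +40.75%, matches the table
```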

evaluation/scripts/locomo/locomo_ingestion.py

Lines changed: 24 additions & 9 deletions
```diff
@@ -88,7 +88,7 @@ def ingest_session(client, session, frame, version, metadata):
     return elapsed_time
 
 
-def process_user(conv_idx, frame, locomo_df, version):
+def process_user(conv_idx, frame, locomo_df, version, success_records, f):
     conversation = locomo_df["conversation"].iloc[conv_idx]
     max_session_count = 35
     start_time = time.time()
@@ -149,11 +149,15 @@ def process_user(conv_idx, frame, locomo_df, version):
 
     print(f"Processing {valid_sessions} sessions for user {conv_idx}")
 
-    for session, metadata in sessions_to_process:
-        session_time = ingest_session(client, session, frame, version, metadata)
-        total_session_time += session_time
-        print(f"User {conv_idx}, {metadata['session_key']} processed in {session_time} seconds")
-
+    for session_idx, (session, metadata) in enumerate(sessions_to_process):
+        if f"{conv_idx}_{session_idx}" not in success_records:
+            session_time = ingest_session(client, session, frame, version, metadata)
+            total_session_time += session_time
+            print(f"User {conv_idx}, {metadata['session_key']} processed in {session_time} seconds")
+            f.write(f"{conv_idx}_{session_idx}\n")
+            f.flush()
+        else:
+            print(f"Session {conv_idx}_{session_idx} already ingested")
     end_time = time.time()
     elapsed_time = round(end_time - start_time, 2)
     print(f"User {conv_idx} processed successfully in {elapsed_time} seconds")
@@ -170,9 +174,20 @@ def main(frame, version="default", num_workers=4):
     print(
         f"Starting processing for {num_users} users in serial mode, each user using {num_workers} workers for sessions..."
     )
-    with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
+    os.makedirs(f"results/locomo/{frame}-{version}/", exist_ok=True)
+    success_records = []
+    record_file = f"results/locomo/{frame}-{version}/success_records.txt"
+    if os.path.exists(record_file):
+        with open(record_file) as f:
+            for i in f.readlines():
+                success_records.append(i.strip())
+
+    with (
+        concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor,
+        open(record_file, "a+") as f,
+    ):
         futures = [
-            executor.submit(process_user, user_id, frame, locomo_df, version)
+            executor.submit(process_user, user_id, frame, locomo_df, version, success_records, f)
             for user_id in range(num_users)
         ]
         for future in concurrent.futures.as_completed(futures):
@@ -216,7 +231,7 @@ def main(frame, version="default", num_workers=4):
        help="Version identifier for saving results (e.g., 1010)",
    )
    parser.add_argument(
-        "--workers", type=int, default=3, help="Number of parallel workers to process users"
+        "--workers", type=int, default=10, help="Number of parallel workers to process users"
    )
    args = parser.parse_args()
    lib = args.lib
```
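The ingestion change above adds a simple checkpoint/resume mechanism: each successfully ingested `{conv_idx}_{session_idx}` pair is appended to `success_records.txt`, and that file is read back on the next run so completed sessions are skipped. A minimal, self-contained sketch of the pattern follows; `RECORD_FILE`, `run`, and `ingest` are placeholders for illustration, not helpers from the script itself.

```python
import os

RECORD_FILE = "results/locomo/demo/success_records.txt"  # placeholder path for this sketch


def load_success_records(path: str) -> set[str]:
    """Return the keys of work items that already completed, one key per line."""
    if not os.path.exists(path):
        return set()
    with open(path) as f:
        return {line.strip() for line in f if line.strip()}


def run(items, ingest):
    """Process (key, payload) pairs, skipping keys already recorded as done."""
    done = load_success_records(RECORD_FILE)
    os.makedirs(os.path.dirname(RECORD_FILE), exist_ok=True)
    with open(RECORD_FILE, "a+") as record:
        for key, payload in items:
            if key in done:
                print(f"{key} already ingested, skipping")
                continue
            ingest(payload)          # e.g. ingest_session(...) in the real script
            record.write(f"{key}\n")
            record.flush()           # flush immediately so progress survives a crash
```

Flushing after every record keeps the checkpoint file current even if the run is interrupted, so only the in-flight session is repeated on restart.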

src/memos/api/middleware/request_context.py

Lines changed: 1 addition & 144 deletions
```diff
@@ -2,8 +2,6 @@
 Request context middleware for automatic trace_id injection.
 """
 
-import json
-import os
 import time
 
 from collections.abc import Callable
@@ -19,9 +17,6 @@
 
 logger = memos.log.get_logger(__name__)
 
-# Maximum body size to read for logging (in bytes) - bodies larger than this will be skipped
-MAX_BODY_LOG_SIZE = os.getenv("MAX_BODY_LOG_SIZE", 10 * 1024)
-
 
 def extract_trace_id_from_headers(request: Request) -> str | None:
     """Extract trace_id from various possible headers with priority: g-trace-id > x-trace-id > trace-id."""
@@ -31,127 +26,6 @@ def extract_trace_id_from_headers(request: Request) -> str | None:
     return None
 
 
-def _is_json_request(request: Request) -> tuple[bool, str]:
-    """
-    Check if request is a JSON request.
-
-    Args:
-        request: The request object
-
-    Returns:
-        Tuple of (is_json, content_type)
-    """
-    if request.method not in ("POST", "PUT", "PATCH", "DELETE"):
-        return False, ""
-
-    content_type = request.headers.get("content-type", "")
-    if not content_type:
-        return False, ""
-
-    is_json = "application/json" in content_type.lower()
-    return is_json, content_type
-
-
-def _should_read_body(content_length: str | None) -> tuple[bool, int | None]:
-    """
-    Check if body should be read based on content-length header.
-
-    Args:
-        content_length: Content-Length header value
-
-    Returns:
-        Tuple of (should_read, body_size). body_size is None if header is invalid.
-    """
-    if not content_length:
-        return True, None
-
-    try:
-        body_size = int(content_length)
-        return body_size <= MAX_BODY_LOG_SIZE, body_size
-    except ValueError:
-        return True, None
-
-
-def _create_body_info(content_type: str, body_size: int) -> dict:
-    """Create body_info dict for large bodies that are skipped."""
-    return {
-        "content_type": content_type,
-        "content_length": body_size,
-        "note": f"body too large ({body_size} bytes), skipping read",
-    }
-
-
-def _parse_json_body(body_bytes: bytes) -> dict | str:
-    """
-    Parse JSON body bytes.
-
-    Args:
-        body_bytes: Raw body bytes
-
-    Returns:
-        Parsed JSON dict, or error message string if parsing fails
-    """
-    try:
-        return json.loads(body_bytes)
-    except (json.JSONDecodeError, UnicodeDecodeError) as e:
-        return f"<unable to parse JSON: {e!s}>"
-
-
-async def get_request_params(request: Request) -> tuple[dict, bytes | None]:
-    """
-    Extract request parameters (query params and body) for logging.
-
-    Only reads body for application/json requests that are within size limits.
-
-    This function is wrapped with exception handling to ensure logging failures
-    don't affect the actual request processing.
-
-    Args:
-        request: The incoming request object
-
-    Returns:
-        Tuple of (params_dict, body_bytes). body_bytes is None if body was not read.
-        Returns empty dict and None on any error.
-    """
-    try:
-        params_log = {}
-
-        # Check if this is a JSON request
-        is_json, content_type = _is_json_request(request)
-        if not is_json:
-            return params_log, None
-
-        # Pre-check body size using content-length header
-        content_length = request.headers.get("content-length")
-        should_read, body_size = _should_read_body(content_length)
-
-        if not should_read and body_size is not None:
-            params_log["body_info"] = _create_body_info(content_type, body_size)
-            return params_log, None
-
-        # Read body
-        body_bytes = await request.body()
-
-        if not body_bytes:
-            return params_log, None
-
-        # Post-check: verify actual size (content-length might be missing or wrong)
-        actual_size = len(body_bytes)
-        if actual_size > MAX_BODY_LOG_SIZE:
-            params_log["body_info"] = _create_body_info(content_type, actual_size)
-            return params_log, None
-
-        # Parse JSON body
-        params_log["body"] = _parse_json_body(body_bytes)
-        return params_log, body_bytes
-
-    except Exception as e:
-        # Catch-all for any unexpected errors
-        logger.error(f"Unexpected error in get_request_params: {e}", exc_info=True)
-        # Return empty dict to ensure request can continue
-        return {}, None
-
-
 class RequestContextMiddleware(BaseHTTPMiddleware):
     """
     Middleware to automatically inject request context for every HTTP request.
@@ -193,26 +67,9 @@ async def dispatch(self, request: Request, call_next: Callable) -> Response:
         )
         set_request_context(context)
 
-        # Get request parameters for logging
-        # Wrap in try-catch to ensure logging failures don't break the request
-        params_log, body_bytes = await get_request_params(request)
-
-        # Re-create the request receive function if body was read
-        # This ensures downstream handlers can still read the body
-        if body_bytes is not None:
-            try:
-
-                async def receive():
-                    return {"type": "http.request", "body": body_bytes, "more_body": False}
-
-                request._receive = receive
-            except Exception as e:
-                logger.error(f"Failed to recreate request receive function: {e}")
-                # Continue without restoring body, downstream handlers will handle it
-
         logger.info(
             f"Request started, source: {self.source}, method: {request.method}, path: {request.url.path}, "
-            f"request params: {params_log}, headers: {request.headers}"
+            f"headers: {request.headers}"
         )
 
         # Process the request
```
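The middleware change removes all request-body reading from logging: after this commit, `dispatch` only records the method, path, and headers before calling the next handler, which is why the JSON-parsing helpers and the `receive`-channel workaround could be deleted wholesale. Below is a minimal, self-contained Starlette sketch of the remaining behavior, using the standard `logging` module in place of `memos.log` and omitting the trace_id / request-context injection of the real middleware.

```python
import logging

from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response

logger = logging.getLogger(__name__)


class RequestLoggingMiddleware(BaseHTTPMiddleware):
    """Log request metadata only; the body is never read, so downstream handlers receive it untouched."""

    async def dispatch(self, request: Request, call_next) -> Response:
        logger.info(
            "Request started, method: %s, path: %s, headers: %s",
            request.method,
            request.url.path,
            dict(request.headers),
        )
        # No body access here: since request.body() is never awaited, there is
        # nothing to restore on the receive channel for downstream handlers.
        return await call_next(request)
```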
