Skip to content

Commit c3b9e83

Browse files
author
harvey_xiang
committed
fix: merge dev conflict
2 parents 4b72a63 + 4f96241 commit c3b9e83

File tree

79 files changed

+4798
-1084
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

79 files changed

+4798
-1084
lines changed

docker/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,4 +157,4 @@ volcengine-python-sdk==4.0.6
157157
watchfiles==1.1.0
158158
websockets==15.0.1
159159
xlrd==2.0.2
160-
xlsxwriter==3.2.5
160+
xlsxwriter==3.2.5

evaluation/README.md

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Evaluation Memory Framework
22

3-
This repository provides tools and scripts for evaluating the LoCoMo dataset using various models and APIs.
3+
This repository provides tools and scripts for evaluating the `LoCoMo`, `LongMemEval`, `PrefEval`, and `personaMem` datasets using various models and APIs.
44

55
## Installation
66

@@ -22,17 +22,32 @@ This repository provides tools and scripts for evaluating the LoCoMo dataset usi
2222
2. Copy the `configs-example/` directory to a new directory named `configs/`, and modify the configuration files inside it as needed. This directory contains model and API-specific settings.
2323

2424
## Setup MemOS
25+
### local server
2526
```bash
26-
#start server
27+
# modify {project_dir}/.env file and start server
2728
uvicorn memos.api.server_api:app --host 0.0.0.0 --port 8001 --workers 8
2829

29-
# modify .env file
30+
# configure {project_dir}/evaluation/.env file
3031
MEMOS_URL="http://127.0.0.1:8001"
3132
```
33+
### online service
34+
```bash
35+
# get your api key at https://memos-dashboard.openmem.net/cn/quickstart/
36+
# configure {project_dir}/evaluation/.env file
37+
MEMOS_KEY="Token mpg-xxxxx"
38+
MEMOS_ONLINE_URL="https://memos.memtensor.cn/api/openmem/v1"
39+
40+
```
41+
42+
## Supported frameworks
43+
We support `memos-api` and `memos-api-online` in our scripts.
44+
We also provide unofficial implementations for the following memory frameworks: `zep`, `mem0`, `memobase`, `supermemory`, `memu`.
45+
46+
3247
## Evaluation Scripts
3348

3449
### LoCoMo Evaluation
35-
⚙️ To evaluate the **LoCoMo** dataset using one of the supported memory frameworks — `memos`, `mem0`, or `zep`run the following [script](./scripts/run_locomo_eval.sh):
50+
⚙️ To evaluate the **LoCoMo** dataset using one of the supported memory frameworks, run the following [script](./scripts/run_locomo_eval.sh):
3651

3752
```bash
3853
# Edit the configuration in ./scripts/run_locomo_eval.sh
@@ -53,7 +68,8 @@ First prepare the dataset `longmemeval_s` from https://huggingface.co/datasets/x
5368
```
5469

5570
### PrefEval Evaluation
56-
To evaluate the **Prefeval** dataset using one of the supported memory frameworks — `memos`, `mem0`, or `zep` — run the following [script](./scripts/run_prefeval_eval.sh):
71+
Download `benchmark_dataset/filtered_inter_turns.json` from https://github.com/amazon-science/PrefEval/blob/main/benchmark_dataset/filtered_inter_turns.json and save it as `./data/prefeval/filtered_inter_turns.json`.
72+
To evaluate the **PrefEval** dataset, run the following [script](./scripts/run_prefeval_eval.sh):
5773

5874
```bash
5975
# Edit the configuration in ./scripts/run_prefeval_eval.sh

evaluation/scripts/PrefEval/pref_eval.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -392,23 +392,39 @@ async def main(concurrency_limit: int, input_file: str, output_file: str, output
392392
if __name__ == "__main__":
393393
parser = argparse.ArgumentParser(description="Evaluate assistant responses from a JSONL file.")
394394

395-
parser.add_argument(
396-
"--input", type=str, required=True, help="Path to the input JSONL file from pref_memos.py."
397-
)
395+
parser.add_argument("--input", type=str, required=True, help="Path to the input JSONL file.")
398396

399397
parser.add_argument(
400398
"--concurrency-limit",
401399
type=int,
402400
default=10,
403401
help="The maximum number of concurrent API calls.",
404402
)
403+
404+
parser.add_argument(
405+
"--lib",
406+
type=str,
407+
choices=[
408+
"memos-api-online",
409+
"mem0",
410+
"mem0_graph",
411+
"memos-api",
412+
"memobase",
413+
"memu",
414+
"supermemory",
415+
"zep",
416+
],
417+
default="memos-api",
418+
help="Which library to use (used in 'add' mode).",
419+
)
420+
405421
args = parser.parse_args()
406422

407423
input_path = args.input
408424
output_dir = os.path.dirname(input_path)
409425

410-
output_jsonl_path = os.path.join(output_dir, "eval_pref_memos.jsonl")
411-
output_excel_path = os.path.join(output_dir, "eval_pref_memos_summary.xlsx")
426+
output_jsonl_path = os.path.join(output_dir, f"eval_pref_{args.lib}.jsonl")
427+
output_excel_path = os.path.join(output_dir, f"eval_pref_{args.lib}_summary.xlsx")
412428

413429
asyncio.run(
414430
main(

evaluation/scripts/PrefEval/pref_mem0.py

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,13 @@
2929

3030

3131
def add_memory_for_line(
32-
line_data: tuple, mem_client, num_irrelevant_turns: int, lib: str, version: str
32+
line_data: tuple,
33+
mem_client,
34+
num_irrelevant_turns: int,
35+
lib: str,
36+
version: str,
37+
success_records,
38+
f,
3339
) -> dict:
3440
"""
3541
Adds conversation memory for a single line of data to MemOS and returns the data with a persistent user_id.
@@ -46,13 +52,22 @@ def add_memory_for_line(
4652
elif num_irrelevant_turns == 300:
4753
conversation = conversation + irre_300
4854

49-
turns_add = 5
5055
start_time_add = time.monotonic()
51-
if conversation:
52-
for chunk_start in range(0, len(conversation), turns_add * 2):
53-
chunk = conversation[chunk_start : chunk_start + turns_add * 2]
54-
timestamp_add = int(time.time() * 100)
55-
mem_client.add(messages=chunk, user_id=user_id, timestamp=timestamp_add)
56+
57+
for idx, _ in enumerate(conversation[::2]):
58+
msg_idx = idx * 2
59+
record_id = f"{lib}_user_pref_eval_{i}_{version}_{msg_idx!s}"
60+
timestamp_add = int(time.time() * 100)
61+
62+
if record_id not in success_records:
63+
mem_client.add(
64+
messages=conversation[msg_idx : msg_idx + 2],
65+
user_id=user_id,
66+
timestamp=timestamp_add,
67+
)
68+
f.write(f"{record_id}\n")
69+
f.flush()
70+
5671
end_time_add = time.monotonic()
5772
add_duration = end_time_add - start_time_add
5873

@@ -210,6 +225,15 @@ def main():
210225
from utils.client import Mem0Client
211226

212227
mem_client = Mem0Client(enable_graph="graph" in args.lib)
228+
os.makedirs(f"results/prefeval/{args.lib}_{args.version}", exist_ok=True)
229+
success_records = set()
230+
record_file = f"results/prefeval/{args.lib}_{args.version}/success_records.txt"
231+
if os.path.exists(record_file):
232+
print(f"Loading existing success records from {record_file}...")
233+
with open(record_file, encoding="utf-8") as f:
234+
for i in f.readlines():
235+
success_records.add(i.strip())
236+
print(f"Loaded {len(success_records)} records.")
213237

214238
if args.mode == "add":
215239
print(f"Running in 'add' mode. Ingesting memories from '{args.input}'...")
@@ -218,6 +242,7 @@ def main():
218242
with (
219243
open(args.output, "w", encoding="utf-8") as outfile,
220244
concurrent.futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor,
245+
open(record_file, "a+", encoding="utf-8") as f,
221246
):
222247
futures = [
223248
executor.submit(
@@ -227,6 +252,8 @@ def main():
227252
args.add_turn,
228253
args.lib,
229254
args.version,
255+
success_records,
256+
f,
230257
)
231258
for i, line in enumerate(lines)
232259
]

evaluation/scripts/PrefEval/pref_memobase.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,20 @@
2828

2929

3030
def add_memory_for_line(
31-
line_data: tuple, mem_client, num_irrelevant_turns: int, lib: str, version: str
31+
line_data: tuple,
32+
mem_client,
33+
num_irrelevant_turns: int,
34+
lib: str,
35+
version: str,
36+
success_records,
37+
f,
3238
) -> dict:
3339
"""
3440
Adds conversation memory for a single line of data to MemOS and returns the data with a persistent user_id.
3541
"""
3642
i, line = line_data
3743
user_id = f"{lib}_user_pref_eval_{i}_{version}"
3844
mem_client.delete_user(user_id)
39-
user_id = mem_client.client.add_user({"user_id": user_id})
40-
print("user_id:", user_id)
4145
try:
4246
original_data = json.loads(line)
4347
conversation = original_data.get("conversation", [])
@@ -63,7 +67,14 @@ def add_memory_for_line(
6367
"created_at": timestamp_add,
6468
}
6569
)
66-
mem_client.add(messages=messages, user_id=user_id)
70+
for idx, _ in enumerate(conversation[::2]):
71+
msg_idx = idx * 2
72+
record_id = f"{lib}_user_pref_eval_{i}_{version}_{msg_idx!s}"
73+
74+
if record_id not in success_records:
75+
mem_client.add(messages=conversation[msg_idx : msg_idx + 2], user_id=user_id)
76+
f.write(f"{record_id}\n")
77+
f.flush()
6778

6879
end_time_add = time.monotonic()
6980
add_duration = end_time_add - start_time_add
@@ -222,13 +233,24 @@ def main():
222233

223234
mem_client = MemobaseClient()
224235

236+
os.makedirs(f"results/prefeval/{args.lib}_{args.version}", exist_ok=True)
237+
success_records = set()
238+
record_file = f"results/prefeval/{args.lib}_{args.version}/success_records.txt"
239+
if os.path.exists(record_file):
240+
print(f"Loading existing success records from {record_file}...")
241+
with open(record_file, encoding="utf-8") as f:
242+
for i in f.readlines():
243+
success_records.add(i.strip())
244+
print(f"Loaded {len(success_records)} records.")
245+
225246
if args.mode == "add":
226247
print(f"Running in 'add' mode. Ingesting memories from '{args.input}'...")
227248
print(f"Adding {args.add_turn} irrelevant turns.")
228249
print(f"Using {args.max_workers} workers.")
229250
with (
230251
open(args.output, "w", encoding="utf-8") as outfile,
231252
concurrent.futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor,
253+
open(record_file, "a+", encoding="utf-8") as f,
232254
):
233255
futures = [
234256
executor.submit(
@@ -238,6 +260,8 @@ def main():
238260
args.add_turn,
239261
args.lib,
240262
args.version,
263+
success_records,
264+
f,
241265
)
242266
for i, line in enumerate(lines)
243267
]

evaluation/scripts/PrefEval/pref_memos.py

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
sys.path.insert(0, ROOT_DIR)
2222
sys.path.insert(0, EVAL_SCRIPTS_DIR)
2323

24-
2524
load_dotenv()
2625
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
2726
BASE_URL = os.getenv("OPENAI_BASE_URL")
@@ -30,8 +29,8 @@
3029

3130

3231
def add_memory_for_line(
33-
line_data: tuple, mem_client, num_irrelevant_turns: int, lib: str, version: str
34-
) -> dict:
32+
line_data, mem_client, num_irrelevant_turns, lib, version, success_records, f
33+
):
3534
"""
3635
Adds conversation memory for a single line of data to MemOS and returns the data with a persistent user_id.
3736
"""
@@ -47,15 +46,22 @@ def add_memory_for_line(
4746
elif num_irrelevant_turns == 300:
4847
conversation = conversation + irre_300
4948

50-
turns_add = 5
5149
start_time_add = time.monotonic()
52-
if conversation:
53-
if os.getenv("PRE_SPLIT_CHUNK", "false").lower() == "true":
54-
for chunk_start in range(0, len(conversation), turns_add * 2):
55-
chunk = conversation[chunk_start : chunk_start + turns_add * 2]
56-
mem_client.add(messages=chunk, user_id=user_id, conv_id=None)
57-
else:
58-
mem_client.add(messages=conversation, user_id=user_id, conv_id=None)
50+
51+
for idx, _ in enumerate(conversation[::2]):
52+
msg_idx = idx * 2
53+
record_id = f"{lib}_user_pref_eval_{i}_{version}_{msg_idx!s}"
54+
55+
if record_id not in success_records:
56+
mem_client.add(
57+
messages=conversation[msg_idx : msg_idx + 2],
58+
user_id=user_id,
59+
conv_id=None,
60+
batch_size=2,
61+
)
62+
f.write(f"{record_id}\n")
63+
f.flush()
64+
5965
end_time_add = time.monotonic()
6066
add_duration = end_time_add - start_time_add
6167

@@ -68,7 +74,7 @@ def add_memory_for_line(
6874
return None
6975

7076

71-
def search_memory_for_line(line_data: tuple, mem_client, top_k_value: int) -> dict:
77+
def search_memory_for_line(line_data, mem_client, top_k_value):
7278
"""
7379
Processes a single line of data, searching memory based on the question.
7480
"""
@@ -98,7 +104,7 @@ def search_memory_for_line(line_data: tuple, mem_client, top_k_value: int) -> di
98104
f"- {entry.get('memory', '')}"
99105
for entry in relevant_memories["text_mem"][0]["memories"]
100106
)
101-
+ f"\n{relevant_memories['pref_mem']}"
107+
+ f"\n{relevant_memories.get('pref_string', '')}"
102108
)
103109

104110
memory_tokens_used = len(tokenizer.encode(memories_str))
@@ -120,7 +126,7 @@ def search_memory_for_line(line_data: tuple, mem_client, top_k_value: int) -> di
120126
return None
121127

122128

123-
def generate_response_for_line(line_data: tuple, openai_client: OpenAI, lib: str) -> dict:
129+
def generate_response_for_line(line_data, openai_client, lib):
124130
"""
125131
Generates a response for a single line of data using pre-fetched memories.
126132
"""
@@ -195,7 +201,7 @@ def main():
195201
parser.add_argument(
196202
"--lib",
197203
type=str,
198-
choices=["memos-api", "memos-local"],
204+
choices=["memos-api", "memos-api-online"],
199205
default="memos-api",
200206
help="Which MemOS library to use (used in 'add' mode).",
201207
)
@@ -218,9 +224,22 @@ def main():
218224
print(f"Error: Input file '{args.input}' not found")
219225
return
220226

221-
from utils.client import MemosApiClient
227+
from utils.client import MemosApiClient, MemosApiOnlineClient
228+
229+
if args.lib == "memos-api":
230+
mem_client = MemosApiClient()
231+
elif args.lib == "memos-api-online":
232+
mem_client = MemosApiOnlineClient()
222233

223-
mem_client = MemosApiClient()
234+
os.makedirs(f"results/prefeval/{args.lib}_{args.version}", exist_ok=True)
235+
success_records = set()
236+
record_file = f"results/prefeval/{args.lib}_{args.version}/success_records.txt"
237+
if os.path.exists(record_file):
238+
print(f"Loading existing success records from {record_file}...")
239+
with open(record_file, encoding="utf-8") as f:
240+
for i in f.readlines():
241+
success_records.add(i.strip())
242+
print(f"Loaded {len(success_records)} records.")
224243

225244
if args.mode == "add":
226245
print(f"Running in 'add' mode. Ingesting memories from '{args.input}'...")
@@ -229,6 +248,7 @@ def main():
229248
with (
230249
open(args.output, "w", encoding="utf-8") as outfile,
231250
concurrent.futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor,
251+
open(record_file, "a+", encoding="utf-8") as record_f,
232252
):
233253
futures = [
234254
executor.submit(
@@ -238,6 +258,8 @@ def main():
238258
args.add_turn,
239259
args.lib,
240260
args.version,
261+
success_records,
262+
record_f,
241263
)
242264
for i, line in enumerate(lines)
243265
]

0 commit comments

Comments
 (0)