Skip to content
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
4ef7418
feat(eval): add eval dependencies
Duguce Jul 7, 2025
ddad8c1
feat(eval): add configs example
Duguce Jul 7, 2025
d623791
docs(eval): update README.md
Duguce Jul 7, 2025
3365de4
Merge branch 'MemTensor:dev' into dev
Duguce Jul 8, 2025
ed68e36
feat(eval): remove the dependency (pydantic)
Duguce Jul 8, 2025
4e99031
Merge branch 'MemTensor:dev' into dev
Duguce Jul 8, 2025
41368b9
feat(eval): add run locomo eval script
Duguce Jul 8, 2025
9477539
Merge branch 'MemTensor:dev' into dev
Duguce Jul 8, 2025
8cd9361
fix(eval): delete about memos redundant search branches
Duguce Jul 8, 2025
a900910
chore: fix format
Duguce Jul 8, 2025
204bd27
Merge branch 'MemTensor:dev' into dev
Duguce Jul 9, 2025
5d68ed9
Merge branch 'MemTensor:dev' into dev
Duguce Jul 10, 2025
42e9366
feat(eval): add openai memory on locomo - eval guide
Duguce Jul 10, 2025
f3d1d5d
Merge branch 'dev' into dev
Duguce Jul 10, 2025
9bada64
Merge branch 'MemTensor:dev' into dev
Duguce Jul 10, 2025
7881029
docs(eval): modify openai memory on locomo - eval guide
Duguce Jul 10, 2025
dd2b2c5
Merge branch 'MemTensor:dev' into dev
Duguce Jul 12, 2025
7a60c33
Merge branch 'MemTensor:dev' into dev
Duguce Jul 13, 2025
ed86648
Merge branch 'MemTensor:dev' into dev
Duguce Jul 15, 2025
aaab1ce
Merge branch 'MemTensor:dev' into dev
Duguce Jul 16, 2025
710d4db
Merge branch 'MemTensor:dev' into dev
Duguce Jul 16, 2025
c98ded4
feat(eval): add longmemeval evaluation pipeline
Duguce Jul 16, 2025
79a5bce
chore(eval): formatter
Duguce Jul 16, 2025
37e2933
chore: update
Duguce Jul 16, 2025
445c855
feat(eval): add configs example
Duguce Jul 16, 2025
82e60b5
Merge branch 'MemTensor:dev' into dev
Duguce Jul 17, 2025
09b5a72
fix(eval): bugs about longmemeval
Duguce Jul 17, 2025
efd2c0d
Merge branch 'MemTensor:dev' into dev
Duguce Jul 17, 2025
fc0005a
fix(eval): search top k
Duguce Jul 17, 2025
bf11ea7
chore(eval): update
Duguce Jul 17, 2025
715b399
Merge branch 'MemTensor:dev' into dev
Duguce Jul 20, 2025
0d0d037
feat(eval): support memos api mode
Duguce Jul 20, 2025
54521c6
Merge branch 'MemTensor:dev' into dev
Duguce Jul 21, 2025
1259729
Merge branch 'MemTensor:dev' into dev
Duguce Jul 23, 2025
638973a
Merge branch 'MemTensor:dev' into dev
Duguce Jul 23, 2025
284f8cc
Merge branch 'MemTensor:dev' into dev
Duguce Jul 23, 2025
29c88aa
Merge branch 'dev' into dev
Duguce Jul 23, 2025
b391c90
feat(eval): add memobase; fix bugs about share db
Duguce Jul 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions evaluation/scripts/locomo/locomo_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
except Exception as e:
print(f"Warning: Failed to download NLTK resources: {e}")


try:
sentence_model_name = "Qwen/Qwen3-Embedding-0.6B"
sentence_model = SentenceTransformer(sentence_model_name)
Expand Down Expand Up @@ -363,7 +362,7 @@ async def limited_task(task):
parser.add_argument(
"--lib",
type=str,
choices=["zep", "memos", "mem0", "mem0_graph", "langmem", "openai"],
choices=["zep", "memos", "mem0", "mem0_graph", "openai", "memos-api"],
help="Specify the memory framework (zep or memos or mem0 or mem0_graph)",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update the help string to include "openai" and the newly added "memos-api" for consistency with the choices list.
Ensure corresponding updates are made in other relevant files where this argument is defined or documented to maintain coherence across the codebase.

)
parser.add_argument(
Expand Down
56 changes: 43 additions & 13 deletions evaluation/scripts/locomo/locomo_ingestion.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,24 @@
import os
import sys


sys.path.insert(
0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
)
sys.path.insert(
0,
os.path.join(
os.path.dirname(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
),
"evaluation",
"scripts",
),
)

import argparse
import concurrent.futures
import json
import os
import time

from datetime import datetime, timezone
Expand All @@ -11,6 +28,7 @@
from dotenv import load_dotenv
from mem0 import MemoryClient
from tqdm import tqdm
from utils.client import memos_client
from zep_cloud.client import Zep

from memos.configs.mem_cube import GeneralMemCubeConfig
Expand Down Expand Up @@ -93,7 +111,7 @@ def get_client(frame: str, user_id: str | None = None, version: str = "default")
return mos


def ingest_session(client, session, frame, metadata, revised_client=None):
def ingest_session(client, session, frame, version, metadata, revised_client=None):
session_date = metadata["session_date"]
date_format = "%I:%M %p on %d %B, %Y UTC"
date_string = datetime.strptime(session_date, date_format).replace(tzinfo=timezone.utc)
Expand Down Expand Up @@ -125,7 +143,7 @@ def ingest_session(client, session, frame, metadata, revised_client=None):
group_id=conv_id,
)

elif frame == "memos":
elif frame == "memos" or frame == "memos-api":
messages = []
messages_reverse = []

Expand All @@ -149,16 +167,22 @@ def ingest_session(client, session, frame, metadata, revised_client=None):

speaker_a_user_id = conv_id + "_speaker_a"
speaker_b_user_id = conv_id + "_speaker_b"
if frame == "memos-api":
client.add(messages=messages, user_id=f"{speaker_a_user_id.replace('_', '')}{version}")

client.add(
messages=messages,
user_id=speaker_a_user_id,
)
revised_client.add(
messages=messages_reverse, user_id=f"{speaker_b_user_id.replace('_', '')}{version}"
)
elif frame == "memos":
client.add(
messages=messages,
user_id=speaker_a_user_id,
)

revised_client.add(
messages=messages_reverse,
user_id=speaker_b_user_id,
)
revised_client.add(
messages=messages_reverse,
user_id=speaker_b_user_id,
)
print(f"Added messages for {speaker_a_user_id} and {speaker_b_user_id} successfully.")

elif frame == "mem0" or frame == "mem0_graph":
Expand Down Expand Up @@ -246,6 +270,12 @@ def process_user(conv_idx, frame, locomo_df, version, num_workers=1):
speaker_b_user_id = conv_id + "_speaker_b"
client = get_client("memos", speaker_a_user_id, version)
revised_client = get_client("memos", speaker_b_user_id, version)
elif frame == "memos-api":
conv_id = "locomo_exp_user_" + str(conv_idx)
speaker_a_user_id = conv_id + "_speaker_a"
speaker_b_user_id = conv_id + "_speaker_b"
client = memos_client(mode="api")
revised_client = memos_client(mode="api")

sessions_to_process = []
for session_idx in range(max_session_count):
Expand All @@ -272,7 +302,7 @@ def process_user(conv_idx, frame, locomo_df, version, num_workers=1):
with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
futures = {
executor.submit(
ingest_session, client, session, frame, metadata, revised_client
ingest_session, client, session, frame, version, metadata, revised_client
): metadata["session_key"]
for session, metadata in sessions_to_process
}
Expand Down Expand Up @@ -340,7 +370,7 @@ def main(frame, version="default", num_workers=4):
parser.add_argument(
"--lib",
type=str,
choices=["zep", "memos", "mem0", "mem0_graph"],
choices=["zep", "memos", "mem0", "mem0_graph", "memos-api"],
help="Specify the memory framework (zep or memos or mem0 or mem0_graph)",
)
parser.add_argument(
Expand Down
2 changes: 1 addition & 1 deletion evaluation/scripts/locomo/locomo_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
parser.add_argument(
"--lib",
type=str,
choices=["zep", "memos", "mem0", "mem0_graph", "langmem", "openai"],
choices=["zep", "memos", "mem0", "mem0_graph", "openai", "memos-api"],
help="Specify the memory framework (zep or memos or mem0 or mem0_graph)",
)
parser.add_argument(
Expand Down
2 changes: 1 addition & 1 deletion evaluation/scripts/locomo/locomo_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ async def main(frame, version="default"):
parser.add_argument(
"--lib",
type=str,
choices=["zep", "memos", "mem0", "mem0_graph", "openai"],
choices=["zep", "memos", "mem0", "mem0_graph", "openai", "memos-api"],
help="Specify the memory framework (zep or memos or mem0 or mem0_graph)",
)
parser.add_argument(
Expand Down
71 changes: 65 additions & 6 deletions evaluation/scripts/locomo/locomo_search.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,23 @@
import os
import sys


sys.path.insert(
0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
)
sys.path.insert(
0,
os.path.join(
os.path.dirname(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
),
"evaluation",
"scripts",
),
)

import argparse
import json
import os

from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
Expand All @@ -11,7 +28,8 @@
from dotenv import load_dotenv
from mem0 import MemoryClient
from tqdm import tqdm
from utils import filter_memory_data
from utils.client import memos_client
from utils.memos_filters import filter_memory_data
from zep_cloud.client import Zep

from memos.configs.mem_os import MOSConfig
Expand Down Expand Up @@ -191,6 +209,38 @@ def memos_search(client, query, conv_id, speaker_a, speaker_b, reversed_client=N
return context, duration_ms


def memos_api_search(
    client, query, conv_id, speaker_a, speaker_b, top_k, version, reversed_client=None
):
    """Search both speakers' memories via the MemOS API client and build the answer context.

    Args:
        client: Client used for speaker A; must expose
            ``search(query=..., user_id=..., top_k=...)`` returning an iterable.
        query: Question text to search for.
        conv_id: Conversation identifier; the per-speaker user ids are derived from it.
        speaker_a: Name substituted for ``speaker_1`` in ``TEMPLATE_MEMOS``.
        speaker_b: Name substituted for ``speaker_2`` in ``TEMPLATE_MEMOS``.
        top_k: Forwarded unchanged as ``top_k`` to each search call.
        version: Suffix appended to each user id.
        reversed_client: Client used for speaker B. When ``None``, ``client`` is
            reused (NOTE(review): the visible call site builds both clients the
            same way with ``memos_client(mode="api")`` and tells the speakers
            apart by ``user_id`` only -- confirm reuse is acceptable).

    Returns:
        tuple: ``(context, duration_ms)`` -- the formatted context string and the
        elapsed wall-clock time in milliseconds for the two searches plus formatting.
    """
    start = time()

    # Previously a missing reversed_client raised AttributeError on ``None.search``.
    if reversed_client is None:
        reversed_client = client

    def _collect(api_client, speaker_user_id):
        # The user id is the speaker id with underscores stripped plus the version
        # suffix; this mirrors the id format used by the ingestion script's
        # "memos-api" branch, so the two must stay in sync.
        results = api_client.search(
            query=query,
            user_id=f"{speaker_user_id.replace('_', '')}{version}",
            top_k=top_k,
        )
        # One result per line, each terminated by a newline (same text as before).
        return "".join(f"{item}\n" for item in results)

    speaker_a_context = _collect(client, conv_id + "_speaker_a")
    speaker_b_context = _collect(reversed_client, conv_id + "_speaker_b")

    context = TEMPLATE_MEMOS.format(
        speaker_1=speaker_a,
        speaker_1_memories=speaker_a_context,
        speaker_2=speaker_b,
        speaker_2_memories=speaker_b_context,
    )

    # Take the timing before the debug print so console I/O is not counted as
    # search latency.
    duration_ms = (time() - start) * 1000

    # NOTE(review): a reviewer asked whether the print statements in the search
    # functions are necessary, and noted that zep_search has no corresponding
    # print. Kept here to preserve the existing console output.
    print(query, context)
    return context, duration_ms


def mem0_graph_search(client, query, speaker_a_user_id, speaker_b_user_id, top_k=20):
start = time()
search_speaker_a_results = client.search(
Expand Down Expand Up @@ -297,7 +347,7 @@ def zep_search(client, query, group_id, top_k=20):
return context, duration_ms


def search_query(client, query, metadata, frame, reversed_client=None, top_k=20):
def search_query(client, query, metadata, frame, version, reversed_client=None, top_k=20):
conv_id = metadata.get("conv_id")
speaker_a = metadata.get("speaker_a")
speaker_b = metadata.get("speaker_b")
Expand All @@ -316,7 +366,11 @@ def search_query(client, query, metadata, frame, reversed_client=None, top_k=20)
)
elif frame == "memos":
context, duration_ms = memos_search(
client, query, conv_id, speaker_a, speaker_b, reversed_client
client, query, conv_id, speaker_a, speaker_b, version, reversed_client
)
elif frame == "memos-api":
context, duration_ms = memos_api_search(
client, query, conv_id, speaker_a, speaker_b, top_k, version, reversed_client
)
return context, duration_ms

Expand Down Expand Up @@ -364,6 +418,11 @@ def process_user(group_idx, locomo_df, frame, version, top_k=20, num_workers=1):
speaker_b_user_id = conv_id + "_speaker_b"
client = get_client(frame, speaker_a_user_id, version, top_k=top_k)
reversed_client = get_client(frame, speaker_b_user_id, version, top_k=top_k)
elif frame == "memos-api":
speaker_a_user_id = conv_id + "_speaker_a"
speaker_b_user_id = conv_id + "_speaker_b"
client = memos_client(mode="api")
reversed_client = memos_client(mode="api")
else:
client = get_client(frame, conv_id, version)

Expand All @@ -372,7 +431,7 @@ def process_qa(qa):
if qa.get("category") == 5:
return None
context, duration_ms = search_query(
client, query, metadata, frame, reversed_client=reversed_client, top_k=top_k
client, query, metadata, frame, version, reversed_client=reversed_client, top_k=top_k
)

if not context:
Expand Down Expand Up @@ -439,7 +498,7 @@ def main(frame, version="default", num_workers=1, top_k=20):
parser.add_argument(
"--lib",
type=str,
choices=["zep", "memos", "mem0", "mem0_graph", "langmem"],
choices=["zep", "memos", "mem0", "mem0_graph", "memos-api"],
help="Specify the memory framework (zep or memos or mem0 or mem0_graph)",
)
parser.add_argument(
Expand Down
2 changes: 1 addition & 1 deletion evaluation/scripts/longmemeval/lme_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ async def main(frame, version, nlp_options, num_runs=3, num_workers=5):
parser.add_argument(
"--lib",
type=str,
choices=["mem0-local", "mem0-api"],
choices=["mem0-local", "mem0-api", "memos-local"],
)
parser.add_argument(
"--version", type=str, default="v1", help="Version of the evaluation framework."
Expand Down
7 changes: 4 additions & 3 deletions evaluation/scripts/longmemeval/lme_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def ingest_session(session, date, user_id, session_id, frame, client):
print(
f"\033[90m[{frame}]\033[0m ✅ Session \033[1;94m{session_id}\033[0m: Ingested \033[93m{len(messages)}\033[0m messages at \033[92m{date.isoformat()}\033[0m"
)
elif frame == "memos-local":
elif frame == "memos-local" or frame == "memos-api":
for idx, msg in enumerate(session):
messages.append(
{
Expand Down Expand Up @@ -126,7 +126,8 @@ def ingest_conv(lme_df, version, conv_idx, frame, num_workers=2):
addorsearch="add",
)
print("🔌 \033[1mUsing \033[94mMemos Local client\033[0m \033[1mfor ingestion...\033[0m")

elif frame == "memos-api":
client = memos_client(mode="api")
with ThreadPoolExecutor(max_workers=num_workers) as executor:
futures = []

Expand Down Expand Up @@ -193,7 +194,7 @@ def main(frame, version, num_workers=2):
parser.add_argument(
"--lib",
type=str,
choices=["mem0-local", "mem0-api", "memos-local"],
choices=["mem0-local", "mem0-api", "memos-local", "memos-api"],
)
parser.add_argument(
"--version", type=str, default="v1", help="Version of the evaluation framework."
Expand Down
6 changes: 1 addition & 5 deletions evaluation/scripts/longmemeval/lme_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,11 +255,7 @@ def calculate_scores(data, grade_path, output_path):

if __name__ == "__main__":
parser = argparse.ArgumentParser("LongMemeval Analysis Eval Metric Script")
parser.add_argument(
"--lib",
type=str,
choices=["mem0-local", "mem0-api"],
)
parser.add_argument("--lib", type=str, choices=["mem0-local", "mem0-api", "memos-local"])
parser.add_argument(
"--version", type=str, default="v1", help="Version of the evaluation framework."
)
Expand Down
6 changes: 1 addition & 5 deletions evaluation/scripts/longmemeval/lme_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,11 +142,7 @@ def main(frame, version, num_workers=4):

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="LongMemeval Response Generation Script")
parser.add_argument(
"--lib",
type=str,
choices=["mem0-local", "mem0-api"],
)
parser.add_argument("--lib", type=str, choices=["mem0-local", "mem0-api", "memos-local"])
parser.add_argument(
"--version", type=str, default="v1", help="Version of the evaluation framework."
)
Expand Down
30 changes: 21 additions & 9 deletions evaluation/scripts/longmemeval/lme_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,15 +111,20 @@ def mem0_search(client, user_id, query, top_k=20, enable_graph=False, frame="mem
return context, duration_ms


def memos_search(client, user_id, query, frame="memos-local"):
def memos_search(client, user_id, query, top_k, frame="memos-local"):
start = time()
if frame == "memos-local":
results = client.search(
query=query,
user_id=user_id,
)

results = client.search(
query=query,
user_id=user_id,
)
results = filter_memory_data(results)["text_mem"][0]["memories"]
search_memories = "\n".join([f" - {item['memory']}" for item in results])

search_memories = filter_memory_data(results)["text_mem"][0]["memories"]
elif frame == "memos-api":
results = client.search(query=query, user_id=user_id, top_k=top_k)
search_memories = "\n".join([f" - {item}" for item in results])
context = MEMOS_CONTEXT_TEMPLATE.format(user_id=user_id, memories=search_memories)

duration_ms = (time() - start) * 1000
Expand Down Expand Up @@ -177,15 +182,20 @@ def process_user(lme_df, conv_idx, frame, version, top_k=20):
mode="local",
db_name=f"lme_{frame}-{version}-{user_id.replace('_', '')}",
user_id=user_id,
top_k=20,
top_k=top_k,
mem_cube_path=f"results/lme/{frame}-{version}/storages/{user_id}",
mem_cube_config_path="configs/mem_cube_config.json",
mem_os_config_path="configs/mos_memos_config.json",
addorsearch="search",
)
print("🔌 \033[1mUsing \033[94mMemos Local client\033[0m \033[1mfor search...\033[0m")
context, duration_ms = memos_search(client, user_id, question, frame=frame)

elif frame == "memos-api":
client = memos_client(
mode="api",
)
print("🔌 \033[1mUsing \033[94mMemos API client\033[0m \033[1mfor search...\033[0m")
context, duration_ms = memos_search(client, user_id, question, top_k=top_k, frame=frame)
search_results[user_id].append(
{
"question": question,
Expand Down Expand Up @@ -282,7 +292,9 @@ def main(frame, version, top_k=20, num_workers=2):

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="LongMemeval Search Script")
parser.add_argument("--lib", type=str, choices=["mem0-local", "mem0-api", "memos-local"])
parser.add_argument(
"--lib", type=str, choices=["mem0-local", "mem0-api", "memos-local", "memos-api"]
)
parser.add_argument(
"--version", type=str, default="v1", help="Version of the evaluation framework."
)
Expand Down
Loading