
Commit e069928

Wang-Daojiyuan and wangfridayL authored
modify code in evaluation (#392)
* modify code in evaluation
* modify code in evaluation

---------

Co-authored-by: yuan.wang <[email protected]>
Co-authored-by: chunyu li <[email protected]>
1 parent f6e96d5 commit e069928

File tree

12 files changed: +41 -118 lines changed


evaluation/.env-example

Lines changed: 2 additions & 7 deletions
@@ -22,13 +22,8 @@ SUPERMEMORY_API_KEY="sm_xxx"
 MEMOBASE_API_KEY="xxx"
 MEMOBASE_PROJECT_URL="http://***.***.***.***:8019"

-# pref
-PRE_SPLIT_CHUNK=false # pre split chunk in client end, for personamem and prefeval
-# 1. text_mem + pref_mem + instruction_completion: set INSTRUCT_COMPLETE=true, ABLATION_PREF=false
-# 2. text_mem + pref_mem: set INSTRUCT_COMPLETE=false, ABLATION_PREF=false
-# 3. text_mem: set INSTRUCT_COMPLETE=false, ABLATION_PREF=true
-INSTRUCT_COMPLETE=true # use instruct complete format or not
-ABLATION_PREF=false # remove pref mem, only text mem
+# eval settings
+PRE_SPLIT_CHUNK=false

 # Configuration Only For Scheduler
 # RabbitMQ Configuration
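With the ablation flags (INSTRUCT_COMPLETE, ABLATION_PREF) removed, PRE_SPLIT_CHUNK is the only eval setting left in this block. As a rough sketch of how a boolean env flag like this is typically consumed on the client side — the actual reader is not part of this diff, so the helper below is hypothetical:

import os

# Hypothetical reader for the remaining flag; illustration only, not code from this commit.
def pre_split_chunk_enabled() -> bool:
    # Treat "true" (case-insensitive) as enabled, anything else as disabled.
    return os.getenv("PRE_SPLIT_CHUNK", "false").strip().lower() == "true"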

evaluation/scripts/PrefEval/pref_memos.py

Lines changed: 8 additions & 7 deletions
@@ -72,7 +72,6 @@ def search_memory_for_line(line_data: tuple, mem_client, top_k_value: int) -> di
     """
     Processes a single line of data, searching memory based on the question.
     """
-    from utils.pref_mem_utils import create_mem_string

     i, line = line_data
     try:
@@ -94,7 +93,13 @@ def search_memory_for_line(line_data: tuple, mem_client, top_k_value: int) -> di
         start_time_search = time.monotonic()
         relevant_memories = mem_client.search(query=question, user_id=user_id, top_k=top_k_value)
         search_memories_duration = time.monotonic() - start_time_search
-        memories_str = create_mem_string(relevant_memories)
+        memories_str = (
+            "\n".join(
+                f"- {entry.get('memory', '')}"
+                for entry in relevant_memories["text_mem"][0]["memories"]
+            )
+            + f"\n{relevant_memories['pref_mem']}"
+        )

         memory_tokens_used = len(tokenizer.encode(memories_str))

@@ -119,7 +124,6 @@ def generate_response_for_line(line_data: tuple, openai_client: OpenAI, lib: str
     """
     Generates a response for a single line of data using pre-fetched memories.
     """
-    from utils.pref_mem_utils import add_pref_instruction, remove_pref_mem_from_mem_string
     from utils.prompts import PREFEVAL_ANSWER_PROMPT

     i, line = line_data
@@ -146,10 +150,7 @@ def generate_response_for_line(line_data: tuple, openai_client: OpenAI, lib: str
         )
         return original_data

-    memories_str = remove_pref_mem_from_mem_string(memories_str, frame=lib)
-
-    template = add_pref_instruction(PREFEVAL_ANSWER_PROMPT, frame=lib)
-    system_prompt = template.format(context=memories_str)
+    system_prompt = PREFEVAL_ANSWER_PROMPT.format(context=memories_str)
     messages = [
         {"role": "system", "content": system_prompt},
         {"role": "user", "content": question},

evaluation/scripts/locomo/locomo_responses.py

Lines changed: 1 addition & 7 deletions
@@ -35,10 +35,7 @@ async def locomo_response(frame, llm_client, context: str, question: str) -> str
             question=question,
         )
     else:
-        from utils.pref_mem_utils import add_pref_instruction
-
-        template = add_pref_instruction(ANSWER_PROMPT_MEMOS, frame=frame)
-        prompt = template.format(
+        prompt = ANSWER_PROMPT_MEMOS.format(
             context=context,
             question=question,
         )
@@ -55,16 +52,13 @@ async def locomo_response(frame, llm_client, context: str, question: str) -> str


 async def process_qa(frame, qa, search_result, oai_client):
-    from utils.pref_mem_utils import remove_pref_mem_from_mem_string
-
     start = time()
     query = qa.get("question")
     gold_answer = qa.get("answer")
     qa_category = qa.get("category")

     context = search_result.get("context")

-    context = remove_pref_mem_from_mem_string(context, frame)
     answer = await locomo_response(frame, oai_client, context, query)

     response_duration_ms = (time() - start) * 1000

evaluation/scripts/locomo/locomo_search.py

Lines changed: 8 additions & 3 deletions
@@ -100,14 +100,19 @@ def memos_api_search(
     client, query, speaker_a_user_id, speaker_b_user_id, top_k, speaker_a, speaker_b
 ):
     from prompts import TEMPLATE_MEMOS
-    from utils.pref_mem_utils import create_mem_string

     start = time()
     search_a_results = client.search(query=query, user_id=speaker_a_user_id, top_k=top_k)
     search_b_results = client.search(query=query, user_id=speaker_b_user_id, top_k=top_k)

-    speaker_a_context = create_mem_string(search_a_results)
-    speaker_b_context = create_mem_string(search_b_results)
+    speaker_a_context = (
+        "\n".join([i["memory"] for i in search_a_results["text_mem"][0]["memories"]])
+        + f"\n{search_a_results['pref_mem']}"
+    )
+    speaker_b_context = (
+        "\n".join([i["memory"] for i in search_b_results["text_mem"][0]["memories"]])
+        + f"\n{search_b_results['pref_mem']}"
+    )

     context = TEMPLATE_MEMOS.format(
         speaker_1=speaker_a,
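The same join-plus-pref_mem construction is repeated for both speakers; the commit inlines it rather than keeping a shared helper (utils/pref_mem_utils.py is deleted below). If the duplication ever becomes a concern, an equivalent local helper could look like the sketch below — this is not code from the commit, just a refactoring sketch under the same assumed result shape:

def _build_context(results: dict) -> str:
    # Equivalent to the inlined expressions above: plain-text memories from
    # the first cube, followed by the preference-memory block.
    text_part = "\n".join(i["memory"] for i in results["text_mem"][0]["memories"])
    return f"{text_part}\n{results['pref_mem']}"

# speaker_a_context = _build_context(search_a_results)
# speaker_b_context = _build_context(search_b_results)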

evaluation/scripts/locomo/prompts.py

Lines changed: 1 addition & 16 deletions
@@ -1,14 +1,3 @@
-import os
-
-
-PREF_INSTRUCTIONS = """
-# Note:
-Plaintext memory are summaries of facts, while preference memories are summaries of user preferences.
-Your response must not violate any of the user's preferences, whether explicit or implicit, and briefly explain why you answer this way to avoid conflicts.
-When encountering preference conflicts, the priority is: explicit preference > implicit preference > plaintext memory.
-"""
-
-
 ANSWER_PROMPT_MEM0 = """
 You are an intelligent memory assistant tasked with retrieving accurate information from conversation memories.

@@ -114,18 +103,14 @@
 5. Formulate a precise, concise answer based on the evidence from the memories (and allowed world knowledge).
 6. Double-check that your answer directly addresses the question asked and adheres to all instructions.
 7. Ensure your final answer is specific and avoids vague time references.
-{pref_instructions}
+
 {context}

 Question: {question}

 Answer:
 """

-if os.getenv("INSTRUCT_COMPLETE") == "true":
-    ANSWER_PROMPT_MEMOS = ANSWER_PROMPT_MEMOS.replace("{pref_instructions}", PREF_INSTRUCTIONS)
-else:
-    ANSWER_PROMPT_MEMOS = ANSWER_PROMPT_MEMOS.replace("{pref_instructions}", "")

 custom_instructions = """
 Generate personal memories that follow these guidelines:

evaluation/scripts/longmemeval/lme_responses.py

Lines changed: 5 additions & 8 deletions
@@ -12,13 +12,11 @@


 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from utils.pref_mem_utils import add_pref_instruction, remove_pref_mem_from_mem_string
 from utils.prompts import LME_ANSWER_PROMPT


-def lme_response(llm_client, context, question, question_date, frame):
-    template = add_pref_instruction(LME_ANSWER_PROMPT, frame=frame)
-    prompt = template.format(
+def lme_response(llm_client, context, question, question_date):
+    prompt = LME_ANSWER_PROMPT.format(
         question=question,
         question_date=question_date,
         context=context,
@@ -35,14 +33,13 @@ def lme_response(llm_client, context, question, question_date, frame):
     return result


-def process_qa(user_id, search_result, llm_client, frame):
+def process_qa(user_id, search_result, llm_client):
     start = time()
     search_result = search_result[0]
     question = search_result.get("question")
     question_date = search_result.get("date")
     context = search_result.get("search_context", "")
-    context = remove_pref_mem_from_mem_string(context, frame=frame)
-    anwer = lme_response(llm_client, context, question, question_date, frame)
+    anwer = lme_response(llm_client, context, question, question_date)

     response_duration_ms = (time() - start) * 1000

@@ -97,7 +94,7 @@ def main(frame, version, num_workers=4):
     future_to_user_id = {}

     for user_id, search_results in lme_search_results.items():
-        future = executor.submit(process_qa, user_id, search_results, oai_client, frame)
+        future = executor.submit(process_qa, user_id, search_results, oai_client)
         future_to_user_id[future] = user_id

     for future in tqdm(

evaluation/scripts/longmemeval/lme_search.py

Lines changed: 4 additions & 2 deletions
@@ -13,7 +13,6 @@
 import pandas as pd

 from tqdm import tqdm
-from utils.pref_mem_utils import create_mem_string
 from utils.prompts import (
     MEM0_CONTEXT_TEMPLATE,
     MEM0_GRAPH_CONTEXT_TEMPLATE,
@@ -45,7 +44,10 @@ def mem0_search(client, query, user_id, top_k):
 def memos_search(client, query, user_id, top_k):
     start = time()
     results = client.search(query=query, user_id=user_id, top_k=top_k)
-    context = create_mem_string(results)
+    context = (
+        "\n".join([i["memory"] for i in results["text_mem"][0]["memories"]])
+        + f"\n{results['pref_mem']}"
+    )
     context = MEMOS_CONTEXT_TEMPLATE.format(user_id=user_id, memories=context)
     duration_ms = (time() - start) * 1000
     return context, duration_ms

evaluation/scripts/personamem/pm_responses.py

Lines changed: 5 additions & 11 deletions
@@ -14,7 +14,6 @@
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import re

-from utils.pref_mem_utils import add_pref_instruction, remove_pref_mem_from_mem_string
 from utils.prompts import PM_ANSWER_PROMPT


@@ -49,9 +48,8 @@ def _extract_only_options(text):
     return False, predicted_answer


-def pm_response(llm_client, context, question, options, frame):
-    template = add_pref_instruction(PM_ANSWER_PROMPT, frame=frame)
-    prompt = template.format(
+def pm_response(llm_client, context, question, options):
+    prompt = PM_ANSWER_PROMPT.format(
         question=question,
         context=context,
         options=options,
@@ -68,19 +66,17 @@ def pm_response(llm_client, context, question, options, frame):
     return result


-def process_qa(user_id, search_result, num_runs, llm_client, frame):
+def process_qa(user_id, search_result, num_runs, llm_client):
     search_result = search_result[0]
     question = search_result.get("question")
     context = search_result.get("search_context", "")
     options = search_result.get("all_options", [])

-    context = remove_pref_mem_from_mem_string(context, frame=frame)
-
     run_results = []

     for idx in range(num_runs):
         start = time()
-        answer = pm_response(llm_client, context, question, options, frame)
+        answer = pm_response(llm_client, context, question, options)
         is_correct, answer = extract_choice_answer(answer, search_result.get("golden_answer", ""))
         response_duration_ms = (time() - start) * 1000

@@ -154,9 +150,7 @@ def main(frame, version, num_runs=3, num_workers=4):
     future_to_user_id = {}

     for user_id, search_results in pm_search_results.items():
-        future = executor.submit(
-            process_qa, user_id, search_results, num_runs, oai_client, frame
-        )
+        future = executor.submit(process_qa, user_id, search_results, num_runs, oai_client)
         future_to_user_id[future] = user_id

     for future in tqdm(

evaluation/scripts/personamem/pm_search.py

Lines changed: 4 additions & 2 deletions
@@ -14,7 +14,6 @@

 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-from utils.pref_mem_utils import create_mem_string
 from utils.prompts import (
     MEM0_CONTEXT_TEMPLATE,
     MEM0_GRAPH_CONTEXT_TEMPLATE,
@@ -83,7 +82,10 @@ def memobase_search(client, query, user_id, top_k):
 def memos_search(client, user_id, query, top_k):
     start = time()
     results = client.search(query=query, user_id=user_id, top_k=top_k)
-    search_memories = create_mem_string(results)
+    search_memories = (
+        "\n".join(item["memory"] for cube in results["text_mem"] for item in cube["memories"])
+        + f"\n{results['pref_mem']}"
+    )
     context = MEMOS_CONTEXT_TEMPLATE.format(user_id=user_id, memories=search_memories)

     duration_ms = (time() - start) * 1000
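Note one difference from the other scripts: pm_search.py joins memories from every cube in results["text_mem"], while the locomo and longmemeval scripts read only the first cube (results["text_mem"][0]). A small sketch with illustrative data showing how the two access patterns differ:

# Illustrative data only; the cube/memory field names follow the diff above.
results = {
    "text_mem": [
        {"memories": [{"memory": "fact from cube 0"}]},
        {"memories": [{"memory": "fact from cube 1"}]},
    ],
    "pref_mem": "user preference summary",
}

first_cube_only = "\n".join(i["memory"] for i in results["text_mem"][0]["memories"])
# -> "fact from cube 0"

all_cubes = "\n".join(
    item["memory"] for cube in results["text_mem"] for item in cube["memories"]
)
# -> "fact from cube 0\nfact from cube 1"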

evaluation/scripts/utils/pref_mem_utils.py

Lines changed: 0 additions & 43 deletions
This file was deleted.
