Commit 0b2b6ed

Wang-Daojiyuan and yuan.wang authored

Feat/merge inst cplt to dev (#388)

* add preference text memory
* finish milvus support
* add new builder
* finish preference textual memory base level
* modify code structure
* modify pref module
* implement remaining preference functions
* modify preference.py
* fix bug in milvus
* finish debugging
* modify user-preference user-id code
* fix bug in milvus
* finish debugging in core
* fix bug in milvus get_all
* add pref mem search time in core
* modify search for pref mem in product.py
* add simple pref memos example
* fix bug in examples/mem_os/simple_prefs_memos_product.py
* fix bug in user-id-related code
* modify search
* fix bug in slow update
* fix definition error in extractor -> extract_implicit_preferences
* fix definition error in extractor and modify split function in splitter
* modify code
* modify adder
* optimize the code
* fix bug in adder and extractor
* finish make test and make pre-commit
* fix bug in preference
* add memory field for MilvusVecDBItem and modify related modules
* clean up pref code
* modify extractor prompt
* modify extractor
* add reranker to pref mem
* remove assembler in pref mem
* modify code
* add op-trace-based update method in add
* modify slow update in adder
* modify implicit-preference code in extractor and add deduplication in utils
* modify deduplication threshold
* modify API config
* fix search-related bug in adder
* fix duplicate-search bug in core
* add pref to new naive cube and server API
* add async pref add via mem_scheduler
* modify
* replace print with logger
* fix bugs surfaced by make pre-commit
* inst cplt
* align to liji cloud server
* fix package problem
* modify pref example
* pre-commit
* fix API bug
* merge inst_cplt to dev
* fix pre-commit
* fix pre-commit
* fix pre-commit error
* modify code following reviewer comments
* fix bug in make pre-commit
* fix bug in server router
* fix pre-commit bug

Co-authored-by: yuan.wang <[email protected]>

1 parent 651e8df · commit 0b2b6ed

64 files changed: +3105 −271 lines

docker/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -157,4 +157,4 @@ volcengine-python-sdk==4.0.6
 watchfiles==1.1.0
 websockets==15.0.1
 xlrd==2.0.2
-xlsxwriter==3.2.5
+xlsxwriter==3.2.5

(The removed and re-added lines are textually identical; most likely this adds a missing newline at end of file.)

docs/openapi.json

Lines changed: 7 additions & 1 deletion
@@ -884,7 +884,7 @@
           "type": "string",
           "title": "Session Id",
           "description": "Session ID for the MOS. This is used to distinguish between different dialogue",
-          "default": "0ce84b9c-0615-4b9d-83dd-fba50537d5d3"
+          "default": "41bb5e18-252d-4948-918c-07d82aa47086"
         },
         "chat_model": {
           "$ref": "#/components/schemas/LLMConfigFactory",
@@ -939,6 +939,12 @@
           "description": "Enable parametric memory for the MemChat",
           "default": false
         },
+        "enable_preference_memory": {
+          "type": "boolean",
+          "title": "Enable Preference Memory",
+          "description": "Enable preference memory for the MemChat",
+          "default": false
+        },
         "enable_mem_scheduler": {
           "type": "boolean",
           "title": "Enable Mem Scheduler",

evaluation/.env-example

Lines changed: 8 additions & 4 deletions
@@ -22,9 +22,13 @@ SUPERMEMORY_API_KEY="sm_xxx"
 MEMOBASE_API_KEY="xxx"
 MEMOBASE_PROJECT_URL="http://***.***.***.***:8019"
 
-# eval settings
-PRE_SPLIT_CHUNK=false
-
+# pref
+PRE_SPLIT_CHUNK=false # pre split chunk in client end, for personamem and prefeval
+# 1. text_mem + pref_mem + instruction_completion: set INSTRUCT_COMPLETE=true, ABLATION_PREF=false
+# 2. text_mem + pref_mem: set INSTRUCT_COMPLETE=false, ABLATION_PREF=false
+# 3. text_mem: set INSTRUCT_COMPLETE=false, ABLATION_PREF=true
+INSTRUCT_COMPLETE=true # use instruct complete format or not
+ABLATION_PREF=false # remove pref mem, only text mem
 
 # Configuration Only For Scheduler
 # RabbitMQ Configuration
@@ -45,4 +49,4 @@ MEMSCHEDULER_GRAPHDBAUTH_URI=bolt://localhost:7687
 MEMSCHEDULER_GRAPHDBAUTH_USER=neo4j
 MEMSCHEDULER_GRAPHDBAUTH_PASSWORD=***
 MEMSCHEDULER_GRAPHDBAUTH_DB_NAME=neo4j
-MEMSCHEDULER_GRAPHDBAUTH_AUTO_CREATE=true
+MEMSCHEDULER_GRAPHDBAUTH_AUTO_CREATE=true
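Taken together, INSTRUCT_COMPLETE and ABLATION_PREF select one of the three evaluation modes enumerated in the comments above. A minimal sketch of how an eval script might resolve the mode, assuming the flags are read with os.getenv (the variable names come from this file; the helper itself is hypothetical):

    import os

    from dotenv import load_dotenv

    load_dotenv()

    def eval_mode() -> str:
        # Hypothetical helper; only the env var names are taken from .env-example.
        instruct_complete = os.getenv("INSTRUCT_COMPLETE", "false").lower() == "true"
        ablation_pref = os.getenv("ABLATION_PREF", "false").lower() == "true"
        if ablation_pref:
            return "text_mem"  # mode 3: text memory only
        if instruct_complete:
            return "text_mem + pref_mem + instruction_completion"  # mode 1
        return "text_mem + pref_mem"  # mode 2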

evaluation/scripts/PrefEval/pref_eval.py

Lines changed: 24 additions & 20 deletions
@@ -1,14 +1,18 @@
+import argparse
 import asyncio
 import json
+import os
 import re
-import argparse
-from typing import Dict, Any
+
 from collections import Counter
-from tqdm.asyncio import tqdm
-import os
+from typing import Any
+
 import pandas as pd
+
 from dotenv import load_dotenv
 from openai import OpenAI
+from tqdm.asyncio import tqdm
+
 
 load_dotenv()
 
@@ -30,7 +34,7 @@ async def call_gpt4o_mini_async(client: OpenAI, prompt: str) -> str:
         )
         return response.choices[0].message.content
     except Exception as e:
-        print(f"API call failed: {str(e)}")
+        print(f"API call failed: {e!s}")
         return ""
 
 
@@ -41,7 +45,7 @@ def parse_xml_response(response: str, tag: str) -> str:
 
 
 async def evaluate_violate_preference_async(
     client: OpenAI, preference: str, question: str, response: str
-) -> Dict[str, str]:
+) -> dict[str, str]:
     prompt = f"""You will analyze a conversation between a user and an assistant, focusing on whether the assistant's response violates the user's stated preference.
 Evaluate the response based on these criteria:
@@ -73,16 +77,16 @@ async def evaluate_violate_preference_async(
 
 async def evaluate_acknowledge_preference_async(
     client: OpenAI, question: str, response: str
-) -> Dict[str, str]:
-    prompt = f"""You will analyze a conversation between a user and an assistant, focusing on whether the assistant acknowledges any user preference in answering the user's query.
+) -> dict[str, str]:
+    prompt = f"""You will analyze a conversation between a user and an assistant, focusing on whether the assistant acknowledges any user preference in answering the user's query.
 
 Evaluate the response based on these stringent criteria:
 
 1. Check if the response explicitly or implicitly mentions or references a user preference. 2. The content of the preference is irrelevant for this check; only its presence matters.
 
 Answer "Yes" if:
 
-1. The response explicitly mentions or refers to a user preference in answering the user's question. Examples include: "Based on your previous preference for xxxx"; "Based on your previous preference, I would suggest you xxxx"; "Since you mentioned you prefer/dislike xxxx"; "Since you are a xxxx"; "I will recommend the following given your preference for xxx", etc. You should extract the "xxxx" in your answer. If it only says "Based on our previous conversation, I recommend..." and does not explicitly mention any preference, you should answer 'No'.
+1. The response explicitly mentions or refers to a user preference in answering the user's question. Examples include: "Based on your previous preference for xxxx"; "Based on your previous preference, I would suggest you xxxx"; "Since you mentioned you prefer/dislike xxxx"; "Since you are a xxxx"; "I will recommend the following given your preference for xxx", etc. You should extract the "xxxx" in your answer. If it only says "Based on our previous conversation, I recommend..." and does not explicitly mention any preference, you should answer 'No'.
 2. The response assumes the user preference in answering the user's question implicitly. For example, when the user asks 'Can you recommend me cars to drive?', if the response is 'Based on your preference, I will recommend non-EV cars, ...', then this indicates the assistant assumes that the user's preference is a dislike of EV cars, and you should answer 'Yes'.
 
 Answer "No" if the response does not mention or refer to any user preference explicitly or implicitly. If you cannot extract the sentence stating what the preference is, answer 'No'.
@@ -103,7 +107,7 @@ async def evaluate_acknowledge_preference_async(
 
 
 async def evaluate_hallucinate_preference_async(
     client: OpenAI, preference: str, restatement: str
-) -> Dict[str, str]:
+) -> dict[str, str]:
     if not restatement.strip():
         return {"explanation": "No restatement provided by assistant", "answer": "No"}
     prompt = f"""You will analyze a conversation between a user and an assistant, focusing on whether the assistant's restatement of the user's stated preference is the same preference. Evaluate the response based on these stringent criteria to answer if the assistant has hallucinated the preference or not:
@@ -120,10 +124,10 @@
 2. The assistant's restatement is a minor paraphrase that fully preserves the meaning and intent of the original preference.
 3. The restatement is just empty, no hallucination.
 
-Here is the information:
-Original user preference: "{preference}"
-Assistant's restatement: "{restatement}"
-Examine the original preference and the assistant's restatement meticulously and answer. Please answer in this exact XML format without any other additional text:
+Here is the information:
+Original user preference: "{preference}"
+Assistant's restatement: "{restatement}"
+Examine the original preference and the assistant's restatement meticulously and answer. Please answer in this exact XML format without any other additional text:
 <explanation>[1 short sentence explanation]</explanation>
 <answer>[Yes/No]</answer>"""
 
@@ -136,7 +140,7 @@
 
 
 async def evaluate_helpful_response_async(
     client: OpenAI, question: str, response: str
-) -> Dict[str, str]:
+) -> dict[str, str]:
     prompt = f"""You will analyze a conversation between a user and an assistant, focusing on whether the assistant provides any substantive response to the user's query.
 Evaluate the response based on these stringent criteria:
@@ -174,7 +178,7 @@
     }
 
 
-def classify_error_type(evaluation_results: Dict[str, Any]) -> str:
+def classify_error_type(evaluation_results: dict[str, Any]) -> str:
     violate = evaluation_results["violate_preference"]["answer"]
     acknowledge = evaluation_results["acknowledge_preference"]["answer"]
     hallucinate = evaluation_results["hallucinate_preference"]["answer"]
@@ -192,7 +196,7 @@ def classify_error_type(evaluation_results: Dict[str, Any]) -> str:
     return "Personalized Response"
 
 
-async def process_line(line: str, client: OpenAI, semaphore: asyncio.Semaphore) -> Dict[str, Any]:
+async def process_line(line: str, client: OpenAI, semaphore: asyncio.Semaphore) -> dict[str, Any]:
     async with semaphore:
         data = json.loads(line.strip())
         preference = data["preference"]
@@ -223,7 +227,7 @@ async def process_line(line: str, client: OpenAI, semaphore: asyncio.Semaphore)
         return result
 
 
-def log_summary(error_counter: Counter, total_samples: int) -> Dict[str, Dict[str, float]]:
+def log_summary(error_counter: Counter, total_samples: int) -> dict[str, dict[str, float]]:
     summary_data = {}
     print("\n--- Error Type Summary ---")
 
@@ -247,7 +251,7 @@ def log_summary(error_counter: Counter, total_samples: int) -> Dict[str, Dict[str, float]]:
 
 
 def generate_excel_summary(
-    summary_results: Dict[str, Dict[str, float]],
+    summary_results: dict[str, dict[str, float]],
     avg_search_time: float,
     avg_context_tokens: float,
     avg_add_time: float,
@@ -317,7 +321,7 @@ async def main(concurrency_limit: int, input_file: str, output_file: str, output
     client = OpenAI(api_key=API_KEY, base_url=API_URL)
 
     try:
-        with open(input_file, "r", encoding="utf-8") as f:
+        with open(input_file, encoding="utf-8") as f:
             lines = f.readlines()
     except FileNotFoundError:
         print(f"Error: Input file not found at '{input_file}'")

evaluation/scripts/PrefEval/pref_mem0.py

Lines changed: 4 additions & 2 deletions
@@ -4,12 +4,14 @@
 import os
 import sys
 import time
+
 import tiktoken
+
 from dotenv import load_dotenv
+from irrelevant_conv import irre_10, irre_300
 from openai import OpenAI
 from tqdm import tqdm
 
-from irrelevant_conv import irre_10, irre_300
 
 ROOT_DIR = os.path.dirname(
     os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -199,7 +201,7 @@ def main():
     args = parser.parse_args()
 
     try:
-        with open(args.input, "r", encoding="utf-8") as infile:
+        with open(args.input, encoding="utf-8") as infile:
             lines = infile.readlines()
     except FileNotFoundError:
         print(f"Error: Input file '{args.input}' not found")

evaluation/scripts/PrefEval/pref_memobase.py

Lines changed: 6 additions & 4 deletions
@@ -4,12 +4,14 @@
 import os
 import sys
 import time
+
 import tiktoken
+
 from dotenv import load_dotenv
+from irrelevant_conv import irre_10, irre_300
 from openai import OpenAI
 from tqdm import tqdm
-import time
-from irrelevant_conv import irre_10, irre_300
+
 
 ROOT_DIR = os.path.dirname(
     os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -49,7 +51,7 @@ def add_memory_for_line(
     if conversation:
         messages = []
 
-        for chunk_start in range(0, len(conversation)):
+        for chunk_start in range(len(conversation)):
             chunk = conversation[chunk_start : chunk_start + 1]
             timestamp_add = str(int(time.time() * 100))
             time.sleep(0.001)  # Ensure unique timestamp
@@ -210,7 +212,7 @@ def main():
     args = parser.parse_args()
 
     try:
-        with open(args.input, "r", encoding="utf-8") as infile:
+        with open(args.input, encoding="utf-8") as infile:
             lines = infile.readlines()
     except FileNotFoundError:
         print(f"Error: Input file '{args.input}' not found")

evaluation/scripts/PrefEval/pref_memos.py

Lines changed: 18 additions & 8 deletions
@@ -4,12 +4,14 @@
 import os
 import sys
 import time
+
 import tiktoken
+
 from dotenv import load_dotenv
+from irrelevant_conv import irre_10, irre_300
 from openai import OpenAI
 from tqdm import tqdm
 
-from irrelevant_conv import irre_10, irre_300
 
 ROOT_DIR = os.path.dirname(
     os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -18,6 +20,8 @@
 
 sys.path.insert(0, ROOT_DIR)
 sys.path.insert(0, EVAL_SCRIPTS_DIR)
+
+
 load_dotenv()
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 BASE_URL = os.getenv("OPENAI_BASE_URL")
@@ -68,6 +72,8 @@ def search_memory_for_line(line_data: tuple, mem_client, top_k_value: int) -> dict:
     """
     Processes a single line of data, searching memory based on the question.
     """
+    from utils.pref_mem_utils import create_mem_string
+
     i, line = line_data
     try:
         original_data = json.loads(line)
@@ -88,9 +94,7 @@ def search_memory_for_line(line_data: tuple, mem_client, top_k_value: int) -> dict:
         start_time_search = time.monotonic()
         relevant_memories = mem_client.search(query=question, user_id=user_id, top_k=top_k_value)
         search_memories_duration = time.monotonic() - start_time_search
-        memories_str = "\n".join(
-            f"- {entry.get('memory', '')}" for entry in relevant_memories["text_mem"][0]["memories"]
-        )
+        memories_str = create_mem_string(relevant_memories)
 
         memory_tokens_used = len(tokenizer.encode(memories_str))
 
@@ -111,10 +115,13 @@ def search_memory_for_line(line_data: tuple, mem_client, top_k_value: int) -> dict:
         return None
 
 
-def generate_response_for_line(line_data: tuple, openai_client: OpenAI) -> dict:
+def generate_response_for_line(line_data: tuple, openai_client: OpenAI, lib: str) -> dict:
     """
     Generates a response for a single line of data using pre-fetched memories.
     """
+    from utils.pref_mem_utils import add_pref_instruction, remove_pref_mem_from_mem_string
+    from utils.prompts import PREFEVAL_ANSWER_PROMPT
+
     i, line = line_data
     try:
         original_data = json.loads(line)
@@ -139,7 +146,10 @@ def generate_response_for_line(line_data: tuple, openai_client: OpenAI) -> dict:
         )
         return original_data
 
-    system_prompt = f"You are a helpful AI. Answer the question based on the query and the following memories:\nUser Memories:\n{memories_str}"
+    memories_str = remove_pref_mem_from_mem_string(memories_str, frame=lib)
+
+    template = add_pref_instruction(PREFEVAL_ANSWER_PROMPT, frame=lib)
+    system_prompt = template.format(context=memories_str)
     messages = [
         {"role": "system", "content": system_prompt},
         {"role": "user", "content": question},
@@ -201,7 +211,7 @@ def main():
     args = parser.parse_args()
 
     try:
-        with open(args.input, "r", encoding="utf-8") as infile:
+        with open(args.input, encoding="utf-8") as infile:
             lines = infile.readlines()
     except FileNotFoundError:
         print(f"Error: Input file '{args.input}' not found")
@@ -277,7 +287,7 @@
         concurrent.futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor,
     ):
         futures = [
-            executor.submit(generate_response_for_line, (i, line), openai_client)
+            executor.submit(generate_response_for_line, (i, line), openai_client, args.lib)
            for i, line in enumerate(lines)
         ]
 
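The inline join over relevant_memories["text_mem"][0]["memories"] was replaced by create_mem_string from utils.pref_mem_utils, which presumably also folds the new preference memories into the string. That helper's body is not part of this diff; a hypothetical sketch consistent with the removed inline code (the pref_mem branch is an assumption based on this commit's intent):

    def create_mem_string(relevant_memories: dict) -> str:
        # Hypothetical reconstruction of utils.pref_mem_utils.create_mem_string.
        # The text_mem handling mirrors the inline code this commit removed;
        # the pref_mem tagging is an assumption, not the repo's actual format.
        def entries(key: str) -> list:
            block = relevant_memories.get(key) or []
            return block[0].get("memories", []) if block else []

        lines = [f"- {e.get('memory', '')}" for e in entries("text_mem")]
        lines += [f"- [preference] {e.get('memory', '')}" for e in entries("pref_mem")]
        return "\n".join(lines)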

evaluation/scripts/PrefEval/pref_memu.py

Lines changed: 7 additions & 3 deletions
@@ -4,12 +4,16 @@
 import os
 import sys
 import time
+
+from datetime import datetime
+
 import tiktoken
+
 from dotenv import load_dotenv
+from irrelevant_conv import irre_10, irre_300
 from openai import OpenAI
 from tqdm import tqdm
-from datetime import datetime
-from irrelevant_conv import irre_10, irre_300
+
 
 ROOT_DIR = os.path.dirname(
     os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -205,7 +209,7 @@ def main():
     args = parser.parse_args()
 
     try:
-        with open(args.input, "r", encoding="utf-8") as infile:
+        with open(args.input, encoding="utf-8") as infile:
             lines = infile.readlines()
     except FileNotFoundError:
         print(f"Error: Input file '{args.input}' not found")

evaluation/scripts/PrefEval/pref_supermemory.py

Lines changed: 5 additions & 3 deletions
@@ -4,12 +4,14 @@
 import os
 import sys
 import time
+
 import tiktoken
+
 from dotenv import load_dotenv
+from irrelevant_conv import irre_10, irre_300
 from openai import OpenAI
 from tqdm import tqdm
-from datetime import datetime
-from irrelevant_conv import irre_10, irre_300
+
 
 ROOT_DIR = os.path.dirname(
     os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -201,7 +203,7 @@ def main():
     args = parser.parse_args()
 
     try:
-        with open(args.input, "r", encoding="utf-8") as infile:
+        with open(args.input, encoding="utf-8") as infile:
             lines = infile.readlines()
     except FileNotFoundError:
         print(f"Error: Input file '{args.input}' not found")
