Skip to content

Commit 7eb531b

Browse files
Wang-Daoji and yuan.wang
authored
Feat/pref optimize update (#409)
* add hybrid search and fine extractor * add dialog and modify splitter chunk * optimize the update and retriever code * modify pref field * add pref mem update strategy * add pref mem update strategy * fix bug in pre_commit --------- Co-authored-by: yuan.wang <[email protected]>
1 parent f8859f1 commit 7eb531b

File tree

19 files changed

+833
-158
lines changed

19 files changed

+833
-158
lines changed

docker/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,4 +157,4 @@ volcengine-python-sdk==4.0.6
157157
watchfiles==1.1.0
158158
websockets==15.0.1
159159
xlrd==2.0.2
160-
xlsxwriter==3.2.5
160+
xlsxwriter==3.2.5

evaluation/scripts/PrefEval/pref_memos.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@ def add_memory_for_line(
5353
if os.getenv("PRE_SPLIT_CHUNK", "false").lower() == "true":
5454
for chunk_start in range(0, len(conversation), turns_add * 2):
5555
chunk = conversation[chunk_start : chunk_start + turns_add * 2]
56-
mem_client.add(messages=chunk, user_id=user_id, conv_id=None)
56+
mem_client.add(messages=chunk, user_id=user_id, conv_id=None, batch_size=2)
5757
else:
58-
mem_client.add(messages=conversation, user_id=user_id, conv_id=None)
58+
mem_client.add(messages=conversation, user_id=user_id, conv_id=None, batch_size=2)
5959
end_time_add = time.monotonic()
6060
add_duration = end_time_add - start_time_add
6161

@@ -98,7 +98,7 @@ def search_memory_for_line(line_data: tuple, mem_client, top_k_value: int) -> di
9898
f"- {entry.get('memory', '')}"
9999
for entry in relevant_memories["text_mem"][0]["memories"]
100100
)
101-
+ f"\n{relevant_memories['pref_mem']}"
101+
+ f"\n{relevant_memories['pref_string']}"
102102
)
103103

104104
memory_tokens_used = len(tokenizer.encode(memories_str))

evaluation/scripts/locomo/locomo_search.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,11 +107,11 @@ def memos_api_search(
107107

108108
speaker_a_context = (
109109
"\n".join([i["memory"] for i in search_a_results["text_mem"][0]["memories"]])
110-
+ f"\n{search_a_results['pref_mem']}"
110+
+ f"\n{search_a_results['pref_string']}"
111111
)
112112
speaker_b_context = (
113113
"\n".join([i["memory"] for i in search_b_results["text_mem"][0]["memories"]])
114-
+ f"\n{search_b_results['pref_mem']}"
114+
+ f"\n{search_b_results['pref_string']}"
115115
)
116116

117117
context = TEMPLATE_MEMOS.format(

evaluation/scripts/longmemeval/lme_search.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def memos_search(client, query, user_id, top_k):
4646
results = client.search(query=query, user_id=user_id, top_k=top_k)
4747
context = (
4848
"\n".join([i["memory"] for i in results["text_mem"][0]["memories"]])
49-
+ f"\n{results['pref_mem']}"
49+
+ f"\n{results['pref_string']}"
5050
)
5151
context = MEMOS_CONTEXT_TEMPLATE.format(user_id=user_id, memories=context)
5252
duration_ms = (time() - start) * 1000

evaluation/scripts/personamem/pm_ingestion.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@ def ingest_session(session, user_id, session_id, frame, client):
3131
if os.getenv("PRE_SPLIT_CHUNK") == "true":
3232
for i in range(0, len(session), 10):
3333
messages = session[i : i + 10]
34-
client.add(messages=messages, user_id=user_id, conv_id=session_id)
34+
client.add(messages=messages, user_id=user_id, conv_id=session_id, batch_size=2)
3535
print(f"[{frame}] ✅ Session [{session_id}]: Ingested {len(messages)} messages")
3636
else:
37-
client.add(messages=session, user_id=user_id, conv_id=session_id)
37+
client.add(messages=session, user_id=user_id, conv_id=session_id, batch_size=2)
3838
print(f"[{frame}] ✅ Session [{session_id}]: Ingested {len(session)} messages")
3939
elif frame == "memobase":
4040
for _idx, msg in enumerate(session):

evaluation/scripts/personamem/pm_search.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def memos_search(client, user_id, query, top_k):
8484
results = client.search(query=query, user_id=user_id, top_k=top_k)
8585
search_memories = (
8686
"\n".join(item["memory"] for cube in results["text_mem"] for item in cube["memories"])
87-
+ f"\n{results['pref_mem']}"
87+
+ f"\n{results['pref_string']}"
8888
)
8989
context = MEMOS_CONTEXT_TEMPLATE.format(user_id=user_id, memories=search_memories)
9090

evaluation/scripts/utils/client.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ def search(self, query, user_id, top_k):
182182
"conversation_id": "",
183183
"top_k": top_k,
184184
"mode": "mixture",
185+
"handle_pref_mem": False,
185186
},
186187
ensure_ascii=False,
187188
)

src/memos/api/routers/server_router.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -303,18 +303,15 @@ def _post_process_pref_mem(
303303
mem_cube_id: str,
304304
handle_pref_mem: bool,
305305
):
306-
if os.getenv("RETURN_ORIGINAL_PREF_MEM", "false").lower() == "true" and pref_formatted_mem:
307-
memories_result["prefs"] = []
308-
memories_result["prefs"].append(
306+
if handle_pref_mem:
307+
memories_result["pref_mem"].append(
309308
{
310309
"cube_id": mem_cube_id,
311310
"memories": pref_formatted_mem,
312311
}
313312
)
314-
315-
if handle_pref_mem:
316313
pref_instruction: str = instruct_completion(pref_formatted_mem)
317-
memories_result["pref_mem"] = pref_instruction
314+
memories_result["pref_string"] = pref_instruction
318315

319316
return memories_result
320317

@@ -333,7 +330,8 @@ def search_memories(search_req: APISearchRequest):
333330
"text_mem": [],
334331
"act_mem": [],
335332
"para_mem": [],
336-
"pref_mem": "",
333+
"pref_mem": [],
334+
"pref_string": "",
337335
}
338336

339337
search_mode = search_req.mode

src/memos/memories/textual/item.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ class PreferenceTextualMemoryMetadata(TextualMemoryMetadata):
194194
default="explicit_preference", description="Type of preference."
195195
)
196196
dialog_id: str | None = Field(default=None, description="ID of the dialog.")
197-
dialog_str: str | None = Field(default=None, description="String of the dialog.")
197+
original_text: str | None = Field(default=None, description="String of the dialog.")
198198
embedding: list[float] | None = Field(default=None, description="Vector of the dialog.")
199199
explicit_preference: str | None = Field(default=None, description="Explicit preference.")
200200
created_at: str | None = Field(default=None, description="Timestamp of the dialog.")

0 commit comments

Comments
 (0)