Skip to content

Commit b1efa60

Browse files
authored
Fix search dedup to remove duplicate memory content (#722)
Co-authored-by: [email protected] <>
1 parent 59efc4f commit b1efa60

File tree

2 files changed: 29 additions and 6 deletions.

src/memos/api/handlers/chat_handler.py

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -688,14 +688,24 @@ def generate_chat_response() -> Generator[str, None, None]:
     def _dedup_and_supplement_memories(
         self, first_filtered_memories: list, second_filtered_memories: list
     ) -> list:
-        """Remove memory from second_filtered_memories that already exists in first_filtered_memories, return remaining memories"""
-        # Create a set of IDs from first_filtered_memories for efficient lookup
-        first_memory_ids = {memory["id"] for memory in first_filtered_memories}
+        """
+        Remove memories from second_filtered_memories whose content already exists in
+        first_filtered_memories, return the remaining list.
+        """
+
+        def _norm(text: str) -> str:
+            # Use normalized text as the dedup key; keep original text in the payload.
+            return " ".join(text.split())
+
+        first_memory_texts = {_norm(memory.get("memory", "")) for memory in first_filtered_memories}
 
         remaining_memories = []
         for memory in second_filtered_memories:
-            if memory["id"] not in first_memory_ids:
-                remaining_memories.append(memory)
+            key = _norm(memory.get("memory", ""))
+            if key in first_memory_texts:
+                continue
+            first_memory_texts.add(key)
+            remaining_memories.append(memory)
         return remaining_memories
 
     def _get_internet_reference(

src/memos/multi_mem_cube/single_cube.py

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -360,7 +360,20 @@ def _fine_search(
360360
logger.info(
361361
f"Added {len(additional_memories)} more memories. Total enhanced memories: {len(enhanced_memories)}"
362362
)
363-
formatted_memories = [format_memory_item(data) for data in enhanced_memories]
363+
364+
def _dedup_by_content(memories: list) -> list:
365+
seen = set()
366+
unique_memories = []
367+
for mem in memories:
368+
key = " ".join(mem.memory.split())
369+
if key in seen:
370+
continue
371+
seen.add(key)
372+
unique_memories.append(mem)
373+
return unique_memories
374+
375+
deduped_memories = _dedup_by_content(enhanced_memories)
376+
formatted_memories = [format_memory_item(data) for data in deduped_memories]
364377

365378
logger.info(f"Found {len(formatted_memories)} memories for user {search_req.user_id}")
366379

0 commit comments

Comments
 (0)