Skip to content

Commit 2a6a0f2

Browse files
authored
Feat/reorg dev (#202)
1 parent 5bfac00 commit 2a6a0f2

File tree

6 files changed

+69
-395
lines changed

6 files changed

+69
-395
lines changed

src/memos/memories/textual/tree_text_memory/organize/conflict.py renamed to src/memos/memories/textual/tree_text_memory/organize/handler.py

Lines changed: 30 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import json
22
import re
3-
43
from datetime import datetime
54

65
from dateutil import parser
@@ -11,82 +10,68 @@
1110
from memos.log import get_logger
1211
from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
1312
from memos.templates.tree_reorganize_prompts import (
14-
CONFLICT_DETECTOR_PROMPT,
15-
CONFLICT_RESOLVER_PROMPT,
13+
MEMORY_RELATION_DETECTOR_PROMPT,
14+
MEMORY_RELATION_RESOLVER_PROMPT,
1615
)
1716

18-
1917
logger = get_logger(__name__)
2018

2119

22-
class ConflictHandler:
20+
class NodeHandler:
2321
EMBEDDING_THRESHOLD: float = 0.8 # Threshold for embedding similarity to consider conflict
2422

2523
def __init__(self, graph_store: Neo4jGraphDB, llm: BaseLLM, embedder: BaseEmbedder):
2624
self.graph_store = graph_store
2725
self.llm = llm
2826
self.embedder = embedder
2927

30-
def detect(
31-
self, memory: TextualMemoryItem, top_k: int = 5, scope: str | None = None
32-
) -> list[tuple[TextualMemoryItem, TextualMemoryItem]]:
33-
"""
34-
Detect conflicts by finding the most similar items in the graph database based on embedding, then use LLM to judge conflict.
35-
Args:
36-
memory: The memory item (should have an embedding attribute or field).
37-
top_k: Number of top similar nodes to retrieve.
38-
scope: Optional memory type filter.
39-
Returns:
40-
List of conflict pairs (each pair is a tuple: (memory, candidate)).
41-
"""
28+
def detect(self, memory, top_k: int = 5, scope=None):
4229
# 1. Search for similar memories based on embedding
4330
embedding = memory.metadata.embedding
4431
embedding_candidates_info = self.graph_store.search_by_embedding(
45-
embedding, top_k=top_k, scope=scope
32+
embedding, top_k=top_k, scope=scope, threshold=self.EMBEDDING_THRESHOLD
4633
)
4734
# 2. Filter based on similarity threshold
4835
embedding_candidates_ids = [
49-
info["id"]
50-
for info in embedding_candidates_info
51-
if info["score"] >= self.EMBEDDING_THRESHOLD and info["id"] != memory.id
36+
info["id"] for info in embedding_candidates_info if info["id"] != memory.id
5237
]
5338
# 3. Judge conflicts using LLM
5439
embedding_candidates = self.graph_store.get_nodes(embedding_candidates_ids)
55-
conflict_pairs = []
40+
detected_relationships = []
5641
for embedding_candidate in embedding_candidates:
5742
embedding_candidate = TextualMemoryItem.from_dict(embedding_candidate)
5843
prompt = [
59-
{
60-
"role": "system",
61-
"content": "You are a conflict detector for memory items.",
62-
},
6344
{
6445
"role": "user",
65-
"content": CONFLICT_DETECTOR_PROMPT.format(
66-
statement_1=memory.memory,
67-
statement_2=embedding_candidate.memory,
46+
"content": MEMORY_RELATION_DETECTOR_PROMPT.format(
47+
statement_1=memory.memory, statement_2=embedding_candidate.memory
6848
),
69-
},
49+
}
7050
]
7151
result = self.llm.generate(prompt).strip()
72-
if "yes" in result.lower():
73-
conflict_pairs.append([memory, embedding_candidate])
74-
if len(conflict_pairs):
75-
conflict_text = "\n".join(
76-
f'"{pair[0].memory!s}" <==CONFLICT==> "{pair[1].memory!s}"'
77-
for pair in conflict_pairs
78-
)
79-
logger.warning(
80-
f"Detected {len(conflict_pairs)} conflicts for memory {memory.id}\n {conflict_text}"
81-
)
82-
return conflict_pairs
52+
if result == "contradictory":
53+
logger.warning(
54+
f'detected "{memory.memory}" <==CONFLICT==> "{embedding_candidate.memory}"'
55+
)
56+
detected_relationships.append([memory, embedding_candidate, "contradictory"])
57+
elif result == "redundant":
58+
logger.warning(
59+
f'detected "{memory.memory}" <==REDUNDANT==> "{embedding_candidate.memory}"'
60+
)
61+
detected_relationships.append([memory, embedding_candidate, "redundant"])
62+
elif result == "independent":
63+
pass
64+
else:
65+
pass
66+
return detected_relationships
8367

84-
def resolve(self, memory_a: TextualMemoryItem, memory_b: TextualMemoryItem) -> None:
68+
def resolve(self, memory_a: TextualMemoryItem, memory_b: TextualMemoryItem, relation) -> None:
8569
"""
8670
Resolve detected conflicts between two memory items using LLM fusion.
8771
Args:
8872
memory_a: The first conflicting memory item.
8973
memory_b: The second conflicting memory item.
74+
relation: relation
9075
Returns:
9176
A fused TextualMemoryItem representing the resolved memory.
9277
"""
@@ -96,13 +81,10 @@ def resolve(self, memory_a: TextualMemoryItem, memory_b: TextualMemoryItem) -> N
9681
metadata_1 = memory_a.metadata.model_dump_json(include=metadata_for_resolve)
9782
metadata_2 = memory_b.metadata.model_dump_json(include=metadata_for_resolve)
9883
prompt = [
99-
{
100-
"role": "system",
101-
"content": "",
102-
},
10384
{
10485
"role": "user",
105-
"content": CONFLICT_RESOLVER_PROMPT.format(
86+
"content": MEMORY_RELATION_RESOLVER_PROMPT.format(
87+
relation=relation,
10688
statement_1=memory_a.memory,
10789
metadata_1=metadata_1,
10890
statement_2=memory_b.memory,
@@ -119,7 +101,7 @@ def resolve(self, memory_a: TextualMemoryItem, memory_b: TextualMemoryItem) -> N
119101
# —————— 2.1 Can't resolve conflict, hard update by comparing timestamp ————
120102
if len(answer) <= 10 and "no" in answer.lower():
121103
logger.warning(
122-
f"Conflict between {memory_a.id} and {memory_b.id} could not be resolved. "
104+
f"{relation} between {memory_a.id} and {memory_b.id} could not be resolved. "
123105
)
124106
self._hard_update(memory_a, memory_b)
125107
# —————— 2.2 Conflict resolved, update metadata and memory ————

src/memos/memories/textual/tree_text_memory/organize/manager.py

Lines changed: 6 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -158,106 +158,18 @@ def _add_to_graph_memory(self, memory: TextualMemoryItem, memory_type: str):
158158
- topic_summary_prefix: summary node id prefix if applicable
159159
- enable_summary_link: whether to auto-link to a summary node
160160
"""
161-
embedding = memory.metadata.embedding
162-
163-
# Step 1: Find similar nodes for possible merging
164-
similar_nodes = self.graph_store.search_by_embedding(
165-
vector=embedding,
166-
top_k=3,
167-
scope=memory_type,
168-
threshold=self._threshold,
169-
status="activated",
170-
)
171-
172-
if similar_nodes and similar_nodes[0]["score"] > self._merged_threshold:
173-
return self._merge(memory, similar_nodes)
174-
else:
175-
node_id = str(uuid.uuid4())
176-
# Step 2: Add new node to graph
177-
self.graph_store.add_node(
178-
node_id, memory.memory, memory.metadata.model_dump(exclude_none=True)
179-
)
180-
self.reorganizer.add_message(
181-
QueueMessage(
182-
op="add",
183-
after_node=[node_id],
184-
)
185-
)
186-
return node_id
187-
188-
def _merge(self, source_node: TextualMemoryItem, similar_nodes: list[dict]) -> str:
189-
"""
190-
TODO: Add node traceability support by optionally preserving source nodes and linking them with MERGED_FROM edges.
191-
192-
Merge the source memory into the most similar existing node (only one),
193-
and establish a MERGED_FROM edge in the graph.
194-
195-
Parameters:
196-
source_node: The new memory item (not yet in the graph)
197-
similar_nodes: A list of dicts returned by search_by_embedding(), ordered by similarity
198-
"""
199-
original_node = similar_nodes[0]
200-
original_id = original_node["id"]
201-
original_data = self.graph_store.get_node(original_id)
202-
203-
target_text = original_data.get("memory", "")
204-
merged_text = f"{target_text}\n⟵MERGED⟶\n{source_node.memory}"
205-
206-
original_meta = TreeNodeTextualMemoryMetadata(**original_data["metadata"])
207-
source_meta = source_node.metadata
208-
209-
merged_key = source_meta.key or original_meta.key
210-
merged_tags = list(set((original_meta.tags or []) + (source_meta.tags or [])))
211-
merged_sources = list(set((original_meta.sources or []) + (source_meta.sources or [])))
212-
merged_background = f"{original_meta.background}\n⟵MERGED⟶\n{source_meta.background}"
213-
merged_embedding = self.embedder.embed([merged_text])[0]
214-
215-
original_conf = original_meta.confidence or 0.0
216-
source_conf = source_meta.confidence or 0.0
217-
merged_confidence = float((original_conf + source_conf) / 2)
218-
merged_usage = list(set((original_meta.usage or []) + (source_meta.usage or [])))
219-
220-
# Create new merged node
221-
merged_id = str(uuid.uuid4())
222-
merged_metadata = source_meta.model_copy(
223-
update={
224-
"embedding": merged_embedding,
225-
"updated_at": datetime.now().isoformat(),
226-
"key": merged_key,
227-
"tags": merged_tags,
228-
"sources": merged_sources,
229-
"background": merged_background,
230-
"confidence": merged_confidence,
231-
"usage": merged_usage,
232-
}
233-
)
234-
161+
node_id = str(uuid.uuid4())
162+
# Step 2: Add new node to graph
235163
self.graph_store.add_node(
236-
merged_id, merged_text, merged_metadata.model_dump(exclude_none=True)
164+
node_id, memory.memory, memory.metadata.model_dump(exclude_none=True)
237165
)
238-
239-
# Add traceability edges: both original and new point to merged node
240-
self.graph_store.add_edge(original_id, merged_id, type="MERGED_TO")
241-
self.graph_store.update_node(original_id, {"status": "archived"})
242-
source_id = str(uuid.uuid4())
243-
source_metadata = source_node.metadata.model_copy(update={"status": "archived"})
244-
self.graph_store.add_node(source_id, source_node.memory, source_metadata.model_dump())
245-
self.graph_store.add_edge(source_id, merged_id, type="MERGED_TO")
246-
# After creating merged node and tracing lineage
247-
self._inherit_edges(original_id, merged_id)
248-
249-
# log to reorganizer before updating the graph
250166
self.reorganizer.add_message(
251167
QueueMessage(
252-
op="merge",
253-
before_node=[
254-
original_id,
255-
source_node.id,
256-
],
257-
after_node=[merged_id],
168+
op="add",
169+
after_node=[node_id],
258170
)
259171
)
260-
return merged_id
172+
return node_id
261173

262174
def _inherit_edges(self, from_id: str, to_id: str) -> None:
263175
"""

0 commit comments

Comments
 (0)