Skip to content

Commit a2ec665

Browse files
authored
feat: illumination (#229)
* feat: illumination fix * fix: reorganize bug * fix: bug for scheduler config * feat: add internet retrieve log * fix: config bug * feat: use internet search always if user input the config * feat: delete useless log; limit reorganizer length * fix: reorganizer * fix: search * fix: search
1 parent db794c4 commit a2ec665

File tree

4 files changed

+65
-31
lines changed

4 files changed

+65
-31
lines changed

src/memos/api/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def get_internet_config() -> dict[str, Any]:
136136
"max_tokens": 5000,
137137
"top_p": 0.95,
138138
"top_k": 20,
139-
"api_key": "EMPTY",
139+
"api_key": os.getenv("MEMRADER_API_KEY", "EMPTY"),
140140
"api_base": os.getenv("MEMRADER_API_BASE"),
141141
"remove_think_prefix": True,
142142
"extra_body": {"chat_template_kwargs": {"enable_thinking": False}},

src/memos/graph_dbs/nebular.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1519,7 +1519,6 @@ def _parse_value(self, value: Any) -> Any:
15191519

15201520
return prim # already a Python primitive
15211521

1522-
@timed
15231522
def _parse_node(self, props: dict[str, Any]) -> dict[str, Any]:
15241523
parsed = {k: self._parse_value(v) for k, v in props.items()}
15251524

src/memos/memories/textual/tree_text_memory/organize/reorganizer.py

Lines changed: 49 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ def optimize_structure(
242242
except Exception as e:
243243
logger.warning(
244244
f"[Reorganize] Cluster processing "
245-
f"failed: {e}, trace: {traceback.format_exc()}"
245+
f"failed: {e}, cluster_nodes: {cluster_nodes}, trace: {traceback.format_exc()}"
246246
)
247247
logger.info("[GraphStructure Reorganize] Structure optimization finished.")
248248

@@ -333,7 +333,9 @@ def _process_cluster_and_write(
333333

334334
logger.info("[Reorganizer] Cluster relation/reasoning done.")
335335

336-
def _local_subcluster(self, cluster_nodes: list[GraphDBNode]) -> list[list[GraphDBNode]]:
336+
def _local_subcluster(
337+
self, cluster_nodes: list[GraphDBNode], max_length: int = 8000
338+
) -> (list)[list[GraphDBNode]]:
337339
"""
338340
Use LLM to split a large cluster into semantically coherent sub-clusters.
339341
"""
@@ -347,7 +349,9 @@ def _local_subcluster(self, cluster_nodes: list[GraphDBNode]) -> list[list[Graph
347349
scene_lines.append(line)
348350

349351
joined_scene = "\n".join(scene_lines)
350-
prompt = LOCAL_SUBCLUSTER_PROMPT.replace("{joined_scene}", joined_scene)
352+
if len(joined_scene) > max_length:
353+
logger.warning(f"Sub-cluster too long: {joined_scene}")
354+
prompt = LOCAL_SUBCLUSTER_PROMPT.replace("{joined_scene}", joined_scene[:max_length])
351355

352356
messages = [{"role": "user", "content": prompt}]
353357
response_text = self.llm.generate(messages)
@@ -394,41 +398,73 @@ def _partition(self, nodes, min_cluster_size: int = 10, max_cluster_size: int =
394398
)
395399
return [nodes]
396400

397-
def recursive_clustering(nodes_list):
401+
def recursive_clustering(nodes_list, depth=0):
398402
"""Recursively split clusters until each is <= max_cluster_size."""
403+
indent = " " * depth
404+
logger.info(
405+
f"{indent}[Recursive] Start clustering {len(nodes_list)} nodes at depth {depth}"
406+
)
407+
399408
if len(nodes_list) <= max_cluster_size:
409+
logger.info(
410+
f"{indent}[Recursive] Node count <= {max_cluster_size}, stop splitting."
411+
)
400412
return [nodes_list]
401-
402413
# Try kmeans with k = ceil(len(nodes) / max_cluster_size)
403-
x = np.array([n.metadata.embedding for n in nodes_list if n.metadata.embedding])
414+
x_nodes = [n for n in nodes_list if n.metadata.embedding]
415+
x = np.array([n.metadata.embedding for n in x_nodes])
416+
404417
if len(x) < min_cluster_size:
418+
logger.info(
419+
f"{indent}[Recursive] Too few embeddings ({len(x)}), skipping clustering."
420+
)
405421
return [nodes_list]
406422

407423
k = min(len(x), (len(nodes_list) + max_cluster_size - 1) // max_cluster_size)
408-
k = max(1, min(k, len(x)))
424+
k = max(1, k)
409425

410426
try:
427+
logger.info(f"{indent}[Recursive] Clustering with k={k} on {len(x)} points.")
411428
kmeans = MiniBatchKMeans(n_clusters=k, batch_size=256, random_state=42)
412429
labels = kmeans.fit_predict(x)
413430

414431
label_groups = defaultdict(list)
415-
for node, label in zip(nodes_list, labels, strict=False):
432+
for node, label in zip(x_nodes, labels, strict=False):
416433
label_groups[label].append(node)
417434

435+
# Map: label -> nodes with no embedding (fallback group)
436+
no_embedding_nodes = [n for n in nodes_list if not n.metadata.embedding]
437+
if no_embedding_nodes:
438+
logger.warning(
439+
f"{indent}[Recursive] {len(no_embedding_nodes)} nodes have no embedding. Added to largest cluster."
440+
)
441+
# Assign to largest cluster
442+
largest_label = max(label_groups.items(), key=lambda kv: len(kv[1]))[0]
443+
label_groups[largest_label].extend(no_embedding_nodes)
444+
418445
result = []
419-
for sub_group in label_groups.values():
420-
result.extend(recursive_clustering(sub_group))
446+
for label, sub_group in label_groups.items():
447+
logger.info(f"{indent} Cluster-{label}: {len(sub_group)} nodes")
448+
result.extend(recursive_clustering(sub_group, depth=depth + 1))
421449
return result
450+
422451
except Exception as e:
423-
logger.warning(f"Clustering failed: {e}, falling back to single cluster.")
452+
logger.warning(
453+
f"{indent}[Recursive] Clustering failed: {e}, fallback to one cluster."
454+
)
424455
return [nodes_list]
425456

426457
raw_clusters = recursive_clustering(nodes)
427458
filtered_clusters = [c for c in raw_clusters if len(c) > min_cluster_size]
459+
460+
logger.info(f"[KMeansPartition] Total clusters before filtering: {len(raw_clusters)}")
461+
for i, cluster in enumerate(raw_clusters):
462+
logger.info(f"[KMeansPartition] Cluster-{i}: {len(cluster)} nodes")
463+
428464
logger.info(
429-
f"[KMeansPartition] Total clusters created: {len(raw_clusters)}, "
430-
f"kept {len(filtered_clusters)} (>{min_cluster_size})."
465+
f"[KMeansPartition] Clusters after filtering (>{min_cluster_size}): {len(filtered_clusters)}"
431466
)
467+
432468
return filtered_clusters
433469

434470
def _summarize_cluster(self, cluster_nodes: list[GraphDBNode], scope: str) -> GraphDBNode:

src/memos/memories/textual/tree_text_memory/retrieve/searcher.py

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -145,19 +145,18 @@ def _retrieve_paths(self, query, parsed_goal, query_embedding, info, top_k, mode
145145
memory_type,
146146
)
147147
)
148-
if parsed_goal.internet_search:
149-
tasks.append(
150-
executor.submit(
151-
self._retrieve_from_internet,
152-
query,
153-
parsed_goal,
154-
query_embedding,
155-
top_k,
156-
info,
157-
mode,
158-
memory_type,
159-
)
148+
tasks.append(
149+
executor.submit(
150+
self._retrieve_from_internet,
151+
query,
152+
parsed_goal,
153+
query_embedding,
154+
top_k,
155+
info,
156+
mode,
157+
memory_type,
160158
)
159+
)
161160

162161
results = []
163162
for t in tasks:
@@ -223,16 +222,16 @@ def _retrieve_from_internet(
223222
self, query, parsed_goal, query_embedding, top_k, info, mode, memory_type
224223
):
225224
"""Retrieve and rerank from Internet source"""
226-
if not self.internet_retriever or mode == "fast" or not parsed_goal.internet_search:
227-
logger.info(
228-
f"[PATH-C] '{query}' Skipped (no retriever, fast mode, or no internet_search flag)"
229-
)
225+
if not self.internet_retriever or mode == "fast":
226+
logger.info(f"[PATH-C] '{query}' Skipped (no retriever, fast mode)")
230227
return []
231228
if memory_type not in ["All"]:
232229
return []
230+
logger.info(f"[PATH-C] '{query}' Retrieving from internet...")
233231
items = self.internet_retriever.retrieve_from_internet(
234232
query=query, top_k=top_k, parsed_goal=parsed_goal, info=info
235233
)
234+
logger.info(f"[PATH-C] '{query}' Retrieved from internet {len(items)} items: {items}")
236235
return self.reranker.rerank(
237236
query=query,
238237
query_embedding=query_embedding[0],

0 commit comments

Comments
 (0)