
Commit 5b27384

feat: add default processing in mem-reader (#325)
1 parent 5639a91 commit 5b27384

File tree

1 file changed: +108 −70 lines

src/memos/mem_reader/simple_struct.py

Lines changed: 108 additions & 70 deletions
@@ -56,44 +56,60 @@ def detect_lang(text):
 
 def _build_node(idx, message, info, scene_file, llm, parse_json_result, embedder):
     # generate
-    raw = llm.generate(message)
-    if not raw:
+    try:
+        raw = llm.generate(message)
+        if not raw:
+            logger.warning(f"[LLM] Empty generation for input: {message}")
+            return None
+    except Exception as e:
+        logger.error(f"[LLM] Exception during generation: {e}")
         return None
 
     # parse_json_result
-    chunk_res = parse_json_result(raw)
-    if not chunk_res:
+    try:
+        chunk_res = parse_json_result(raw)
+        if not chunk_res:
+            logger.warning(f"[Parse] Failed to parse result: {raw}")
+            return None
+    except Exception as e:
+        logger.error(f"[Parse] Exception during JSON parsing: {e}")
         return None
 
-    value = chunk_res.get("value")
-    if not value:
+    try:
+        value = chunk_res.get("value", "").strip()
+        if not value:
+            logger.warning("[BuildNode] value is empty")
+            return None
+
+        tags = chunk_res.get("tags", [])
+        if not isinstance(tags, list):
+            tags = []
+
+        key = chunk_res.get("key", None)
+
+        embedding = embedder.embed([value])[0]
+
+        return TextualMemoryItem(
+            memory=value,
+            metadata=TreeNodeTextualMemoryMetadata(
+                user_id=info.get("user_id", ""),
+                session_id=info.get("session_id", ""),
+                memory_type="LongTermMemory",
+                status="activated",
+                tags=tags,
+                key=key,
+                embedding=embedding,
+                usage=[],
+                sources=[{"type": "doc", "doc_path": f"{scene_file}_{idx}"}],
+                background="",
+                confidence=0.99,
+                type="fact",
+            ),
+        )
+    except Exception as e:
+        logger.error(f"[BuildNode] Error building node: {e}")
         return None
 
-    # embed
-    embedding = embedder.embed([value])[0]
-
-    # TextualMemoryItem
-    tags = chunk_res["tags"] if isinstance(chunk_res.get("tags"), list) else []
-    key = chunk_res.get("key", None)
-    node_i = TextualMemoryItem(
-        memory=value,
-        metadata=TreeNodeTextualMemoryMetadata(
-            user_id=info.get("user_id"),
-            session_id=info.get("session_id"),
-            memory_type="LongTermMemory",
-            status="activated",
-            tags=tags,
-            key=key,
-            embedding=embedding,
-            usage=[],
-            sources=[{"type": "doc", "doc_path": f"{scene_file}_{idx}"}],
-            background="",
-            confidence=0.99,
-            type="fact",
-        ),
-    )
-    return node_i
-
 
 class SimpleStructMemReader(BaseMemReader, ABC):
     """Naive implementation of MemReader."""
@@ -129,40 +145,57 @@ def _process_chat_data(self, scene_data_info, info):
 
         messages = [{"role": "user", "content": prompt}]
 
-        response_text = self.llm.generate(messages)
-        response_json = self.parse_json_result(response_text)
+        try:
+            response_text = self.llm.generate(messages)
+            response_json = self.parse_json_result(response_text)
+        except Exception as e:
+            logger.error(f"[LLM] Exception during chat generation: {e}")
+            response_json = {
+                "memory list": [
+                    {
+                        "key": "\n".join(mem_list)[:10],
+                        "memory_type": "UserMemory",
+                        "value": "\n".join(mem_list),
+                        "tags": [],
+                    }
+                ],
+                "summary": "\n".join(mem_list),
+            }
 
         chat_read_nodes = []
         for memory_i_raw in response_json.get("memory list", []):
-            memory_type = (
-                memory_i_raw.get("memory_type", "LongTermMemory")
-                .replace("长期记忆", "LongTermMemory")
-                .replace("用户记忆", "UserMemory")
-            )
-
-            if memory_type not in ["LongTermMemory", "UserMemory"]:
-                memory_type = "LongTermMemory"
-
-            node_i = TextualMemoryItem(
-                memory=memory_i_raw.get("value", ""),
-                metadata=TreeNodeTextualMemoryMetadata(
-                    user_id=info.get("user_id"),
-                    session_id=info.get("session_id"),
-                    memory_type=memory_type,
-                    status="activated",
-                    tags=memory_i_raw.get("tags", [])
-                    if type(memory_i_raw.get("tags", [])) is list
-                    else [],
-                    key=memory_i_raw.get("key", ""),
-                    embedding=self.embedder.embed([memory_i_raw.get("value", "")])[0],
-                    usage=[],
-                    sources=scene_data_info,
-                    background=response_json.get("summary", ""),
-                    confidence=0.99,
-                    type="fact",
-                ),
-            )
-            chat_read_nodes.append(node_i)
+            try:
+                memory_type = (
+                    memory_i_raw.get("memory_type", "LongTermMemory")
+                    .replace("长期记忆", "LongTermMemory")
+                    .replace("用户记忆", "UserMemory")
+                )
+
+                if memory_type not in ["LongTermMemory", "UserMemory"]:
+                    memory_type = "LongTermMemory"
+
+                node_i = TextualMemoryItem(
+                    memory=memory_i_raw.get("value", ""),
+                    metadata=TreeNodeTextualMemoryMetadata(
+                        user_id=info.get("user_id"),
+                        session_id=info.get("session_id"),
+                        memory_type=memory_type,
+                        status="activated",
+                        tags=memory_i_raw.get("tags", [])
+                        if type(memory_i_raw.get("tags", [])) is list
+                        else [],
+                        key=memory_i_raw.get("key", ""),
+                        embedding=self.embedder.embed([memory_i_raw.get("value", "")])[0],
+                        usage=[],
+                        sources=scene_data_info,
+                        background=response_json.get("summary", ""),
+                        confidence=0.99,
+                        type="fact",
+                    ),
+                )
+                chat_read_nodes.append(node_i)
+            except Exception as e:
+                logger.error(f"[ChatReader] Error parsing memory item: {e}")
 
         return chat_read_nodes
 
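If generation or parsing throws, the whole conversation text (mem_list) is folded into a single UserMemory entry so the loop still receives a well-formed structure, and the new per-item guard keeps one malformed entry from discarding the rest of the "memory list". The memory_type normalization inside that guard can be read as the following standalone sketch (the helper name is hypothetical, not part of the module):

def normalize_memory_type(raw_type: str) -> str:
    # Hypothetical helper, extracted for illustration only: localized labels
    # map to their English names and anything else falls back to LongTermMemory.
    memory_type = (
        (raw_type or "LongTermMemory")
        .replace("长期记忆", "LongTermMemory")
        .replace("用户记忆", "UserMemory")
    )
    return memory_type if memory_type in ("LongTermMemory", "UserMemory") else "LongTermMemory"

assert normalize_memory_type("用户记忆") == "UserMemory"
assert normalize_memory_type("episodic") == "LongTermMemory"
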
@@ -267,8 +300,12 @@ def get_scene_data_info(self, scene_data: list, type: str) -> list[str]:
         for item in scene_data:
             try:
                 if os.path.exists(item):
-                    parsed_text = parser.parse(item)
-                    results.append({"file": item, "text": parsed_text})
+                    try:
+                        parsed_text = parser.parse(item)
+                        results.append({"file": item, "text": parsed_text})
+                    except Exception as e:
+                        logger.error(f"[SceneParser] Error parsing {item}: {e}")
+                        continue
                 else:
                     parsed_text = item
                     results.append({"file": "pure_text", "text": parsed_text})
@@ -315,21 +352,22 @@ def _process_doc_data(self, scene_data_info, info, **kwargs):
                     doc_nodes.append(node)
                 except Exception as e:
                     tqdm.write(f"[ERROR] {e}")
+                    logger.error(f"[DocReader] Future task failed: {e}")
         return doc_nodes
 
     def parse_json_result(self, response_text):
         try:
             json_start = response_text.find("{")
             response_text = response_text[json_start:]
             response_text = response_text.replace("```", "").strip()
-            if response_text[-1] != "}":
+            if not response_text.endswith("}"):
                 response_text += "}"
-            response_json = json.loads(response_text)
-            return response_json
+            return json.loads(response_text)
         except json.JSONDecodeError as e:
-            logger.warning(
-                f"Failed to parse LLM response as JSON: {e}\nRaw response:\n{response_text}"
-            )
+            logger.error(f"[JSONParse] Failed to decode JSON: {e}\nRaw:\n{response_text}")
+            return {}
+        except Exception as e:
+            logger.error(f"[JSONParse] Unexpected error: {e}")
             return {}
 
     def transform_memreader(self, data: dict) -> list[TextualMemoryItem]:
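
The hardened parse_json_result trims everything before the first "{", strips ``` fences, appends a closing brace when one is missing, and degrades to an empty dict on any failure. A standalone sketch of that recovery logic (local function name, not the method itself):

import json
import logging

logger = logging.getLogger("json_parse_sketch")

def parse_json_result_sketch(response_text: str) -> dict:
    # Illustrative re-statement of the method's recovery steps.
    try:
        # Drop any preamble before the first "{" and any ``` fences.
        response_text = response_text[response_text.find("{"):]
        response_text = response_text.replace("```", "").strip()
        if not response_text.endswith("}"):
            response_text += "}"
        return json.loads(response_text)
    except json.JSONDecodeError as e:
        logger.error(f"[JSONParse] Failed to decode JSON: {e}")
        return {}
    except Exception as e:
        logger.error(f"[JSONParse] Unexpected error: {e}")
        return {}

print(parse_json_result_sketch('Sure! ```json {"value": "hi", "tags": []} ```'))  # {'value': 'hi', 'tags': []}
print(parse_json_result_sketch("no json at all"))                                 # {}
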
