@@ -56,44 +56,60 @@ def detect_lang(text):
5656
def _build_node(idx, message, info, scene_file, llm, parse_json_result, embedder):
    """Build one long-term memory node from a single document chunk.

    The prompt is sent to the LLM, the reply is parsed into a dict, and the
    dict's ``value`` is embedded and wrapped in a ``TextualMemoryItem``.

    Args:
        idx: Chunk index; only used to build the ``doc_path`` source entry.
        message: Prompt passed unchanged to ``llm.generate``.
        info: Mapping read for ``user_id`` and ``session_id``.
        scene_file: Source document identifier; combined with ``idx`` into
            ``doc_path``.
        llm: Object exposing ``generate(message)``.
        parse_json_result: Callable turning the raw LLM reply into a dict.
        embedder: Object exposing ``embed(texts)``; the first element of
            ``embed([value])`` is stored as the node embedding.

    Returns:
        The constructed ``TextualMemoryItem``, or ``None`` when generation,
        parsing or node construction fails or produces nothing usable.
        Exceptions derived from ``Exception`` are logged, not propagated, so
        one bad chunk does not abort the caller's batch.
    """
    # generate
    try:
        raw = llm.generate(message)
        if not raw:
            logger.warning(f"[LLM] Empty generation for input: {message}")
            return None
    except Exception as e:
        logger.error(f"[LLM] Exception during generation: {e}")
        return None

    # parse_json_result
    try:
        chunk_res = parse_json_result(raw)
        if not chunk_res:
            logger.warning(f"[Parse] Failed to parse result: {raw}")
            return None
    except Exception as e:
        logger.error(f"[Parse] Exception during JSON parsing: {e}")
        return None

    try:
        # The parser is caller-supplied; anything but a dict cannot be read
        # with .get() below, so reject it with an explicit message.
        if not isinstance(chunk_res, dict):
            logger.warning(
                f"[BuildNode] parsed result is not a dict: {type(chunk_res).__name__}"
            )
            return None

        raw_value = chunk_res.get("value")
        # A JSON null or a non-string value must not reach .strip(); report it
        # as a data problem instead of an AttributeError.
        if raw_value is not None and not isinstance(raw_value, str):
            logger.warning(
                f"[BuildNode] value is not a string: {type(raw_value).__name__}"
            )
            return None

        value = (raw_value or "").strip()
        if not value:
            logger.warning("[BuildNode] value is empty")
            return None

        tags = chunk_res.get("tags", [])
        if not isinstance(tags, list):
            tags = []

        key = chunk_res.get("key", None)

        embedding = embedder.embed([value])[0]

        return TextualMemoryItem(
            memory=value,
            metadata=TreeNodeTextualMemoryMetadata(
                user_id=info.get("user_id", ""),
                session_id=info.get("session_id", ""),
                memory_type="LongTermMemory",
                status="activated",
                tags=tags,
                key=key,
                embedding=embedding,
                usage=[],
                sources=[{"type": "doc", "doc_path": f"{scene_file}_{idx}"}],
                background="",
                confidence=0.99,
                type="fact",
            ),
        )
    except Exception as e:
        logger.error(f"[BuildNode] Error building node: {e}")
        return None
97113
98114class SimpleStructMemReader (BaseMemReader , ABC ):
99115 """Naive implementation of MemReader."""
@@ -129,40 +145,57 @@ def _process_chat_data(self, scene_data_info, info):
129145
130146 messages = [{"role" : "user" , "content" : prompt }]
131147
132- response_text = self .llm .generate (messages )
133- response_json = self .parse_json_result (response_text )
148+ try :
149+ response_text = self .llm .generate (messages )
150+ response_json = self .parse_json_result (response_text )
151+ except Exception as e :
152+ logger .error (f"[LLM] Exception during chat generation: { e } " )
153+ response_json = {
154+ "memory list" : [
155+ {
156+ "key" : "\n " .join (mem_list )[:10 ],
157+ "memory_type" : "UserMemory" ,
158+ "value" : "\n " .join (mem_list ),
159+ "tags" : [],
160+ }
161+ ],
162+ "summary" : "\n " .join (mem_list ),
163+ }
134164
135165 chat_read_nodes = []
136166 for memory_i_raw in response_json .get ("memory list" , []):
137- memory_type = (
138- memory_i_raw .get ("memory_type" , "LongTermMemory" )
139- .replace ("长期记忆" , "LongTermMemory" )
140- .replace ("用户记忆" , "UserMemory" )
141- )
142-
143- if memory_type not in ["LongTermMemory" , "UserMemory" ]:
144- memory_type = "LongTermMemory"
145-
146- node_i = TextualMemoryItem (
147- memory = memory_i_raw .get ("value" , "" ),
148- metadata = TreeNodeTextualMemoryMetadata (
149- user_id = info .get ("user_id" ),
150- session_id = info .get ("session_id" ),
151- memory_type = memory_type ,
152- status = "activated" ,
153- tags = memory_i_raw .get ("tags" , [])
154- if type (memory_i_raw .get ("tags" , [])) is list
155- else [],
156- key = memory_i_raw .get ("key" , "" ),
157- embedding = self .embedder .embed ([memory_i_raw .get ("value" , "" )])[0 ],
158- usage = [],
159- sources = scene_data_info ,
160- background = response_json .get ("summary" , "" ),
161- confidence = 0.99 ,
162- type = "fact" ,
163- ),
164- )
165- chat_read_nodes .append (node_i )
167+ try :
168+ memory_type = (
169+ memory_i_raw .get ("memory_type" , "LongTermMemory" )
170+ .replace ("长期记忆" , "LongTermMemory" )
171+ .replace ("用户记忆" , "UserMemory" )
172+ )
173+
174+ if memory_type not in ["LongTermMemory" , "UserMemory" ]:
175+ memory_type = "LongTermMemory"
176+
177+ node_i = TextualMemoryItem (
178+ memory = memory_i_raw .get ("value" , "" ),
179+ metadata = TreeNodeTextualMemoryMetadata (
180+ user_id = info .get ("user_id" ),
181+ session_id = info .get ("session_id" ),
182+ memory_type = memory_type ,
183+ status = "activated" ,
184+ tags = memory_i_raw .get ("tags" , [])
185+ if type (memory_i_raw .get ("tags" , [])) is list
186+ else [],
187+ key = memory_i_raw .get ("key" , "" ),
188+ embedding = self .embedder .embed ([memory_i_raw .get ("value" , "" )])[0 ],
189+ usage = [],
190+ sources = scene_data_info ,
191+ background = response_json .get ("summary" , "" ),
192+ confidence = 0.99 ,
193+ type = "fact" ,
194+ ),
195+ )
196+ chat_read_nodes .append (node_i )
197+ except Exception as e :
198+ logger .error (f"[ChatReader] Error parsing memory item: { e } " )
166199
167200 return chat_read_nodes
168201
@@ -267,8 +300,12 @@ def get_scene_data_info(self, scene_data: list, type: str) -> list[str]:
267300 for item in scene_data :
268301 try :
269302 if os .path .exists (item ):
270- parsed_text = parser .parse (item )
271- results .append ({"file" : item , "text" : parsed_text })
303+ try :
304+ parsed_text = parser .parse (item )
305+ results .append ({"file" : item , "text" : parsed_text })
306+ except Exception as e :
307+ logger .error (f"[SceneParser] Error parsing { item } : { e } " )
308+ continue
272309 else :
273310 parsed_text = item
274311 results .append ({"file" : "pure_text" , "text" : parsed_text })
@@ -315,21 +352,22 @@ def _process_doc_data(self, scene_data_info, info, **kwargs):
315352 doc_nodes .append (node )
316353 except Exception as e :
317354 tqdm .write (f"[ERROR] { e } " )
355+ logger .error (f"[DocReader] Future task failed: { e } " )
318356 return doc_nodes
319357
def parse_json_result(self, response_text):
    """Extract the first JSON object from an LLM reply.

    Everything before the first ``{`` is dropped, Markdown code fences
    (three backticks) are removed, and the first JSON value is decoded.
    Text following the object is ignored. If decoding fails and the text
    does not end with ``}``, one closing brace is appended and decoding is
    retried, which repairs replies cut off just before the final brace.

    Args:
        response_text: Raw text returned by the LLM.

    Returns:
        The decoded object, or ``{}`` when no object can be decoded. Errors
        are logged and never propagated.
    """
    try:
        json_start = response_text.find("{")
        if json_start == -1:
            # Without this guard the slice below would be text[-1:], i.e.
            # the last character, which can never decode to an object.
            logger.error(f"[JSONParse] No JSON object found. Raw:\n{response_text}")
            return {}

        response_text = response_text[json_start:].replace("```", "").strip()
        decoder = json.JSONDecoder()
        try:
            # raw_decode stops at the end of the first JSON value, so prose
            # after the object does not break parsing.
            return decoder.raw_decode(response_text)[0]
        except json.JSONDecodeError:
            if response_text.endswith("}"):
                raise
            # Truncated reply: retry once with the missing closing brace.
            return decoder.raw_decode(response_text + "}")[0]
    except json.JSONDecodeError as e:
        logger.error(f"[JSONParse] Failed to decode JSON: {e}\nRaw:\n{response_text}")
        return {}
    except Exception as e:
        logger.error(f"[JSONParse] Unexpected error: {e}")
        return {}
334372
335373 def transform_memreader (self , data : dict ) -> list [TextualMemoryItem ]:
0 commit comments