@@ -99,15 +99,13 @@ def __init__(
 
     def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, Any]:
         """
-        Chat with MemOS for complete response (non-streaming).
-
-        This implementation directly uses search/add handlers instead of mos_server.
+        Chat with MemOS for a complete chat response (non-streaming).
 
         Args:
             chat_req: Chat complete request
 
         Returns:
-            Dictionary with response and references
+            Dictionary with the complete chat response and extracted reasoning
 
         Raises:
             HTTPException: If chat fails
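Editor's note: for orientation, a minimal sketch of the request side, inferred only from the fields this diff actually touches (`query`, `model_name_or_path`, `add_message_on_answer`). The real `APIChatCompleteRequest` schema is not shown in these hunks, so treat the model below as a hypothetical stand-in.

```python
# Hedged sketch: field names inferred from chat_req usage in this diff;
# the real APIChatCompleteRequest may define more (or differently typed) fields.
from pydantic import BaseModel


class APIChatCompleteRequestSketch(BaseModel):
    query: str  # the user message (chat_req.query)
    model_name_or_path: str | None = None  # optional model override
    add_message_on_answer: bool = True  # persist the turn to memory after answering


req = APIChatCompleteRequestSketch(query="What did I schedule for Friday?")
# handler.handle_chat_complete(req) would return a dict with the response
# and any extracted reasoning, per the docstring above.
```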
@@ -161,7 +159,7 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, Any]:
                 {"role": "user", "content": chat_req.query},
             ]
 
-            self.logger.info("Starting to generate complete response...")
+            self.logger.info("[Cloud Service] Starting to generate chat complete response...")
 
             # Step 3: Generate complete response from LLM
             if chat_req.model_name_or_path and chat_req.model_name_or_path not in self.chat_llms:
@@ -172,11 +170,23 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, Any]:
 
             model = chat_req.model_name_or_path or next(iter(self.chat_llms.keys()))
 
-            self.logger.info(f"[Cloud Service Chat Complete Model] : {model}")
+            self.logger.info(f"[Cloud Service] Chat Complete Model: {model}")
             strat = time.time()
             response = self.chat_llms[model].generate(current_messages, model_name_or_path=model)
             end = time.time()
-            self.logger.info(f"[Cloud Service Chat Complete Time]: {end - strat} seconds")
+            self.logger.info(f"[Cloud Service] Chat Complete Time: {end - strat} seconds")
+
+            if not response:
+                self.logger.error(
+                    f"[Cloud Service] Chat Complete Failed, LLM response is {response}"
+                )
+                raise HTTPException(
+                    status_code=500, detail="Chat complete failed, LLM response is None"
+                )
+
+            self.logger.info(
+                f"[Cloud Service] Chat Complete LLM Input: {json.dumps(current_messages, ensure_ascii=False)} Chat Complete LLM Response: {response}"
+            )
 
             # Step 4: start add after chat asynchronously
             if chat_req.add_message_on_answer:
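Editor's note: the hunk above repeats a measure-then-log pattern (a `start`/`end` pair around the LLM call, then a tagged `[Cloud Service] ... Time` line). A hypothetical helper, not part of this PR, that would express the same pattern as a context manager:

```python
# Hedged refactor sketch: logger name and tag format are assumptions
# mirroring the "[Cloud Service] <label> Time: <n> seconds" lines in the diff.
import logging
import time
from contextlib import contextmanager

logger = logging.getLogger(__name__)


@contextmanager
def log_elapsed(label: str, tag: str = "[Cloud Service]"):
    """Log '<tag> <label> Time: <seconds> seconds' around a block."""
    start = time.time()
    try:
        yield
    finally:
        logger.info(f"{tag} {label} Time: {time.time() - start} seconds")


# Usage mirroring the diff (assuming chat_llms, model, current_messages exist):
# with log_elapsed("Chat Complete"):
#     response = chat_llms[model].generate(current_messages, model_name_or_path=model)
```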
@@ -192,7 +202,7 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, Any]:
                     async_mode="async",
                 )
                 end = time.time()
-                self.logger.info(f"[Cloud Service Chat Add Time] : {end - start} seconds")
+                self.logger.info(f"[Cloud Service] Chat Add Time: {end - start} seconds")
 
             match = re.search(r"<think>([\s\S]*?)</think>", response)
             reasoning_text = match.group(1) if match else None
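Editor's note: a self-contained illustration of the `<think>` extraction that closes this hunk. The final stripping step is an assumption; this hunk only shows the capture.

```python
import re

response = "<think>Recall the user's Friday plan from memory.</think>You have a 3pm review."

# Same pattern as the handler: capture everything between <think> tags.
match = re.search(r"<think>([\s\S]*?)</think>", response)
reasoning_text = match.group(1) if match else None
# reasoning_text == "Recall the user's Friday plan from memory."

# Assumption: the user-visible answer drops the think block entirely.
visible_answer = re.sub(r"<think>[\s\S]*?</think>", "", response).strip()
# visible_answer == "You have a 3pm review."
```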
@@ -208,14 +218,12 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, Any]:
         except ValueError as err:
             raise HTTPException(status_code=404, detail=str(traceback.format_exc())) from err
         except Exception as err:
-            self.logger.error(f"Failed to complete chat: {traceback.format_exc()}")
+            self.logger.error(f"[Cloud Service] Failed to complete chat: {traceback.format_exc()}")
             raise HTTPException(status_code=500, detail=str(traceback.format_exc())) from err
 
     def handle_chat_stream(self, chat_req: ChatRequest) -> StreamingResponse:
         """
-        Chat with MemOS via Server-Sent Events (SSE) stream using search/add handlers.
-
-        This implementation directly uses search_handler and add_handler.
+        Chat with MemOS, streaming the response via Server-Sent Events (SSE).
 
         Args:
             chat_req: Chat stream request
@@ -229,7 +237,7 @@ def handle_chat_stream(self, chat_req: ChatRequest) -> StreamingResponse:
         try:
 
             def generate_chat_response() -> Generator[str, None, None]:
-                """Generate chat response as SSE stream."""
+                """Generate the chat stream response as an SSE stream."""
                 try:
                     # Resolve readable cube IDs (for search)
                     readable_cube_ids = chat_req.readable_cube_ids or (
@@ -289,7 +297,7 @@ def generate_chat_response() -> Generator[str, None, None]:
                     ]
 
                     self.logger.info(
-                        f"user_id: {chat_req.user_id}, readable_cube_ids: {readable_cube_ids}, "
+                        f"[Cloud Service] chat stream user_id: {chat_req.user_id}, readable_cube_ids: {readable_cube_ids}, "
                         f"current_system_prompt: {system_prompt}"
                     )
 
@@ -304,14 +312,12 @@ def generate_chat_response() -> Generator[str, None, None]:
                     )
 
                     model = chat_req.model_name_or_path or next(iter(self.chat_llms.keys()))
-                    self.logger.info(f"[Cloud Service Chat Stream Model] : {model}")
+                    self.logger.info(f"[Cloud Service] Chat Stream Model: {model}")
 
                     start = time.time()
                     response_stream = self.chat_llms[model].generate_stream(
                         current_messages, model_name_or_path=model
                     )
-                    end = time.time()
-                    self.logger.info(f"[Cloud Service Chat Stream Time]: {end - start} seconds")
 
                     # Stream the response
                     buffer = ""
@@ -337,6 +343,13 @@ def generate_chat_response() -> Generator[str, None, None]:
                             chunk_data = f"data: {json.dumps({'type': 'text', 'data': chunk}, ensure_ascii=False)}\n\n"
                             yield chunk_data
 
+                    end = time.time()
+                    self.logger.info(f"[Cloud Service] Chat Stream Time: {end - start} seconds")
+
+                    self.logger.info(
+                        f"[Cloud Service] Chat Stream LLM Input: {json.dumps(current_messages, ensure_ascii=False)} Chat Stream LLM Response: {full_response}"
+                    )
+
                     current_messages.append({"role": "assistant", "content": full_response})
                     if chat_req.add_message_on_answer:
                         # Resolve writable cube IDs (for add)
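Editor's note: both stream handlers frame every event as `data: {json}\n\n`. A minimal client-side parsing sketch, assuming only the `type` values visible in this diff (`text` and `error` here; `status`, `suggestion`, and `end` in the playground handler):

```python
import json
from collections.abc import Iterator


def parse_sse_frames(raw: str) -> Iterator[dict]:
    """Split SSE 'data:' frames on blank lines and decode each JSON payload."""
    for frame in raw.split("\n\n"):
        frame = frame.strip()
        if frame.startswith("data: "):
            yield json.loads(frame[len("data: "):])


raw = (
    'data: {"type": "text", "data": "Hello"}\n\n'
    'data: {"type": "text", "data": ", world"}\n\n'
)
full_response = "".join(
    event["data"] for event in parse_sse_frames(raw) if event["type"] == "text"
)
assert full_response == "Hello, world"
```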
@@ -354,10 +367,10 @@ def generate_chat_response() -> Generator[str, None, None]:
                         )
                         end = time.time()
                         self.logger.info(
-                            f"[Cloud Service Chat Stream Add Time] : {end - start} seconds"
+                            f"[Cloud Service] Chat Stream Add Time: {end - start} seconds"
                         )
                 except Exception as e:
-                    self.logger.error(f"Error in chat stream: {e}", exc_info=True)
+                    self.logger.error(f"[Cloud Service] Error in chat stream: {e}", exc_info=True)
                     error_data = f"data: {json.dumps({'type': 'error', 'content': str(traceback.format_exc())})}\n\n"
                     yield error_data
 
@@ -377,14 +390,14 @@ def generate_chat_response() -> Generator[str, None, None]:
         except ValueError as err:
             raise HTTPException(status_code=404, detail=str(traceback.format_exc())) from err
         except Exception as err:
-            self.logger.error(f"Failed to start chat stream: {traceback.format_exc()}")
+            self.logger.error(
+                f"[Cloud Service] Failed to start chat stream: {traceback.format_exc()}"
+            )
             raise HTTPException(status_code=500, detail=str(traceback.format_exc())) from err
 
     def handle_chat_stream_playground(self, chat_req: ChatPlaygroundRequest) -> StreamingResponse:
         """
-        Chat with MemOS via Server-Sent Events (SSE) stream using search/add handlers.
-
-        This implementation directly uses search_handler and add_handler.
+        Chat with MemOS for the playground, streaming the response via Server-Sent Events (SSE).
 
         Args:
             chat_req: Chat stream request
@@ -398,7 +411,7 @@ def handle_chat_stream_playground(self, chat_req: ChatPlaygroundRequest) -> StreamingResponse:
         try:
 
             def generate_chat_response() -> Generator[str, None, None]:
-                """Generate chat response as SSE stream."""
+                """Generate the playground chat stream response as an SSE stream."""
                 try:
                     import time
 
@@ -434,7 +447,9 @@ def generate_chat_response() -> Generator[str, None, None]:
                     start_time = time.time()
                     search_response = self.search_handler.handle_search_memories(search_req)
                     end_time = time.time()
-                    self.logger.info(f"first search time: {end_time - start_time}")
+                    self.logger.info(
+                        f"[PLAYGROUND CHAT] first search time: {end_time - start_time}"
+                    )
 
                     yield f"data: {json.dumps({'type': 'status', 'data': '1'})}\n\n"
 
@@ -481,7 +496,7 @@ def generate_chat_response() -> Generator[str, None, None]:
                         conversation=chat_req.history,
                         mode="fine",
                     )
-                    self.logger.info(f"[PLAYGROUND chat parsed_goal] : {parsed_goal}")
+                    self.logger.info(f"[PLAYGROUND CHAT] parsed_goal: {parsed_goal}")
 
                     if chat_req.beginner_guide_step == "first":
                         chat_req.internet_search = False
@@ -512,12 +527,14 @@ def generate_chat_response() -> Generator[str, None, None]:
                         search_tool_memory=False,
                     )
 
-                    self.logger.info(f"[PLAYGROUND second search query] : {search_req.query}")
+                    self.logger.info(f"[PLAYGROUND CHAT] second search query: {search_req.query}")
 
                     start_time = time.time()
                     search_response = self.search_handler.handle_search_memories(search_req)
                     end_time = time.time()
-                    self.logger.info(f"second search time: {end_time - start_time}")
+                    self.logger.info(
+                        f"[PLAYGROUND CHAT] second search time: {end_time - start_time}"
+                    )
 
                     # for playground, add the query to memory without response
                     self._start_add_to_memory(
@@ -578,13 +595,15 @@ def generate_chat_response() -> Generator[str, None, None]:
                     ]
 
                     self.logger.info(
-                        f"user_id: {chat_req.user_id}, readable_cube_ids: {readable_cube_ids}, "
+                        f"[PLAYGROUND CHAT] user_id: {chat_req.user_id}, readable_cube_ids: {readable_cube_ids}, "
                        f"current_system_prompt: {system_prompt}"
                     )
 
                     # Step 3: Generate streaming response from LLM
                     try:
                         model = next(iter(self.chat_llms.keys()))
+                        self.logger.info(f"[PLAYGROUND CHAT] Chat Playground Stream Model: {model}")
+                        start = time.time()
                         response_stream = self.chat_llms[model].generate_stream(
                             current_messages, model_name_or_path=model
                         )
@@ -629,10 +648,19 @@ def generate_chat_response() -> Generator[str, None, None]:
                                 chunk_data = f"data: {json.dumps({'type': 'text', 'data': processed_chunk}, ensure_ascii=False)}\n\n"
                                 yield chunk_data
 
+                        end = time.time()
+                        self.logger.info(
+                            f"[PLAYGROUND CHAT] Chat Playground Stream Time: {end - start} seconds"
+                        )
+                        self.logger.info(
+                            f"[PLAYGROUND CHAT] Chat Playground Stream LLM Input: {json.dumps(current_messages, ensure_ascii=False)} Chat Playground Stream LLM Response: {full_response}"
+                        )
+
                     except Exception as llm_error:
                         # Log the error
                         self.logger.error(
-                            f"Error during LLM generation: {llm_error}", exc_info=True
+                            f"[PLAYGROUND CHAT] Error during LLM generation: {llm_error}",
+                            exc_info=True,
                         )
                         # Send error message to client
                         error_msg = f"模型生成错误: {llm_error!s}"
@@ -654,7 +682,7 @@ def generate_chat_response() -> Generator[str, None, None]:
                     # Get further suggestion
                     current_messages.append({"role": "assistant", "content": full_response})
                     further_suggestion = self._get_further_suggestion(current_messages)
-                    self.logger.info(f"further_suggestion: {further_suggestion}")
+                    self.logger.info(f"[PLAYGROUND CHAT] further_suggestion: {further_suggestion}")
                     yield f"data: {json.dumps({'type': 'suggestion', 'data': further_suggestion})}\n\n"
 
                     yield f"data: {json.dumps({'type': 'end'})}\n\n"
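Editor's note: taken together, the playground hunks emit five event types: `status` (search progress), `text` (answer chunks), `suggestion` (follow-up question), `end`, and `error`. A hedged client-state sketch; any ordering beyond what these hunks show is an assumption.

```python
# Hedged sketch: event keys ('data' for status/text/suggestion, 'content'
# for error) match the frames visible in this diff; nothing else is assumed.
def apply_playground_event(event: dict, state: dict) -> bool:
    """Fold one decoded SSE event into client state; return False when done."""
    kind = event.get("type")
    if kind == "text":
        state["answer"] = state.get("answer", "") + event["data"]
    elif kind == "suggestion":
        state["suggestion"] = event["data"]
    elif kind == "status":
        state["search_stage"] = event["data"]
    elif kind == "error":
        state["error"] = event.get("content")
        return False
    return kind != "end"


state: dict = {}
for ev in [
    {"type": "status", "data": "1"},
    {"type": "text", "data": "Answer."},
    {"type": "suggestion", "data": "Ask about next week?"},
    {"type": "end"},
]:
    if not apply_playground_event(ev, state):
        break
```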
@@ -685,7 +713,9 @@ def generate_chat_response() -> Generator[str, None, None]:
                     )
 
                 except Exception as e:
-                    self.logger.error(f"Error in chat stream: {e}", exc_info=True)
+                    self.logger.error(
+                        f"[PLAYGROUND CHAT] Error in playground chat stream: {e}", exc_info=True
+                    )
                     error_data = f"data: {json.dumps({'type': 'error', 'content': str(traceback.format_exc())})}\n\n"
                     yield error_data
 
@@ -705,7 +735,9 @@ def generate_chat_response() -> Generator[str, None, None]:
         except ValueError as err:
             raise HTTPException(status_code=404, detail=str(traceback.format_exc())) from err
         except Exception as err:
-            self.logger.error(f"Failed to start chat stream: {traceback.format_exc()}")
+            self.logger.error(
+                f"[PLAYGROUND CHAT] Failed to start playground chat stream: {traceback.format_exc()}"
+            )
             raise HTTPException(status_code=500, detail=str(traceback.format_exc())) from err
 
     def _dedup_and_supplement_memories(