
Commit b11c768

Authored by Wang-Daojiyuan (co-authors: yuan.wang, chunyu li, CaralHsi)
fix: improve chat playground stability and chat handler initialization (#770)
* fix playground bug, internet search judge
* fix playground internet bug
* modify delete mem
* fix tool response bug in multi-cube
* fix bug in playground chat handler and internet search
* modify prompt
* fix bug in playground
* fix playground bug
* fix bug
* fix code
* fix model bug in playground
* modify plan B
* modify LLM params
* add logger in playground
* modify code
* fix bug
* modify code
* modify code
* fix bug
* fix search bug in playground
* fix bug
* move scheduler to back
* modify preference location
* modify fast internet search
* add tags and new package
* modify prompt, fix bug
* remove nltk due to image problem
* modify prompt
* fix bug, remove redundant field
* fix bug
* fix playground bug
* fix bug
* boost internet top-k
* boost to 50
* fix citation bug
* modify search
* add remote query in playground
* fix bug
* fix preference bug
* move add position
* modify chat prompt
* modify overthinking
* add logger in playground chat
* modify mem
* remove "must" from prompt
* add logger
* add logger

---------

Co-authored-by: yuan.wang <[email protected]>
Co-authored-by: chunyu li <[email protected]>
Co-authored-by: CaralHsi <[email protected]>
1 parent 5cf0282 commit b11c768

File tree

5 files changed: +104 -41 lines changed


src/memos/api/handlers/chat_handler.py

Lines changed: 65 additions & 33 deletions
@@ -99,15 +99,13 @@ def __init__(
 
     def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, Any]:
         """
-        Chat with MemOS for complete response (non-streaming).
-
-        This implementation directly uses search/add handlers instead of mos_server.
+        Chat with MemOS for chat complete response (non-streaming).
 
         Args:
             chat_req: Chat complete request
 
         Returns:
-            Dictionary with response and references
+            Dictionary with chat complete response and reasoning
 
         Raises:
             HTTPException: If chat fails
@@ -161,7 +159,7 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, Any]:
                 {"role": "user", "content": chat_req.query},
             ]
 
-            self.logger.info("Starting to generate complete response...")
+            self.logger.info("[Cloud Service] Starting to generate chat complete response...")
 
             # Step 3: Generate complete response from LLM
             if chat_req.model_name_or_path and chat_req.model_name_or_path not in self.chat_llms:
@@ -172,11 +170,23 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, Any]:
 
             model = chat_req.model_name_or_path or next(iter(self.chat_llms.keys()))
 
-            self.logger.info(f"[Cloud Service Chat Complete Model]: {model}")
+            self.logger.info(f"[Cloud Service] Chat Complete Model: {model}")
             strat = time.time()
             response = self.chat_llms[model].generate(current_messages, model_name_or_path=model)
             end = time.time()
-            self.logger.info(f"[Cloud Service Chat Complete Time]: {end - strat} seconds")
+            self.logger.info(f"[Cloud Service] Chat Complete Time: {end - strat} seconds")
+
+            if not response:
+                self.logger.error(
+                    f"[Cloud Service] Chat Complete Failed, LLM response is {response}"
+                )
+                raise HTTPException(
+                    status_code=500, detail="Chat complete failed, LLM response is None"
+                )
+
+            self.logger.info(
+                f"[Cloud Service] Chat Complete LLM Input: {json.dumps(current_messages, ensure_ascii=False)} Chat Complete LLM Response: {response}"
+            )
 
             # Step 4: start add after chat asynchronously
             if chat_req.add_message_on_answer:
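
Note: the empty-response guard added above turns a None or empty completion into an explicit HTTP 500 at the point of failure. A minimal standalone sketch of the same pattern (the helper name and wiring are illustrative, not from this repo):

    from fastapi import HTTPException

    def require_llm_response(response, logger):
        # Hypothetical helper mirroring the guard above: log the bad payload,
        # then fail loudly instead of handing None to code that expects text.
        if not response:
            logger.error(f"[Cloud Service] Chat Complete Failed, LLM response is {response}")
            raise HTTPException(status_code=500, detail="Chat complete failed, LLM response is None")
        return response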
@@ -192,7 +202,7 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, Any]:
                     async_mode="async",
                 )
                 end = time.time()
-                self.logger.info(f"[Cloud Service Chat Add Time]: {end - start} seconds")
+                self.logger.info(f"[Cloud Service] Chat Add Time: {end - start} seconds")
 
             match = re.search(r"<think>([\s\S]*?)</think>", response)
             reasoning_text = match.group(1) if match else None
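
For reference, the <think> parsing above splits a tagged completion like this (values are illustrative):

    import re

    response = "<think>User asked for a summary; recall prior notes.</think>Here is the summary."
    match = re.search(r"<think>([\s\S]*?)</think>", response)
    reasoning_text = match.group(1) if match else None
    print(reasoning_text)  # User asked for a summary; recall prior notes.
    # Stripping the block leaves the user-facing answer (illustrative, not repo code):
    print(re.sub(r"<think>[\s\S]*?</think>", "", response).strip())  # Here is the summary.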
@@ -208,14 +218,12 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, Any]:
         except ValueError as err:
             raise HTTPException(status_code=404, detail=str(traceback.format_exc())) from err
         except Exception as err:
-            self.logger.error(f"Failed to complete chat: {traceback.format_exc()}")
+            self.logger.error(f"[Cloud Service] Failed to chat complete: {traceback.format_exc()}")
             raise HTTPException(status_code=500, detail=str(traceback.format_exc())) from err
 
     def handle_chat_stream(self, chat_req: ChatRequest) -> StreamingResponse:
         """
-        Chat with MemOS via Server-Sent Events (SSE) stream using search/add handlers.
-
-        This implementation directly uses search_handler and add_handler.
+        Chat with MemOS via Server-Sent Events (SSE) stream for chat stream response.
 
         Args:
             chat_req: Chat stream request
@@ -229,7 +237,7 @@ def handle_chat_stream(self, chat_req: ChatRequest) -> StreamingResponse:
         try:
 
             def generate_chat_response() -> Generator[str, None, None]:
-                """Generate chat response as SSE stream."""
+                """Generate chat stream response as SSE stream."""
                 try:
                     # Resolve readable cube IDs (for search)
                     readable_cube_ids = chat_req.readable_cube_ids or (
@@ -289,7 +297,7 @@ def generate_chat_response() -> Generator[str, None, None]:
                     ]
 
                     self.logger.info(
-                        f"user_id: {chat_req.user_id}, readable_cube_ids: {readable_cube_ids}, "
+                        f"[Cloud Service] chat stream user_id: {chat_req.user_id}, readable_cube_ids: {readable_cube_ids}, "
                         f"current_system_prompt: {system_prompt}"
                     )
 
@@ -304,14 +312,12 @@ def generate_chat_response() -> Generator[str, None, None]:
                     )
 
                     model = chat_req.model_name_or_path or next(iter(self.chat_llms.keys()))
-                    self.logger.info(f"[Cloud Service Chat Stream Model]: {model}")
+                    self.logger.info(f"[Cloud Service] Chat Stream Model: {model}")
 
                     start = time.time()
                     response_stream = self.chat_llms[model].generate_stream(
                         current_messages, model_name_or_path=model
                     )
-                    end = time.time()
-                    self.logger.info(f"[Cloud Service Chat Stream Time]: {end - start} seconds")
 
                     # Stream the response
                     buffer = ""
@@ -337,6 +343,13 @@ def generate_chat_response() -> Generator[str, None, None]:
                             chunk_data = f"data: {json.dumps({'type': 'text', 'data': chunk}, ensure_ascii=False)}\n\n"
                             yield chunk_data
 
+                    end = time.time()
+                    self.logger.info(f"[Cloud Service] Chat Stream Time: {end - start} seconds")
+
+                    self.logger.info(
+                        f"[Cloud Service] Chat Stream LLM Input: {json.dumps(current_messages, ensure_ascii=False)} Chat Stream LLM Response: {full_response}"
+                    )
+
                     current_messages.append({"role": "assistant", "content": full_response})
                     if chat_req.add_message_on_answer:
                         # Resolve writable cube IDs (for add)
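
Moving the end timestamp after the chunk loop is the substantive fix in this hunk: generate_stream returns a lazy generator, so timing it at call time measured only call overhead, not generation. A small sketch of why:

    import time

    def generate_stream():
        # Stand-in for a streaming LLM call: work happens while the
        # generator is consumed, not when it is created.
        for chunk in ("Hel", "lo"):
            time.sleep(0.5)
            yield chunk

    start = time.time()
    stream = generate_stream()
    print(f"after call:  {time.time() - start:.2f}s")  # ~0.00s, nothing has run yet
    full_response = "".join(stream)
    print(f"after drain: {time.time() - start:.2f}s")  # ~1.00s, the real stream time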
@@ -354,10 +367,10 @@ def generate_chat_response() -> Generator[str, None, None]:
                         )
                         end = time.time()
                         self.logger.info(
-                            f"[Cloud Service Chat Stream Add Time]: {end - start} seconds"
+                            f"[Cloud Service] Chat Stream Add Time: {end - start} seconds"
                         )
                 except Exception as e:
-                    self.logger.error(f"Error in chat stream: {e}", exc_info=True)
+                    self.logger.error(f"[Cloud Service] Error in chat stream: {e}", exc_info=True)
                     error_data = f"data: {json.dumps({'type': 'error', 'content': str(traceback.format_exc())})}\n\n"
                     yield error_data
 
@@ -377,14 +390,14 @@ def generate_chat_response() -> Generator[str, None, None]:
         except ValueError as err:
             raise HTTPException(status_code=404, detail=str(traceback.format_exc())) from err
         except Exception as err:
-            self.logger.error(f"Failed to start chat stream: {traceback.format_exc()}")
+            self.logger.error(
+                f"[Cloud Service] Failed to start chat stream: {traceback.format_exc()}"
+            )
             raise HTTPException(status_code=500, detail=str(traceback.format_exc())) from err
 
     def handle_chat_stream_playground(self, chat_req: ChatPlaygroundRequest) -> StreamingResponse:
         """
-        Chat with MemOS via Server-Sent Events (SSE) stream using search/add handlers.
-
-        This implementation directly uses search_handler and add_handler.
+        Chat with MemOS via Server-Sent Events (SSE) stream for playground chat stream response.
 
         Args:
             chat_req: Chat stream request
@@ -398,7 +411,7 @@ def handle_chat_stream_playground(self, chat_req: ChatPlaygroundRequest) -> StreamingResponse:
         try:
 
             def generate_chat_response() -> Generator[str, None, None]:
-                """Generate chat response as SSE stream."""
+                """Generate playground chat stream response as SSE stream."""
                 try:
                     import time
 
@@ -434,7 +447,9 @@ def generate_chat_response() -> Generator[str, None, None]:
                     start_time = time.time()
                     search_response = self.search_handler.handle_search_memories(search_req)
                     end_time = time.time()
-                    self.logger.info(f"first search time: {end_time - start_time}")
+                    self.logger.info(
+                        f"[PLAYGROUND CHAT] first search time: {end_time - start_time}"
+                    )
 
                     yield f"data: {json.dumps({'type': 'status', 'data': '1'})}\n\n"
 
@@ -481,7 +496,7 @@ def generate_chat_response() -> Generator[str, None, None]:
                         conversation=chat_req.history,
                         mode="fine",
                     )
-                    self.logger.info(f"[PLAYGROUND chat parsed_goal]: {parsed_goal}")
+                    self.logger.info(f"[PLAYGROUND CHAT] parsed_goal: {parsed_goal}")
 
                     if chat_req.beginner_guide_step == "first":
                         chat_req.internet_search = False
@@ -512,12 +527,14 @@ def generate_chat_response() -> Generator[str, None, None]:
                         search_tool_memory=False,
                     )
 
-                    self.logger.info(f"[PLAYGROUND second search query]: {search_req.query}")
+                    self.logger.info(f"[PLAYGROUND CHAT] second search query: {search_req.query}")
 
                     start_time = time.time()
                     search_response = self.search_handler.handle_search_memories(search_req)
                     end_time = time.time()
-                    self.logger.info(f"second search time: {end_time - start_time}")
+                    self.logger.info(
+                        f"[PLAYGROUND CHAT] second search time: {end_time - start_time}"
+                    )
 
                     # for playground, add the query to memory without response
                     self._start_add_to_memory(
@@ -578,13 +595,15 @@ def generate_chat_response() -> Generator[str, None, None]:
                     ]
 
                     self.logger.info(
-                        f"user_id: {chat_req.user_id}, readable_cube_ids: {readable_cube_ids}, "
+                        f"[PLAYGROUND CHAT] user_id: {chat_req.user_id}, readable_cube_ids: {readable_cube_ids}, "
                         f"current_system_prompt: {system_prompt}"
                     )
 
                     # Step 3: Generate streaming response from LLM
                     try:
                         model = next(iter(self.chat_llms.keys()))
+                        self.logger.info(f"[PLAYGROUND CHAT] Chat Playground Stream Model: {model}")
+                        start = time.time()
                         response_stream = self.chat_llms[model].generate_stream(
                             current_messages, model_name_or_path=model
                         )
@@ -629,10 +648,19 @@ def generate_chat_response() -> Generator[str, None, None]:
                                 chunk_data = f"data: {json.dumps({'type': 'text', 'data': processed_chunk}, ensure_ascii=False)}\n\n"
                                 yield chunk_data
 
+                        end = time.time()
+                        self.logger.info(
+                            f"[PLAYGROUND CHAT] Chat Playground Stream Time: {end - start} seconds"
+                        )
+                        self.logger.info(
+                            f"[PLAYGROUND CHAT] Chat Playground Stream LLM Input: {json.dumps(current_messages, ensure_ascii=False)} Chat Playground Stream LLM Response: {full_response}"
+                        )
+
                     except Exception as llm_error:
                         # Log the error
                         self.logger.error(
-                            f"Error during LLM generation: {llm_error}", exc_info=True
+                            f"[PLAYGROUND CHAT] Error during LLM generation: {llm_error}",
+                            exc_info=True,
                         )
                         # Send error message to client
                         error_msg = f"模型生成错误: {llm_error!s}"
@@ -654,7 +682,7 @@ def generate_chat_response() -> Generator[str, None, None]:
                     # Get further suggestion
                     current_messages.append({"role": "assistant", "content": full_response})
                     further_suggestion = self._get_further_suggestion(current_messages)
-                    self.logger.info(f"further_suggestion: {further_suggestion}")
+                    self.logger.info(f"[PLAYGROUND CHAT] further_suggestion: {further_suggestion}")
                     yield f"data: {json.dumps({'type': 'suggestion', 'data': further_suggestion})}\n\n"
 
                     yield f"data: {json.dumps({'type': 'end'})}\n\n"
@@ -685,7 +713,9 @@ def generate_chat_response() -> Generator[str, None, None]:
                         )
 
                 except Exception as e:
-                    self.logger.error(f"Error in chat stream: {e}", exc_info=True)
+                    self.logger.error(
+                        f"[PLAYGROUND CHAT] Error in playground chat stream: {e}", exc_info=True
+                    )
                     error_data = f"data: {json.dumps({'type': 'error', 'content': str(traceback.format_exc())})}\n\n"
                     yield error_data
 
@@ -705,7 +735,9 @@ def generate_chat_response() -> Generator[str, None, None]:
         except ValueError as err:
             raise HTTPException(status_code=404, detail=str(traceback.format_exc())) from err
         except Exception as err:
-            self.logger.error(f"Failed to start chat stream: {traceback.format_exc()}")
+            self.logger.error(
+                f"[PLAYGROUND CHAT] Failed to start playground chat stream: {traceback.format_exc()}"
+            )
             raise HTTPException(status_code=500, detail=str(traceback.format_exc())) from err
 
     def _dedup_and_supplement_memories(
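
All three handlers frame output as Server-Sent Events of the form data: {json}\n\n with event types such as text, status, suggestion, error, and end. A hedged client-side sketch for draining such a stream (the URL, payload shape, and the requests dependency are assumptions, not part of this commit):

    import json
    import requests  # assumed HTTP client; anything with line iteration works

    def drain_chat_stream(url, payload):
        """Concatenate 'text' chunks from a data:-framed SSE response until 'end'."""
        parts = []
        with requests.post(url, json=payload, stream=True) as resp:
            for line in resp.iter_lines(decode_unicode=True):
                if not line or not line.startswith("data: "):
                    continue  # skip blank separators between events
                event = json.loads(line[len("data: "):])
                if event.get("type") == "text":
                    parts.append(event["data"])
                elif event.get("type") in ("error", "end"):
                    break
        return "".join(parts)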

src/memos/api/handlers/component_init.py

Lines changed: 5 additions & 1 deletion
@@ -177,7 +177,11 @@ def init_server() -> dict[str, Any]:
         else None
     )
     llm = LLMFactory.from_config(llm_config)
-    chat_llms = _init_chat_llms(chat_llm_config)
+    chat_llms = (
+        _init_chat_llms(chat_llm_config)
+        if os.getenv("ENABLE_CHAT_API", "false") == "true"
+        else None
+    )
     embedder = EmbedderFactory.from_config(embedder_config)
     mem_reader = MemReaderFactory.from_config(mem_reader_config)
    reranker = RerankerFactory.from_config(reranker_config)
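
This mirrors the gating added in server_router.py below: the chat LLM pool is built only when the ENABLE_CHAT_API flag is on. Note the check is exact string equality, so only the literal lowercase "true" enables it; a quick demonstration:

    import os

    # Only the exact string "true" turns the chat stack on; "True", "1",
    # and "yes" all leave chat_llms as None under this check.
    for value in ("true", "True", "1", "yes"):
        os.environ["ENABLE_CHAT_API"] = value
        enabled = os.getenv("ENABLE_CHAT_API", "false") == "true"
        print(f"{value!r}: {enabled}")  # only 'true' prints True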

src/memos/api/routers/server_router.py

Lines changed: 23 additions & 7 deletions
@@ -15,7 +15,7 @@
 import random as _random
 import socket
 
-from fastapi import APIRouter, Query
+from fastapi import APIRouter, HTTPException, Query
 
 from memos.api import handlers
 from memos.api.handlers.add_handler import AddHandler
@@ -64,12 +64,16 @@
 # Initialize all handlers with dependency injection
 search_handler = SearchHandler(dependencies)
 add_handler = AddHandler(dependencies)
-chat_handler = ChatHandler(
-    dependencies,
-    components["chat_llms"],
-    search_handler,
-    add_handler,
-    online_bot=components.get("online_bot"),
+chat_handler = (
+    ChatHandler(
+        dependencies,
+        components["chat_llms"],
+        search_handler,
+        add_handler,
+        online_bot=components.get("online_bot"),
+    )
+    if os.getenv("ENABLE_CHAT_API", "false") == "true"
+    else None
 )
 feedback_handler = FeedbackHandler(dependencies)
 # Extract commonly used components for function-based handlers
@@ -201,6 +205,10 @@ def chat_complete(chat_req: APIChatCompleteRequest):
 
     This endpoint uses the class-based ChatHandler.
     """
+    if chat_handler is None:
+        raise HTTPException(
+            status_code=503, detail="Chat service is not available. Chat handler not initialized."
+        )
     return chat_handler.handle_chat_complete(chat_req)
 
 
@@ -212,6 +220,10 @@ def chat_stream(chat_req: ChatRequest):
     This endpoint uses the class-based ChatHandler which internally
     composes SearchHandler and AddHandler for a clean architecture.
     """
+    if chat_handler is None:
+        raise HTTPException(
+            status_code=503, detail="Chat service is not available. Chat handler not initialized."
+        )
     return chat_handler.handle_chat_stream(chat_req)
 
 
@@ -223,6 +235,10 @@ def chat_stream_playground(chat_req: ChatPlaygroundRequest):
     This endpoint uses the class-based ChatHandler which internally
     composes SearchHandler and AddHandler for a clean architecture.
     """
+    if chat_handler is None:
+        raise HTTPException(
+            status_code=503, detail="Chat service is not available. Chat handler not initialized."
+        )
     return chat_handler.handle_chat_stream_playground(chat_req)
 
 
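The same three-line None guard now appears in all three chat routes. A possible consolidation, purely as a sketch (this helper does not exist in the repo):

    from fastapi import HTTPException

    def require_chat_handler(chat_handler):
        # Hypothetical shared guard: when ENABLE_CHAT_API != "true" the
        # handler is None, and a 503 is clearer than an AttributeError
        # raised mid-request.
        if chat_handler is None:
            raise HTTPException(
                status_code=503,
                detail="Chat service is not available. Chat handler not initialized.",
            )
        return chat_handler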

src/memos/memories/textual/prefer_text_memory/extractor.py

Lines changed: 6 additions & 0 deletions
@@ -70,6 +70,9 @@ def extract_explicit_preference(self, qa_pair: MessageList | str) -> dict[str, Any]:
         try:
             response = self.llm_provider.generate([{"role": "user", "content": prompt}])
             if not response:
+                logger.error(
+                    f"[prefer_extractor]: (Error) LLM response content is {response} when extracting explicit preference"
+                )
                 return None
             response = response.strip().replace("```json", "").replace("```", "").strip()
             result = json.loads(response)
@@ -95,6 +98,9 @@ def extract_implicit_preference(self, qa_pair: MessageList | str) -> dict[str, Any]:
         try:
             response = self.llm_provider.generate([{"role": "user", "content": prompt}])
             if not response:
+                logger.error(
+                    f"[prefer_extractor]: (Error) LLM response content is {response} when extracting implicit preference"
+                )
                 return None
             response = response.strip().replace("```json", "").replace("```", "").strip()
             result = json.loads(response)
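
Downstream of the new logging, both extractors still strip markdown fences before parsing. For reference, that cleanup step on a typical fenced completion (the value is illustrative):

    import json

    raw = '```json\n{"preference": "prefers concise answers"}\n```'
    # Same cleanup as the extractor: drop the ```json fence markers, then parse.
    cleaned = raw.strip().replace("```json", "").replace("```", "").strip()
    result = json.loads(cleaned)
    print(result["preference"])  # prefers concise answers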
