2323 remove_embedding_recursive ,
2424 sort_children_by_memory_type ,
2525)
26- from memos .mem_scheduler .schemas import ANSWER_LABEL , QUERY_LABEL , ScheduleMessageItem
26+ from memos .mem_scheduler .schemas .general_schemas import (
27+ ANSWER_LABEL ,
28+ QUERY_LABEL ,
29+ )
30+ from memos .mem_scheduler .schemas .message_schemas import ScheduleMessageItem
2731from memos .mem_user .persistent_user_manager import PersistentUserManager
2832from memos .mem_user .user_manager import UserRole
2933from memos .memories .textual .item import (
@@ -601,7 +605,7 @@ def user_register(
601605 try :
602606 default_mem_cube .dump (mem_cube_name_or_path )
603607 except Exception as e :
604- print ( e )
608+ logger . error ( f"Failed to dump default cube: { e } " )
605609
606610 # Register the default cube with MOS
607611 self .register_mem_cube (
@@ -679,57 +683,6 @@ def get_suggestion_query(self, user_id: str, language: str = "zh") -> list[str]:
679683 response_json = json .loads (clean_response )
680684 return response_json ["query" ]
681685
    def chat(
        self,
        query: str,
        user_id: str,
        cube_id: str | None = None,
        history: MessageList | None = None,
    ) -> Generator[str, None, None]:
        """Chat with the LLM, streaming the answer as SSE data lines.

        Searches the user's text memories, gets a full (non-streamed) answer
        from the parent MOSCore, then re-chunks it into small token-based
        pieces so the caller can stream it as Server-Sent Events.

        Args:
            query (str): Query string.
            user_id (str): User ID.
            cube_id (str, optional): Custom cube ID for user; when given,
                access to that cube is validated instead of bare user existence.
            history (list[dict], optional): Chat history.
                NOTE(review): accepted but never used in this body — confirm
                whether it should be threaded into the parent chat call.

        Yields:
            str: SSE-formatted strings ("data: {...}\\n\\n") carrying, in
            order: text chunks, the reference list, timing info, and a final
            end marker.
        """
        # Use MOSCore's built-in validation: cube-level when a cube is named,
        # otherwise just confirm the user exists.
        if cube_id:
            self._validate_cube_access(user_id, cube_id)
        else:
            self._validate_user_exists(user_id)

        # Load user cubes if not already loaded
        self._load_user_cubes(user_id, self.default_cube_config)
        time_start = time.time()
        memories_list = super().search(query, user_id)["text_mem"]
        # Get response from parent MOSCore (returns string, not generator)
        response = super().chat(query, user_id)
        # Timing stops here, before streaming — total_time covers search + LLM
        # response only, not the chunked delivery below.
        time_end = time.time()

        # Use tiktoken for proper token-based chunking of the full response.
        for chunk in self._chunk_response_with_tiktoken(response, chunk_size=5):
            chunk_data = f"data: {json.dumps({'type': 'text', 'content': chunk})}\n\n"
            yield chunk_data

        # Prepare reference data: strip embeddings/sources and tag each memory
        # with a short ref id derived from the first segment of its UUID.
        reference = []
        for memories in memories_list:
            memories_json = memories.model_dump()
            memories_json["metadata"]["ref_id"] = f"[{memories.id.split('-')[0]}]"
            memories_json["metadata"]["embedding"] = []
            memories_json["metadata"]["sources"] = []
            reference.append(memories_json)

        yield f"data: {json.dumps({'type': 'reference', 'content': reference})}\n\n"
        total_time = round(float(time_end - time_start), 1)

        # NOTE(review): 'speed_improvement' is a hard-coded constant, not a
        # measured value — confirm whether this is intentional display copy.
        yield f"data: {json.dumps({'type': 'time', 'content': {'total_time': total_time, 'speed_improvement': '23%'}})}\n\n"
        yield f"data: {json.dumps({'type': 'end'})}\n\n"
733686 def chat_with_references (
734687 self ,
735688 query : str ,
@@ -768,6 +721,8 @@ def chat_with_references(
768721 self ._register_chat_history (user_id )
769722
770723 chat_history = self .chat_history_manager [user_id ]
724+ if history :
725+ chat_history .chat_history = history [- 10 :]
771726 current_messages = [
772727 {"role" : "system" , "content" : system_prompt },
773728 * chat_history .chat_history ,
@@ -853,15 +808,12 @@ def chat_with_references(
853808 yield f"data: { json .dumps ({'type' : 'reference' , 'data' : reference })} \n \n "
854809 total_time = round (float (time_end - time_start ), 1 )
855810 yield f"data: { json .dumps ({'type' : 'time' , 'data' : {'total_time' : total_time , 'speed_improvement' : '23%' }})} \n \n "
856- chat_history .chat_history .append ({"role" : "user" , "content" : query })
857- chat_history .chat_history .append ({"role" : "assistant" , "content" : full_response })
858811 self ._send_message_to_scheduler (
859812 user_id = user_id , mem_cube_id = cube_id , query = query , label = QUERY_LABEL
860813 )
861814 self ._send_message_to_scheduler (
862815 user_id = user_id , mem_cube_id = cube_id , query = full_response , label = ANSWER_LABEL
863816 )
864- self .chat_history_manager [user_id ] = chat_history
865817
866818 yield f"data: { json .dumps ({'type' : 'end' })} \n \n "
867819 self .add (
@@ -880,12 +832,6 @@ def chat_with_references(
880832 ],
881833 mem_cube_id = cube_id ,
882834 )
883- # Keep chat history under 30 messages by removing oldest conversation pair
884- if len (self .chat_history_manager [user_id ].chat_history ) > 10 :
885- self .chat_history_manager [user_id ].chat_history .pop (0 ) # Remove oldest user message
886- self .chat_history_manager [user_id ].chat_history .pop (
887- 0
888- ) # Remove oldest assistant response
889835
890836 def get_all (
891837 self ,
@@ -1030,11 +976,9 @@ def get_subgraph(
1030976 return reformat_memory_list
1031977
1032978 def search (
1033- self , query : str , user_id : str , install_cube_ids : list [str ] | None = None , top_k : int = 20
979+ self , query : str , user_id : str , install_cube_ids : list [str ] | None = None , top_k : int = 10
1034980 ):
1035981 """Search memories for a specific user."""
1036- # Validate user access
1037- self ._validate_user_access (user_id )
1038982
1039983 # Load user cubes if not already loaded
1040984 self ._load_user_cubes (user_id , self .default_cube_config )
0 commit comments