@@ -679,57 +679,6 @@ def get_suggestion_query(self, user_id: str, language: str = "zh") -> list[str]:
679679 response_json = json .loads (clean_response )
680680 return response_json ["query" ]
681681
682- def chat (
683- self ,
684- query : str ,
685- user_id : str ,
686- cube_id : str | None = None ,
687- history : MessageList | None = None ,
688- ) -> Generator [str , None , None ]:
689- """Stream a chat reply as SSE-formatted ``data:`` events.
690- Args:
691- query (str): Query string; passed to both the parent search and the parent chat call.
692- user_id (str): User ID.
693- cube_id (str, optional): Custom cube ID for user. When given, access is checked with _validate_cube_access; otherwise only _validate_user_exists runs. It is not forwarded to the search or chat calls.
694- history (MessageList, optional): Chat history. NOTE(review): never referenced in this method body.
695-
696- Returns:
697- Generator[str, None, None]: SSE event strings, in order: 'text' chunks, one 'reference' event, one 'time' event, then an 'end' event.
698- """
699- # Use MOSCore's built-in validation: cube access when a cube_id is given, otherwise user existence only
700- if cube_id :
701- self ._validate_cube_access (user_id , cube_id )
702- else :
703- self ._validate_user_exists (user_id )
704-
705- # Load user cubes if not already loaded
706- self ._load_user_cubes (user_id , self .default_cube_config )
707- time_start = time .time ()
708- memories_list = super ().search (query , user_id )["text_mem" ]
709- # Get response from parent MOSCore (returns string, not generator); the timed window covers search + chat only, not the streaming below
710- response = super ().chat (query , user_id )
711- time_end = time .time ()
712-
713- # Stream the complete response in small pieces (chunk_size=5) using the tiktoken-based chunking helper
714- for chunk in self ._chunk_response_with_tiktoken (response , chunk_size = 5 ):
715- chunk_data = f"data: { json .dumps ({'type' : 'text' , 'content' : chunk })} \n \n "
716- yield chunk_data
717-
718- # Prepare reference data: dump each memory, tag it with a short ref_id (first '-'-separated part of its id) and blank out embedding/sources
719- reference = []
720- for memories in memories_list :
721- memories_json = memories .model_dump ()
722- memories_json ["metadata" ]["ref_id" ] = f"[{ memories .id .split ('-' )[0 ]} ]"
723- memories_json ["metadata" ]["embedding" ] = []
724- memories_json ["metadata" ]["sources" ] = []
725- reference .append (memories_json )
726-
727- yield f"data: { json .dumps ({'type' : 'reference' , 'content' : reference })} \n \n "
728- total_time = round (float (time_end - time_start ), 1 )  # elapsed seconds, rounded to 1 decimal
729-
730- yield f"data: { json .dumps ({'type' : 'time' , 'content' : {'total_time' : total_time , 'speed_improvement' : '23%' }})} \n \n "  # NOTE(review): 'speed_improvement' is a hard-coded literal, not a measured value
731- yield f"data: { json .dumps ({'type' : 'end' })} \n \n "
732-
733682 def chat_with_references (
734683 self ,
735684 query : str ,
@@ -768,6 +717,8 @@ def chat_with_references(
768717 self ._register_chat_history (user_id )
769718
770719 chat_history = self .chat_history_manager [user_id ]
720+ if history :
721+ chat_history .chat_history = history [- 10 :]
771722 current_messages = [
772723 {"role" : "system" , "content" : system_prompt },
773724 * chat_history .chat_history ,
@@ -853,15 +804,12 @@ def chat_with_references(
853804 yield f"data: { json .dumps ({'type' : 'reference' , 'data' : reference })} \n \n "
854805 total_time = round (float (time_end - time_start ), 1 )
855806 yield f"data: { json .dumps ({'type' : 'time' , 'data' : {'total_time' : total_time , 'speed_improvement' : '23%' }})} \n \n "
856- chat_history .chat_history .append ({"role" : "user" , "content" : query })
857- chat_history .chat_history .append ({"role" : "assistant" , "content" : full_response })
858807 self ._send_message_to_scheduler (
859808 user_id = user_id , mem_cube_id = cube_id , query = query , label = QUERY_LABEL
860809 )
861810 self ._send_message_to_scheduler (
862811 user_id = user_id , mem_cube_id = cube_id , query = full_response , label = ANSWER_LABEL
863812 )
864- self .chat_history_manager [user_id ] = chat_history
865813
866814 yield f"data: { json .dumps ({'type' : 'end' })} \n \n "
867815 self .add (
@@ -880,12 +828,6 @@ def chat_with_references(
880828 ],
881829 mem_cube_id = cube_id ,
882830 )
883- # Trim chat history once it exceeds 10 messages by removing the oldest conversation pair
884- if len (self .chat_history_manager [user_id ].chat_history ) > 10 :
885- self .chat_history_manager [user_id ].chat_history .pop (0 ) # Remove oldest user message
886- self .chat_history_manager [user_id ].chat_history .pop (
887- 0
888- ) # Remove oldest assistant response
889831
890832 def get_all (
891833 self ,
0 commit comments