@@ -54,7 +54,7 @@ def __init__(
         ## set to groq if key available
         if self.llm_service:
             if self.llm_service.provider_name == "google":
-                self.llm_service.model = "gemini-2.5-flash-lite-preview-06-17"
+                self.llm_service.model = "gemini-2.5-flash-lite"
             elif self.llm_service.provider_name == "claude":
                 self.llm_service.model = "claude-3-5-haiku-latest"
             elif self.llm_service.provider_name == "openai":
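
Note: the elif ladder above pins a cheaper default model per provider. A table-driven sketch of the same selection, using only the two model names visible in this hunk (the openai value is truncated above, so it is deliberately left out):

    # Dict-lookup equivalent of the provider elif chain shown above.
    DEFAULT_MODELS = {
        "google": "gemini-2.5-flash-lite",
        "claude": "claude-3-5-haiku-latest",
        # "openai": ...  value truncated in this hunk, intentionally omitted
    }

    def default_model_for(provider_name: str):
        """Return the pinned default model, or None for unmapped providers."""
        return DEFAULT_MODELS.get(provider_name)

    assert default_model_for("google") == "gemini-2.5-flash-lite"
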
@@ -241,88 +241,66 @@ async def _store_conversation_internal(self, operation_data: Dict[str, Any]):
         session_id = operation_data["session_id"]

         # Use the existing storage logic but make it synchronous
-        ids = []
-        memory_data = {}
-        # avaialble_ids = collection.get(
-        #     where={
-        #         "agent": agent_name,
-        #     },
-        #     include=[],
-        # )["ids"]
+        memory_data = None
+        retried = 0
         if self.llm_service:
-            try:
-                # Process with LLM using asyncio.run to handle async call in worker thread
-                if self.current_conversation_context.get(session_id, ""):
-                    analyzed_prompt = PRE_ANALYZE_WITH_CONTEXT_PROMPT.replace(
-                        "{conversation_context}",
-                        f"""<PREVIOUS_CONVERSATION_CONTEXT>
-{self.current_conversation_context[session_id]}
-</PREVIOUS_CONVERSATION_CONTEXT>""",
+            while retried < 3:
+                try:
+                    # Process with LLM using asyncio.run to handle async call in worker thread
+                    if self.current_conversation_context.get(session_id, ""):
+                        analyzed_prompt = PRE_ANALYZE_WITH_CONTEXT_PROMPT.replace(
+                            "{conversation_context}",
+                            f"""<PREVIOUS_CONVERSATION_CONTEXT>
+{self.current_conversation_context[session_id]}
+</PREVIOUS_CONVERSATION_CONTEXT>""",
+                        )
+                    else:
+                        analyzed_prompt = PRE_ANALYZE_PROMPT
+                    analyzed_prompt = (
+                        analyzed_prompt.replace(
+                            "{current_date}",
+                            datetime.today().strftime("%Y-%m-%d %H:%M:%S"),
+                        )
+                        .replace("{user_message}", user_message)
+                        .replace("{assistant_response}", assistant_response)
                     )
-                else:
-                    analyzed_prompt = PRE_ANALYZE_PROMPT
-                analyzed_prompt = (
-                    analyzed_prompt.replace(
-                        "{current_date}",
-                        datetime.today().strftime("%Y-%m-%d %H:%M:%S"),
+                    analyzed_text = await self.llm_service.process_message(
+                        analyzed_prompt
                     )
-                    .replace("{user_message}", user_message)
-                    .replace("{assistant_response}", assistant_response)
-                )
-                analyzed_text = await self.llm_service.process_message(
-                    analyzed_prompt
-                )
-                start_xml = analyzed_text.index("<MEMORY>")
-                end_xml = analyzed_text.index("</MEMORY>")
-                xml_content = analyzed_text[start_xml : end_xml + len("</MEMORY>")]
-                xml_content.replace("&", "&amp;").replace("'", "&apos;").replace(
-                    '"', "&quot;"
-                )
-                memory_data = xmltodict.parse(xml_content)
-                if (
-                    "MEMORY" in memory_data
-                    and "ID" in memory_data["MEMORY"]
-                    and memory_data["MEMORY"]["ID"]
-                ):
-                    ids.append(memory_data["MEMORY"]["ID"])
-                # if (
-                #     "MEMORY" in memory_data
-                #     and "USER_REQUEST" not in memory_data["MEMORY"]
-                # ):
-                #     memory_data["MEMORY"]["USER_REQUEST"] = user_message
-                # if (
-                #     "MEMORY" in memory_data
-                #     and "ASSISTANT_RESPONSE" not in memory_data["MEMORY"]
-                # ):
-                #     memory_data["MEMORY"]["ASSISTANT_RESPONSE"] = assistant_response
-
-            except Exception as e:
-                logger.warning(f"Error processing conversation with LLM: {e}")
-                # Fallback to simple concatenation if LLM fails
-                memory_data = {
-                    "MEMORY": {
-                        "DATE": datetime.today().strftime("%Y-%m-%d"),
-                        "USER_REQUEST": user_message,
-                        "ASSISTANT_RESPONSE": assistant_response
-                        if len(assistant_response) < 200
-                        else assistant_response[:197] + "...",
-                    }
-                }
-        else:
+                    start_xml = analyzed_text.index("<MEMORY>")
+                    end_xml = analyzed_text.index("</MEMORY>")
+                    xml_content = analyzed_text[
+                        start_xml : end_xml + len("</MEMORY>")
+                    ]
+                    xml_content = (
+                        xml_content.replace("&", "&amp;")
+                        .replace("'", "&apos;")
+                        .replace('"', "&quot;")
+                    )
+                    memory_data = xmltodict.parse(xml_content)
+                    break
+                except Exception as e:
+                    logger.warning(
+                        f"Error processing conversation with LLM: {e} {xml_content}"  # type: ignore
+                    )
+                    retried += 1
+                    continue
+
+        if memory_data is None:
             # Create the memory document by combining user message and response
             memory_data = {
                 "MEMORY": {
                     "DATE": datetime.today().strftime("%Y-%m-%d"),
-                    "USER_REQUEST": user_message,
-                    "ASSISTANT_RESPONSE": assistant_response
-                    if len(assistant_response) < 200
-                    else assistant_response[:197] + "...",
+                    "CONVERSATION_NOTES": {
+                        "NOTE": [user_message, assistant_response]
+                    },
                 }
             }

         # Store in ChromaDB (existing logic)
-        memory_id = str(uuid.uuid4())
         timestamp = datetime.now().timestamp()
+
+        memory_header = memory_data["MEMORY"].get("HEAD", None)
         conversation_document = xmltodict.unparse(
             memory_data, pretty=True, full_document=False
         )
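
Note: the new retry loop leans on two steps worth isolating: slice the first <MEMORY>...</MEMORY> span out of the raw completion, then escape stray entities before xmltodict sees them. A minimal standalone sketch of that extract-and-parse step (the function name is illustrative, not from this patch):

    import xmltodict

    def parse_memory_block(analyzed_text: str) -> dict:
        """Pull the <MEMORY>...</MEMORY> span out of raw LLM output and parse it."""
        start = analyzed_text.index("<MEMORY>")  # raises ValueError if absent,
        end = analyzed_text.index("</MEMORY>") + len("</MEMORY>")  # caller retries
        xml_content = analyzed_text[start:end]
        # Escape characters that often break ad-hoc LLM XML. This assumes the
        # block carries text in child elements only; quote-escaping would
        # mangle any tag that used attributes.
        xml_content = (
            xml_content.replace("&", "&amp;")
            .replace("'", "&apos;")
            .replace('"', "&quot;")
        )
        return xmltodict.parse(xml_content)

    print(parse_memory_block("noise <MEMORY><HEAD>trip plans</HEAD></MEMORY> noise"))
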
@@ -335,27 +313,19 @@ async def _store_conversation_internal(self, operation_data: Dict[str, Any]):

         metadata = {
             "date": timestamp,
-            "conversation_id": memory_id,
             "session_id": session_id,
             "agent": agent_name,
             "type": "conversation",
         }
-
-        # Add to ChromaDB collection (existing logic)
-        if ids:
-            collection.upsert(
-                ids=[ids[0]],
-                documents=[conversation_document],
-                embeddings=conversation_embedding,
-                metadatas=[metadata],
-            )
-        else:
-            collection.add(
-                documents=[conversation_document],
-                embeddings=conversation_embedding,
-                metadatas=[metadata],
-                ids=[memory_id],
-            )
+        if memory_header:
+            metadata["header"] = memory_header
+
+        collection.upsert(
+            ids=[f"{session_id}_{agent_name}"],
+            documents=[conversation_document],
+            embeddings=conversation_embedding,
+            metadatas=[metadata],
+        )

         logger.debug(f"Stored conversation: {operation_data['operation_id']}")

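
Note: because the upsert id is now the deterministic f"{session_id}_{agent_name}", each session/agent pair keeps exactly one rolling memory document: a later write replaces the earlier one instead of accumulating rows. A runnable sketch of that semantics against an in-memory Chroma client (names and values are placeholders; explicit embeddings sidestep the default embedding function):

    import chromadb

    client = chromadb.Client()  # ephemeral, in-memory client for illustration
    col = client.get_or_create_collection("memories")

    doc_id = "sess42_helper"  # deterministic, like f"{session_id}_{agent_name}"
    col.upsert(ids=[doc_id], documents=["v1"], embeddings=[[0.0, 1.0]],
               metadatas=[{"type": "conversation"}])
    col.upsert(ids=[doc_id], documents=["v2"], embeddings=[[1.0, 0.0]],
               metadatas=[{"type": "conversation"}])
    print(col.get(ids=[doc_id])["documents"])  # ['v2']: one record per pair
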
@@ -388,7 +358,7 @@ def clear_conversation_context(self):
         self.current_conversation_context = {}
         self.context_embedding = []

-    def load_conversation_context(self, session_id: str):
+    def load_conversation_context(self, session_id: str, agent_name: str = "None"):
         collection = self._initialize_collection()
         latest_memory = collection.get(
             where={
@@ -399,6 +369,7 @@ def load_conversation_context(self, session_id: str):
         self.current_conversation_context[session_id] = latest_memory["documents"][
             -1
         ]
+        print(self.current_conversation_context[session_id])

     def generate_user_context(self, user_input: str, agent_name: str = "None") -> str:
         """
@@ -425,7 +396,7 @@ async def _semantic_extracting(self, input: str) -> str:
         else:
             return input

-    def list_memory_ids(
+    def list_memory_headers(
         self,
         from_date: Optional[int] = None,
         to_date: Optional[int] = None,
@@ -450,9 +421,14 @@ def list_memory_ids(
             else and_conditions[0]
             if and_conditions
             else None,
-            include=[],
+            include=["metadatas"],
         )
-        return list_memory["ids"]
+        headers = []
+        if list_memory and list_memory["metadatas"]:
+            for metadata in list_memory["metadatas"]:
+                if metadata.get("header", None):
+                    headers.append(metadata.get("header"))
+        return headers

     def retrieve_memory(
         self,
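
Note: with headers written into metadata at store time, listing memories becomes a metadata projection instead of an id dump. A compact sketch of the same extraction, reusing the collection handle and filter style from the surrounding method (the where clause is a placeholder):

    # ids always come back from get(); metadatas must be requested explicitly.
    list_memory = collection.get(
        where={"agent": "helper"},
        include=["metadatas"],
    )
    headers = [
        m["header"]
        for m in (list_memory["metadatas"] or [])
        if m.get("header")  # skip memories stored without a HEAD element
    ]
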
@@ -498,36 +474,30 @@ def retrieve_memory(
         if not results["documents"] or not results["documents"][0]:
             return "No relevant memories found."

-        # Group chunks by conversation_id
-        conversation_chunks = {}
-        for i, (doc, metadata) in enumerate(
-            zip(results["documents"][0], results["metadatas"][0])  # type:ignore
+        conversation_chunks = []
+        for i, (id, doc, metadata) in enumerate(
+            zip(results["ids"][0], results["documents"][0], results["metadatas"][0])  # type:ignore
         ):
-            conv_id = metadata.get("conversation_id", "unknown")
-            if conv_id not in conversation_chunks:
-                conversation_chunks[conv_id] = {
-                    "chunks": [],
+            conversation_chunks.append(
+                {
+                    "id": id,
+                    "document": doc,
                     "timestamp": metadata.get("date", None)
                     or metadata.get("timestamp", "unknown"),
                     "relevance": results["distances"][0][i]
                     if results["distances"]
                     else 99,
                 }
-            conversation_chunks[conv_id]["chunks"].append(
-                (metadata.get("chunk_index", 0), doc)
             )

         # Sort conversations by relevance
-        sorted_conversations = sorted(
-            conversation_chunks.items(), key=lambda x: x[1]["relevance"]
-        )
+        sorted_conversations = sorted(conversation_chunks, key=lambda x: x["relevance"])

         # Format the output
         output = []
-        for conv_id, conv_data in sorted_conversations:
+        for conv_data in sorted_conversations:
             # Sort chunks by index
-            sorted_chunks = sorted(conv_data["chunks"], key=lambda x: x[0])
-            conversation_text = "\n".join([chunk for _, chunk in sorted_chunks])
+            conversation_text = conv_data["document"]
             if conv_data["relevance"] > RELEVANT_THRESHOLD:
                 continue
             # Format timestamp
@@ -544,22 +514,10 @@ def retrieve_memory(
                 timestamp = conv_data["timestamp"]

             output.append(
-                f"--- Memory from {timestamp} (relevance point(lower is better): {conv_data['relevance']}) ---\n{conversation_text}\n---"
+                f"--- Memory from {timestamp} [id: {conv_data['id']}] ---\n{conversation_text}\n---"
             )

         memories = "\n\n".join(output)
-        # if self.llm_service:
-        #     try:
-        #         return await self.llm_service.process_message(
-        #             POST_RETRIEVE_MEMORY.replace("{keywords}", keywords).replace(
-        #                 "{memory_list}", memories
-        #             )
-        #         )
-        #     except Exception as e:
-        #         logger.warning(f"Error processing retrieved memories with LLM: {e}")
-        #         # Fallback to returning raw memories if LLM processing fails
-        #         return memories
-        # else:
         return memories

     def _cosine_similarity(self, vec_a, vec_b):
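
Note: retrieval now sorts whole documents by query distance (ascending, lower is closer) and drops anything past the cutoff. A self-contained sketch of that filter-and-format step; the threshold value below is assumed for illustration, in the module it comes from RELEVANT_THRESHOLD:

    RELEVANT_THRESHOLD = 0.6  # assumed value, for illustration only
    chunks = [
        {"id": "sess1_helper", "document": "<MEMORY>...</MEMORY>",
         "timestamp": "2025-06-01", "relevance": 0.31},
        {"id": "sess2_helper", "document": "<MEMORY>...</MEMORY>",
         "timestamp": "2025-06-02", "relevance": 0.92},
    ]
    for conv in sorted(chunks, key=lambda c: c["relevance"]):
        if conv["relevance"] > RELEVANT_THRESHOLD:
            continue  # too distant to be relevant; mirrors the continue above
        print(f"--- Memory from {conv['timestamp']} [id: {conv['id']}] ---\n"
              f"{conv['document']}\n---")
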
@@ -663,23 +621,8 @@ def forget_topic(
                 "count": 0,
             }

-        # Collect all conversation IDs related to the topic
-        conversation_ids = set()
-        if results["metadatas"] and results["metadatas"][0]:
-            for metadata in results["metadatas"][0]:
-                conv_id = metadata.get("conversation_id")
-                if conv_id:
-                    conversation_ids.add(conv_id)
-
-        # Get all memories to find those with matching conversation IDs
-        all_memories = collection.get()
-
         # Find IDs to remove
-        ids_to_remove = []
-        if all_memories["metadatas"]:
-            for i, metadata in enumerate(all_memories["metadatas"]):
-                if metadata.get("conversation_id") in conversation_ids:
-                    ids_to_remove.append(all_memories["ids"][i])
+        ids_to_remove = results["ids"][0]

         # Remove the memories
         if ids_to_remove:
@@ -689,7 +632,6 @@ def forget_topic(
                 "success": True,
                 "message": f"Successfully removed {len(ids_to_remove)} memory chunks related to '{topic}'",
                 "count": len(ids_to_remove),
-                "conversations_affected": len(conversation_ids),
             }

         except Exception as e:
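
Note: forget_topic now trusts the semantic query directly, deleting exactly the chunk ids it returned rather than expanding to whole conversations via conversation_id. A sketch of the resulting flow with placeholder inputs (the query call itself sits above this hunk, so its exact arguments here are assumed):

    # Query-then-delete: ids come back nested per query text, hence [0].
    results = collection.query(query_texts=[topic], n_results=20)  # assumed cap
    ids_to_remove = results["ids"][0]
    if ids_to_remove:
        collection.delete(ids=ids_to_remove)  # standard Chroma delete-by-id
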