Skip to content

Commit 69a9194

Browse files
committed
feat: support role content format
1 parent 1ce192f commit 69a9194

File tree

2 files changed

+104
-43
lines changed

2 files changed

+104
-43
lines changed

README.md

Lines changed: 42 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,7 @@ pip install memu-py
101101
### Basic Usage
102102

103103
```python
104-
from memu.app import MemoryUser
104+
from memu.app import MemoryService
105105
import logging
106106

107107
async def test_memory_service():
@@ -112,47 +112,43 @@ async def test_memory_service():
112112
logger = logging.getLogger("memu")
113113
logger.setLevel(logging.DEBUG)
114114

115-
# Initialize MemoryUser with your OpenAI API key
116-
service = MemoryUser(llm_config={"api_key": "your-openai-api-key"})
115+
# Initialize MemoryService with your OpenAI API key
116+
service = MemoryService(llm_config={"api_key": "your-openai-api-key"})
117117

118118
# Memorize a conversation
119119
memory = await service.memorize(
120120
resource_url="tests/example/example_conversation.json",
121121
modality="conversation"
122122
)
123123

124-
# Example conversation history for query rewriting
125-
conversation_history = [
126-
{"role": "user", "content": "Tell me about the user's preferences"},
127-
{"role": "assistant", "content": "I'd be happy to help. Let me search the memory."},
128-
{"role": "user", "content": "What are their habits?"}
124+
# Test 1: RAG-based Retrieval with query context
125+
# Multiple queries enable automatic query rewriting with context
126+
print("\n[Test 1] RAG-based Retrieval with query context")
127+
queries_with_context = [
128+
{"role": "user", "content": {"text": "Tell me about the user's preferences"}},
129+
{"role": "assistant", "content": {"text": "I can help you with that. Let me search the memory."}},
130+
{"role": "user", "content": {"text": "What are their habits?"}},
129131
]
130-
131-
# Test 1: RAG-based Retrieval with conversation history
132-
print("\n[Test 1] RAG-based Retrieval with conversation history")
133-
retrieved_rag = await service.retrieve(
134-
query="What are their habits?",
135-
conversation_history=conversation_history,
136-
retrieve_config={"method": "rag", "top_k": 5}
137-
)
132+
retrieved_rag = await service.retrieve(queries=queries_with_context)
138133
print(f"Needs retrieval: {retrieved_rag.get('needs_retrieval')}")
139134
print(f"Original query: {retrieved_rag.get('original_query')}")
140135
print(f"Rewritten query: {retrieved_rag.get('rewritten_query')}")
136+
print(f"Next step query: {retrieved_rag.get('next_step_query')}")
141137
print(f"Results: {len(retrieved_rag.get('categories', []))} categories, "
142138
f"{len(retrieved_rag.get('items', []))} items")
143139

144-
# Test 2: LLM-based Retrieval with conversation history
145-
print("\n[Test 2] LLM-based Retrieval with conversation history")
146-
retrieved_llm = await service.retrieve(
147-
query="What are their habits?",
148-
conversation_history=conversation_history,
149-
retrieve_config={"method": "llm", "top_k": 5}
150-
)
151-
print(f"Needs retrieval: {retrieved_llm.get('needs_retrieval')}")
152-
print(f"Original query: {retrieved_llm.get('original_query')}")
153-
print(f"Rewritten query: {retrieved_llm.get('rewritten_query')}")
154-
print(f"Results: {len(retrieved_llm.get('categories', []))} categories, "
155-
f"{len(retrieved_llm.get('items', []))} items")
140+
# Test 2: Single query without context (no rewriting)
141+
print("\n[Test 2] Single query without context")
142+
queries_no_context = [
143+
{"role": "user", "content": {"text": "What are their habits?"}}
144+
]
145+
retrieved_single = await service.retrieve(queries=queries_no_context)
146+
print(f"Needs retrieval: {retrieved_single.get('needs_retrieval')}")
147+
print(f"Original query: {retrieved_single.get('original_query')}")
148+
print(f"Rewritten query: {retrieved_single.get('rewritten_query')}")
149+
print(f"Next step query: {retrieved_single.get('next_step_query')}")
150+
print(f"Results: {len(retrieved_single.get('categories', []))} categories, "
151+
f"{len(retrieved_single.get('items', []))} items")
156152

157153
if __name__ == "__main__":
158154
import asyncio
@@ -163,6 +159,22 @@ if __name__ == "__main__":
163159

164160
MemU provides two distinct retrieval approaches, each optimized for different scenarios:
165161

162+
#### **Query Structure**
163+
Queries are passed as a list of message objects in the format:
164+
```python
165+
[
166+
{"role": "user", "content": {"text": "Tell me about the user's preferences"}},
167+
{"role": "assistant", "content": {"text": "I can help you with that."}},
168+
{"role": "user", "content": {"text": "What are their habits?"}}
169+
]
170+
```
171+
172+
- **Roles** can be `user`, `assistant`, or other custom roles
173+
- The **last query** in the list is the current query
174+
- **Previous queries** (with their roles) provide context for automatic query rewriting
175+
- If only **one query** is provided, no rewriting occurs
176+
- The system returns a `next_step_query` to suggest the next retrieval step
177+
166178
#### **1. RAG-based Retrieval (`method="rag"`)**
167179
Fast embedding-based vector search using cosine similarity. Ideal for:
168180
- Large-scale datasets
@@ -190,9 +202,10 @@ This method uses the LLM to:
190202

191203
Both methods support:
192204
- **Full traceability**: Each retrieved item includes its `resource_id`, allowing you to trace back to the original source
193-
- **Conversation-aware rewriting**: Automatically resolves pronouns and references using conversation history
205+
- **Context-aware rewriting**: Automatically resolves pronouns and references using previous queries as context
194206
- **Pre-retrieval decision**: Intelligently determines if memory retrieval is needed for the query
195207
- **Progressive search**: Stops early if sufficient information is found at higher layers
208+
- **Next step suggestion**: Returns `next_step_query` for iterative multi-turn retrieval
196209

197210

198211

src/memu/app/service.py

Lines changed: 62 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -785,13 +785,14 @@ def _validate_config(
785785

786786
async def retrieve(
787787
self,
788-
queries: list[str],
788+
queries: list[dict[str, Any]],
789789
) -> dict[str, Any]:
790790
"""
791791
Retrieve relevant memories based on the query using either RAG-based or LLM-based search.
792792
793793
Args:
794-
queries: List of query strings. The last one is the current query, others are context.
794+
queries: List of query messages in format [{"role": "user", "content": {"text": "..."}}].
795+
The last one is the current query, others are context.
795796
If list has only 1 element, no query rewriting is performed.
796797
797798
Returns:
@@ -813,12 +814,13 @@ async def retrieve(
813814
if not queries:
814815
raise ValueError("empty_queries")
815816

816-
current_query = queries[-1]
817-
context_queries = queries[:-1] if len(queries) > 1 else []
817+
# Extract text from the query structure
818+
current_query = self._extract_query_text(queries[-1])
819+
context_queries_objs = queries[:-1] if len(queries) > 1 else []
818820

819821
# Step 1: Decide if retrieval is needed
820822
needs_retrieval, rewritten_query = await self._decide_if_retrieval_needed(
821-
current_query, context_queries, retrieved_content=None
823+
current_query, context_queries_objs, retrieved_content=None
822824
)
823825

824826
# If only one query, do not use the rewritten version (use original)
@@ -842,11 +844,11 @@ async def retrieve(
842844
# Step 2: Perform retrieval with rewritten query using configured method
843845
if self.retrieve_config.method == "llm":
844846
results = await self._llm_based_retrieve(
845-
rewritten_query, top_k=self.retrieve_config.top_k, context_queries=context_queries
847+
rewritten_query, top_k=self.retrieve_config.top_k, context_queries=context_queries_objs
846848
)
847849
else: # rag
848850
results = await self._embedding_based_retrieve(
849-
rewritten_query, top_k=self.retrieve_config.top_k, context_queries=context_queries
851+
rewritten_query, top_k=self.retrieve_config.top_k, context_queries=context_queries_objs
850852
)
851853

852854
# Add metadata
@@ -874,7 +876,7 @@ async def _rank_categories_by_summary(
874876
async def _decide_if_retrieval_needed(
875877
self,
876878
query: str,
877-
context_queries: list[str] | None,
879+
context_queries: list[dict[str, Any]] | None,
878880
retrieved_content: str | None = None,
879881
system_prompt: str | None = None,
880882
) -> tuple[bool, str]:
@@ -883,7 +885,7 @@ async def _decide_if_retrieval_needed(
883885
884886
Args:
885887
query: The current query string
886-
context_queries: List of context queries
888+
context_queries: List of previous query objects with role and content
887889
retrieved_content: Content retrieved so far (if checking for sufficiency)
888890
system_prompt: Optional system prompt override
889891
@@ -908,17 +910,61 @@ async def _decide_if_retrieval_needed(
908910

909911
return decision == "RETRIEVE", rewritten
910912

911-
def _format_query_context(self, queries: list[str] | None) -> str:
912-
"""Format query context for prompts"""
913+
def _format_query_context(self, queries: list[dict[str, Any]] | None) -> str:
914+
"""Format query context for prompts, including role information"""
913915
if not queries:
914916
return "No query context."
915917

916918
lines = []
917919
for q in queries:
918-
lines.append(f"- {q}")
920+
if isinstance(q, str):
921+
# Backward compatibility
922+
lines.append(f"- {q}")
923+
elif isinstance(q, dict):
924+
role = q.get("role", "user")
925+
content = q.get("content")
926+
if isinstance(content, dict):
927+
text = content.get("text", "")
928+
elif isinstance(content, str):
929+
text = content
930+
else:
931+
text = str(content)
932+
lines.append(f"- [{role}]: {text}")
933+
else:
934+
lines.append(f"- {q!s}")
919935

920936
return "\n".join(lines)
921937

938+
@staticmethod
939+
def _extract_query_text(query: dict[str, Any]) -> str:
940+
"""
941+
Extract text content from query message structure.
942+
943+
Args:
944+
query: Query in format {"role": "user", "content": {"text": "..."}}
945+
946+
Returns:
947+
The extracted text string
948+
"""
949+
if isinstance(query, str):
950+
# Backward compatibility: if it's already a string, return it
951+
return query
952+
953+
if not isinstance(query, dict):
954+
raise TypeError("INVALID")
955+
956+
content = query.get("content")
957+
if isinstance(content, dict):
958+
text = content.get("text", "")
959+
if not text:
960+
raise ValueError("EMPTY")
961+
return str(text)
962+
elif isinstance(content, str):
963+
# Also support {"role": "user", "content": "text"} format
964+
return content
965+
else:
966+
raise TypeError("INVALID")
967+
922968
def _extract_decision(self, raw: str) -> str:
923969
"""Extract RETRIEVE or NO_RETRIEVE decision from LLM response"""
924970
if not raw:
@@ -946,7 +992,7 @@ def _extract_rewritten_query(self, raw: str) -> str | None:
946992
return None
947993

948994
async def _embedding_based_retrieve(
949-
self, query: str, top_k: int, context_queries: list[str] | None
995+
self, query: str, top_k: int, context_queries: list[dict[str, Any]] | None
950996
) -> dict[str, Any]:
951997
"""Embedding-based retrieval with query rewriting and judging at each tier"""
952998
current_query = query
@@ -1056,7 +1102,9 @@ def _extract_judgement(self, raw: str) -> str:
10561102
return "ENOUGH"
10571103
return "MORE"
10581104

1059-
async def _llm_based_retrieve(self, query: str, top_k: int, context_queries: list[str] | None) -> dict[str, Any]:
1105+
async def _llm_based_retrieve(
1106+
self, query: str, top_k: int, context_queries: list[dict[str, Any]] | None
1107+
) -> dict[str, Any]:
10601108
"""
10611109
LLM-based retrieval that uses language model to search and rank results
10621110
in a hierarchical manner, with query rewriting and judging at each tier.

0 commit comments

Comments (0)