add: test script

sairin1202 · sairin1202 · commit 5a61f3b5a02e · 2025-11-18T20:30:07.000+08:00
diff --git a/README.md b/README.md
@@ -40,7 +40,7 @@ memU v0.3.0-Alpha has been released! This version initializes the memorize and r
 Starting from this release, memU will roll out multiple features in the short- to mid-term:
 
 ### Core capabilities iteration
-- [ ] **Multi-modal enhancements** – Support for images, audio, and video
+- [x] **Multi-modal enhancements** – Support for images, audio, and video
 - [ ] **Intention** – Higher-level decision-making and goal management
 - [ ] **Multi-client support** – Switch between OpenAI, Deepseek, Gemini, etc.
 - [ ] **Data persistence expansion** – Support for Postgres, S3, DynamoDB
@@ -88,76 +88,76 @@ Through this three-layer design, **MemU brings genuine memory into the agent lay
   A feedback-driven mechanism continuously adapts the memory structure according to real usage patterns.
 <img width="1280" height="312" alt="image" src="https://github.com/user-attachments/assets/e2c0ac0c-e5cb-44a9-b880-89be142e1ca5" />
 
-## 🚀Get Started
+#### Quick Start
 
-There are three ways to get started with MemU:
-
-### ☁️ Cloud Version ([Online Platform](https://app.memu.so))
-
-The fastest way to integrate your application with memU. Perfect for teams and individuals who want immediate access without setup complexity. We host the models, APIs, and cloud storage, ensuring your application gets the best quality AI memory.
-
-- **Instant Access** - Start integrating AI memories in minutes
-- **Managed Infrastructure** - We handle scaling, updates, and maintenance for optimal memory quality
-- **Premium Support** - Subscribe and get priority assistance from our engineering team
-
-### Step-by-step
-
-**Step 1:** Create account
-
-Create account on https://app.memu.so
-
-Then, go to https://app.memu.so/api-key/ for generating api-keys.
-
-**Step 2:** Add three lines to your code
-```python
-pip install memu-py
-
-# Example usage
-from memu import MemuClient
+**Step 1: Install**
+```bash
+pip install -e .
 ```
 
-**Step 3:** Quick Start
+**Step 2: Run the example**
 ```python
-# Initialize
-memu_client = MemuClient(
-    base_url="https://api.memu.so",
-    api_key=os.getenv("MEMU_API_KEY")
-)
-memu_client.memorize_conversation(
-    conversation=conversation_text, # Recommend longer conversation (~8000 tokens), see https://memu.pro/blog/memu-best-practice for details
-    user_id="user001",
-    user_name="User",
-    agent_id="assistant001",
-    agent_name="Assistant"
-)
+from memu.app import MemoryUser
+import logging
+
+async def test_memory_service():
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+    )
+    logger = logging.getLogger("memu")
+    logger.setLevel(logging.DEBUG)
+
+    # Build the service; replace the placeholder string with a real OpenAI API key
+    service = MemoryUser(llm_config={"api_key": "your-openai-api-key"})
+
+    # Ingest the sample conversation file (relative path, resolved from the working directory)
+    memory = await service.memorize(
+        resource_url="tests/data/example_conversation.json",
+        modality="conversation"
+    )
+
+    # Prior turns handed to retrieve() so the ambiguous "their" in the last query can be rewritten
+    conversation_history = [
+        {"role": "user", "content": "Tell me about the user's preferences"},
+        {"role": "assistant", "content": "I'd be happy to help. Let me search the memory."},
+        {"role": "user", "content": "What are their habits?"}
+    ]
+
+    # Test 1: method="rag" - vector-similarity retrieval, with the history supplied for query rewriting
+    print("\n[Test 1] RAG-based Retrieval with conversation history")
+    retrieved_rag = await service.retrieve(
+        query="What are their habits?",
+        conversation_history=conversation_history,
+        method="rag",
+        top_k=5
+    )
+    print(f"Method: {retrieved_rag.get('method')}")
+    print(f"Original query: {retrieved_rag.get('original_query')}")
+    print(f"Rewritten query: {retrieved_rag.get('rewritten_query')}")
+    print(f"Results: {len(retrieved_rag.get('categories', []))} categories, "
+          f"{len(retrieved_rag.get('items', []))} items")
+
+    # Test 2: method="llm" - same query and history, but the LLM selects and ranks the memories
+    print("\n[Test 2] LLM-based Retrieval with conversation history")
+    retrieved_llm = await service.retrieve(
+        query="What are their habits?",
+        conversation_history=conversation_history,
+        method="llm",
+        top_k=5
+    )
+    print(f"Method: {retrieved_llm.get('method')}")
+    print(f"Original query: {retrieved_llm.get('original_query')}")
+    print(f"Rewritten query: {retrieved_llm.get('rewritten_query')}")
+    print(f"Results: {len(retrieved_llm.get('categories', []))} categories, "
+          f"{len(retrieved_llm.get('items', []))} items")
+
+if __name__ == "__main__":
+    import asyncio
+    asyncio.run(test_memory_service())
 ```
-Check [API reference](docs/API_REFERENCE.md) or [our blog](https://memu.pro/blog) for more details.
-
-📖 **See [`example/client/memory.py`](example/client/memory.py) for complete integration details**
-
-✨ **That's it!** MemU remembers everything and helps your AI learn from past conversations.
-
-
-### 🏢 Enterprise Edition
-
-For organizations requiring maximum security, customization, control and best quality:
-
-- **Commercial License** - Full proprietary features, commercial usage rights, white-labeling options
-- **Custom Development** - SSO/RBAC integration, dedicated algorithm team for scenario-specific framework optimization
-- **Intelligence & Analytics** - User behavior analysis, real-time production monitoring, automated agent optimization
-- **Premium Support** - 24/7 dedicated support, custom SLAs, professional implementation services
-
-📧 **Enterprise Inquiries:** [contact@nevamind.ai](mailto:contact@nevamind.ai)
-
-
-### 🏠 Self-Hosting (Community Edition)
-For users and developers who prefer local control, data privacy, or customization:
-
-* **Data Privacy** - Keep sensitive data within your infrastructure
-* **Customization** - Modify and extend the platform to fit your needs
-* **Cost Control** - Avoid recurring cloud fees for large-scale deployments
 
-See [self hosting README](README.self_host.md)
+See the [self-hosting README](README.self_host.md) for more details.
 
 ---
 
diff --git a/src/memu/app/service.py b/src/memu/app/service.py
@@ -17,6 +17,7 @@
 from memu.prompts.memory_type import PROMPTS as MEMORY_TYPE_PROMPTS
 from memu.prompts.preprocess import PROMPTS as PREPROCESS_PROMPTS
 from memu.prompts.retrieve.judger import PROMPT as RETRIEVE_JUDGER_PROMPT
+from memu.prompts.retrieve.query_rewriter import PROMPT as QUERY_REWRITER_PROMPT
 from memu.storage.local_fs import LocalFS
 from memu.utils.video import VideoFrameExtractor
 from memu.vector.index import cosine_topk
@@ -777,9 +778,56 @@ def _validate_config(
             return model_type()
         return model_type.model_validate(config)
 
-    async def retrieve(self, query: str, *, top_k: int = 5) -> dict[str, Any]:
+    async def retrieve(
+        self,
+        query: str,
+        *,
+        conversation_history: list[dict[str, str]] | None = None,
+        method: str = "rag",
+        top_k: int = 5,
+    ) -> dict[str, Any]:
+        """
+        Retrieve relevant memories based on the query.
+
+        Args:
+            query: The search query
+            conversation_history: Optional prior turns; when non-empty the query is rewritten by the LLM first
+            method: Retrieval method - "rag" (vector similarity) or "llm" (LLM-based ranking)
+            top_k: Number of top results to return
+
+        Returns:
+            Dictionary with original_query, rewritten_query, method, resources, items and categories
+
+        Raises:
+            ValueError: If method is neither "rag" nor "llm"
+        """
+        # Reject a bad method up front so no LLM rewrite call is spent on a request that cannot succeed.
+        if method not in ("rag", "llm"):
+            msg = f"Unknown retrieval method '{method}'. Use 'rag' or 'llm'."
+            raise ValueError(msg)
+
+        rewritten_query = query
+        if conversation_history:
+            rewritten_query = await self._rewrite_query_with_history(query, conversation_history)
+            logger.debug(f"Original query: {query}")
+            logger.debug(f"Rewritten query: {rewritten_query}")
+
+        response: dict[str, Any] = {
+            "original_query": query,
+            "rewritten_query": rewritten_query,
+            "method": method,
+            "resources": [],
+            "items": [],
+            "categories": [],
+        }
+
+        dispatch = self._retrieve_rag if method == "rag" else self._retrieve_llm
+        return await dispatch(rewritten_query, response, top_k)
+
+    async def _retrieve_rag(self, query: str, response: dict[str, Any], top_k: int) -> dict[str, Any]:
+        """RAG-based retrieval using vector similarity search"""
+        # Use query for embedding
         qvec = (await self.openai.embed([query]))[0]
-        response: dict[str, list[dict[str, Any]]] = {"resources": [], "items": [], "categories": []}
         content_sections: list[str] = []
 
         cat_hits, summary_lookup = await self._rank_categories_by_summary(qvec, top_k)
@@ -806,6 +854,126 @@ async def retrieve(self, query: str, *, top_k: int = 5) -> dict[str, Any]:
 
         return response
 
+    async def _retrieve_llm(self, query: str, response: dict[str, Any], top_k: int) -> dict[str, Any]:
+        """Fill ``response`` with LLM-selected categories, items and resources.
+
+        Each non-empty collection in ``self.store`` is ranked against ``query``
+        via ``_llm_rank_memories``; keys whose collection is empty are left as
+        the caller supplied them. ``response`` is mutated and returned.
+        """
+        store = self.store
+        # Snapshot all three collections first, then rank them in this order.
+        pools = {
+            "categories": list(store.categories.values()),
+            "items": list(store.items.values()),
+            "resources": list(store.resources.values()),
+        }
+
+        for memory_type, candidates in pools.items():
+            if not candidates:
+                continue
+            response[memory_type] = await self._llm_rank_memories(query, candidates, memory_type, top_k)
+
+        return response
+
+    async def _llm_rank_memories(
+        self, query: str, memories: list[Any], memory_type: str, top_k: int
+    ) -> list[dict[str, Any]]:
+        """Ask the LLM to pick the ``top_k`` memories most relevant to ``query``.
+
+        Only the first 20 entries of ``memories`` are shown to the model (prompt-size cap, not a
+        relevance cut). Each result carries ``id``, a rank-derived ``score`` (1.0, 0.9, ... floored
+        at 0.0) and the fields for ``memory_type`` ("categories", "items", anything else = resources).
+        """
+        if not memories:
+            return []
+
+        memories_to_rank = memories[:20]
+
+        # Number each candidate so the model can answer with bare indices.
+        formatted_memories = []
+        for idx, mem in enumerate(memories_to_rank):
+            if memory_type == "categories":
+                content = f"Category: {mem.name}\nSummary: {mem.summary or 'N/A'}"
+            elif memory_type == "items":
+                content = f"Item: {mem.summary}"
+            else:  # resources
+                content = f"Resource: {mem.caption or mem.url}"
+            formatted_memories.append(f"[{idx}] {content}")
+
+        memories_text = "\n\n".join(formatted_memories)
+
+        prompt = f"""Given the query and a list of memories, select the top {top_k} most relevant memories.
+Return only the indices (numbers) of the selected memories, separated by commas.
+
+Query: {query}
+
+Memories:
+{memories_text}
+
+Output format: 0,3,7,... (indices only, comma-separated)
+Selected indices:"""
+
+        response_text = await self.openai.summarize(prompt, system_prompt=None)
+
+        # Out-of-range and repeated indices are discarded by the parser.
+        selected_indices = self._parse_llm_indices(response_text, len(memories_to_rank))
+
+        result = []
+        # enumerate() yields the rank directly (no list.index scan); the floor keeps scores
+        # non-negative when more than ten memories are returned.
+        for rank, idx in enumerate(selected_indices[:top_k]):
+            mem = memories_to_rank[idx]
+            mem_dict = {"id": mem.id, "score": max(0.0, 1.0 - rank * 0.1)}
+            if memory_type == "categories":
+                mem_dict.update({"name": mem.name, "summary": mem.summary})
+            elif memory_type == "items":
+                mem_dict.update({"summary": mem.summary, "memory_type": mem.memory_type})
+            else:
+                mem_dict.update({"url": mem.url, "caption": mem.caption})
+            result.append(mem_dict)
+
+        return result
+
+    def _parse_llm_indices(self, response: str, max_idx: int) -> list[int]:
+        """Extract the distinct in-range integers from an LLM reply, in order.
+
+        Every run of digits in ``response`` is read as a candidate index; values
+        outside ``[0, max_idx)`` and repeats of an earlier value are dropped.
+        """
+        candidates = (int(token) for token in re.findall(r"\d+", response))
+        in_range = (value for value in candidates if 0 <= value < max_idx)
+        # dict.fromkeys keeps first-seen order while discarding duplicates.
+        return list(dict.fromkeys(in_range))
+
+    async def _rewrite_query_with_history(self, query: str, conversation_history: list[dict[str, str]]) -> str:
+        """Ask the LLM for a self-contained version of ``query`` given prior turns.
+
+        Each history entry is rendered as ``role: content`` (missing keys become
+        ``unknown`` / empty) and substituted into QUERY_REWRITER_PROMPT together
+        with the query. Returns the original ``query`` when the parsed rewrite
+        is falsy.
+        """
+        turns = []
+        for turn in conversation_history:
+            turns.append(f"{turn.get('role', 'unknown')}: {turn.get('content', '')}")
+        escaped_history = self._escape_prompt_value("\n".join(turns))
+        escaped_query = self._escape_prompt_value(query)
+
+        prompt = QUERY_REWRITER_PROMPT.format(conversation_history=escaped_history, query=escaped_query)
+        llm_reply = await self.openai.summarize(prompt, system_prompt=None)
+        parsed = self._parse_rewritten_query(llm_reply)
+        return parsed if parsed else query
+
+    def _parse_rewritten_query(self, response: str) -> str | None:
+        """Return the text inside <rewritten_query> tags, else the reply minus any <analysis> block."""
+        match = re.search(r"<rewritten_query>\s*(.*?)\s*(?:</rewritten_query>|\Z)", response, re.DOTALL)
+        if match:
+            return match.group(1).strip()
+        # No tag at all: drop the analysis section so it is not sent on as the search query.
+        without_analysis = re.sub(r"<analysis>.*?</analysis>", "", response, flags=re.DOTALL)
+        return without_analysis.strip()
+
     async def _rank_categories_by_summary(
         self, query_vec: list[float], top_k: int
     ) -> tuple[list[tuple[str, float]], dict[str, str]]:
diff --git a/src/memu/prompts/retrieve/__init__.py b/src/memu/prompts/retrieve/__init__.py
@@ -0,0 +1,4 @@
+from memu.prompts.retrieve.judger import PROMPT as JUDGER_PROMPT
+from memu.prompts.retrieve.query_rewriter import PROMPT as QUERY_REWRITER_PROMPT
+
+__all__ = ["JUDGER_PROMPT", "QUERY_REWRITER_PROMPT"]
diff --git a/src/memu/prompts/retrieve/query_rewriter.py b/src/memu/prompts/retrieve/query_rewriter.py
@@ -0,0 +1,32 @@
+PROMPT = """Your task is to rewrite a user query by resolving references and ambiguities using the conversation history.
+
+## Conversation History:
+{conversation_history}
+
+## Current Query:
+{query}
+
+## Task:
+Analyze the current query and the conversation history. If the query contains:
+- Pronouns (e.g., "they", "it", "their", "his", "her")
+- Referential expressions (e.g., "that", "those", "the same")
+- Implicit context (e.g., "what about...", "and also...")
+- Incomplete information that can be inferred from history
+
+Then rewrite the query to be self-contained and explicit by:
+1. Replacing pronouns with specific entities mentioned in the conversation
+2. Adding necessary context from the conversation history
+3. Making implicit references explicit
+4. Ensuring the rewritten query can be understood without the conversation history
+
+If the query is already self-contained and clear, return it as is.
+
+## Output Format:
+<analysis>
+[Brief analysis of whether the query needs rewriting and why]
+</analysis>
+
+<rewritten_query>
+[The rewritten query that is self-contained and explicit]
+</rewritten_query>
+"""