feat: add non-RAG retrieve solution (#84)

sairin1202 · ankaisen · Copilot · web-flow · commit fb96e5405f1c · 2025-11-18T21:48:44.000+09:00
Co-authored-by: An Kaisen <51148505+ankaisen@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
diff --git a/src/memu/app/__init__.py b/src/memu/app/__init__.py
@@ -1,4 +1,4 @@
 from memu.app.service import MemoryUser
-from memu.app.settings import BlobConfig, DatabaseConfig, LLMConfig, MemorizeConfig
+from memu.app.settings import BlobConfig, DatabaseConfig, LLMConfig, MemorizeConfig, RetrieveConfig
 
-__all__ = ["BlobConfig", "DatabaseConfig", "LLMConfig", "MemorizeConfig", "MemoryUser"]
+__all__ = ["BlobConfig", "DatabaseConfig", "LLMConfig", "MemorizeConfig", "MemoryUser", "RetrieveConfig"]
diff --git a/src/memu/app/service.py b/src/memu/app/service.py
diff --git a/src/memu/app/settings.py b/src/memu/app/settings.py
@@ -55,6 +55,17 @@ class DatabaseConfig(BaseModel):
     provider: str = Field(default="memory")
 
 
+class RetrieveConfig(BaseModel):  # Retrieval settings: which method to use and how many results to keep.
+    method: str = Field(  # Plain str; no check restricting it to 'rag'/'llm' is declared in this class.
+        default="rag",
+        description="Retrieval method: 'rag' for embedding-based vector search, 'llm' for LLM-based ranking.",
+    )
+    top_k: int = Field(  # No ge/gt bound is declared, so zero or negative values are not rejected here.
+        default=5,
+        description="Maximum number of results to return per category.",
+    )
+
+
 class MemorizeConfig(BaseModel):
     category_assign_threshold: float = Field(default=0.25)
     default_summary_prompt: str = Field(default="Summarize the text in one short paragraph.")
diff --git a/src/memu/prompts/retrieve/llm_category_ranker.py b/src/memu/prompts/retrieve/llm_category_ranker.py
@@ -0,0 +1,24 @@
+PROMPT = """Your task is to search through the provided categories and identify the most relevant ones for the given query.
+
+Analyze the query and all available categories, then select and rank the top-{top_k} most relevant categories.
+
+## Query:
+{query}
+
+## Available Categories:
+{categories_data}
+
+## Output Format:
+Provide your response as a JSON object whose "categories" array lists category IDs, ordered from most to least relevant:
+```json
+{{
+  "categories": ["category_id_1", "category_id_2", "category_id_3"]
+}}
+```
+
+Important:
+- Include up to {top_k} most relevant categories
+- Order matters: first ID should be most relevant
+- Only include categories that are actually relevant to the query
+- Empty array is acceptable if no relevant categories are found
+"""
diff --git a/src/memu/prompts/retrieve/llm_item_ranker.py b/src/memu/prompts/retrieve/llm_item_ranker.py
@@ -0,0 +1,27 @@
+PROMPT = """Your task is to search through the provided memory items and identify the most relevant ones for the given query.
+
+These memory items belong to the following relevant categories that were already identified:
+{relevant_categories}
+
+Analyze the query and the available memory items, then select and rank the top-{top_k} most relevant items.
+
+## Query:
+{query}
+
+## Available Memory Items:
+{items_data}
+
+## Output Format:
+Provide your response as a JSON object whose "items" array lists item IDs, ordered from most to least relevant:
+```json
+{{
+  "items": ["item_id_1", "item_id_2", "item_id_3"]
+}}
+```
+
+Important:
+- Include up to {top_k} most relevant items
+- Order matters: first ID should be most relevant
+- Only include items that are actually relevant to the query
+- Empty array is acceptable if no relevant items are found
+"""
diff --git a/src/memu/prompts/retrieve/llm_resource_ranker.py b/src/memu/prompts/retrieve/llm_resource_ranker.py
@@ -0,0 +1,27 @@
+PROMPT = """Your task is to search through the provided resources and identify the most relevant ones for the given query.
+
+These resources are related to the following categories and items that were already identified:
+{context_info}
+
+Analyze the query and the available resources, then select and rank the top-{top_k} most relevant resources.
+
+## Query:
+{query}
+
+## Available Resources:
+{resources_data}
+
+## Output Format:
+Provide your response as a JSON object whose "resources" array lists resource IDs, ordered from most to least relevant:
+```json
+{{
+  "resources": ["resource_id_1", "resource_id_2", "resource_id_3"]
+}}
+```
+
+Important:
+- Include up to {top_k} most relevant resources
+- Order matters: first ID should be most relevant
+- Only include resources that are actually relevant to the query
+- Empty array is acceptable if no relevant resources are found
+"""
diff --git a/src/memu/prompts/retrieve/pre_retrieval_decision.py b/src/memu/prompts/retrieve/pre_retrieval_decision.py
@@ -0,0 +1,35 @@
+SYSTEM_PROMPT = """You are a retrieval decision assistant. Your task is to analyze whether a query requires retrieving information from memory or can be answered directly without retrieval.
+
+Consider these scenarios that DON'T need retrieval:
+- Greetings, casual chat, acknowledgments
+- Questions about current conversation/context only
+- General knowledge questions
+- Requests for clarification
+- Meta-questions about the system itself
+
+Consider these scenarios that NEED retrieval:
+- Questions about past events, conversations, or interactions
+- Queries about user preferences, habits, or characteristics
+- Requests to recall specific information
+- Questions that reference historical data"""
+
+USER_PROMPT = """Analyze the following query in the context of the conversation to determine if memory retrieval is needed.
+
+## Conversation History (Last 3 Turns):
+{conversation_history}
+
+## Current Query:
+{query}
+
+## Task:
+1. Determine if this query requires retrieving information from memory
+2. If retrieval is needed, rewrite the query to incorporate relevant context from the conversation history
+
+## Output Format:
+<decision>
+[Either "RETRIEVE" or "NO_RETRIEVE"]
+</decision>
+
+<rewritten_query>
+[If RETRIEVE: provide a rewritten query with context. If NO_RETRIEVE: return original query]
+</rewritten_query>"""
diff --git a/src/memu/prompts/retrieve/query_rewriter_judger.py b/src/memu/prompts/retrieve/query_rewriter_judger.py
@@ -0,0 +1,38 @@
+SYSTEM_PROMPT = """You are a query rewriting and retrieval sufficiency judge. You have two tasks:
+
+1. **Query Rewriting**: Incorporate conversation context to make the query more specific and clear
+2. **Sufficiency Judgment**: Determine if the retrieved content is enough to answer the query
+
+You should be conservative - only mark as "ENOUGH" when the content truly provides adequate information."""
+
+USER_PROMPT = """Given the conversation history, current query, and retrieved content, perform two tasks:
+
+## Conversation History (Last 3 Turns):
+{conversation_history}
+
+## Original Query:
+{original_query}
+
+## Retrieved Content So Far:
+{retrieved_content}
+
+## Tasks:
+
+### 1. Query Rewriting
+Rewrite the query to incorporate relevant context from the conversation history. Make it more specific and clear.
+
+### 2. Sufficiency Judgment
+Analyze if the retrieved content is sufficient to answer the query. Consider:
+1. Does the retrieved content directly address the query?
+2. Is the information specific and detailed enough?
+3. Are there obvious gaps or missing details?
+4. Did the user explicitly ask to recall or remember more information?
+
+## Output Format:
+<rewritten_query>
+[Provide the rewritten query with conversation context]
+</rewritten_query>
+
+<judgement>
+[Either "ENOUGH" if sufficient, or "MORE" if additional information is needed]
+</judgement>"""