
Commit b324f48

abrookins and claude committed
merge: resolve conflicts with main branch
- Merged recency configuration and optimize_query parameter in client
- Updated MCP search to use optimize_query while preserving Pydantic returns

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
2 parents 58ee06b + a930acc commit b324f48

File tree

13 files changed: +1221 −30 lines changed


agent-memory-client/agent_memory_client/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 memory management capabilities for AI agents and applications.
 """
 
-__version__ = "0.9.2"
+__version__ = "0.10.0"
 
 from .client import MemoryAPIClient, MemoryClientConfig, create_memory_client
 from .exceptions import (

agent-memory-client/agent_memory_client/client.py

Lines changed: 29 additions & 6 deletions
@@ -576,12 +576,13 @@ async def search_long_term_memory(
         recency: RecencyConfig | None = None,
         limit: int = 10,
         offset: int = 0,
+        optimize_query: bool = True,
     ) -> MemoryRecordResults:
         """
         Search long-term memories using semantic search and filters.
 
         Args:
-            text: Search query text for semantic similarity
+            text: Query for vector search - will be used for semantic similarity matching
             session_id: Optional session ID filter
             namespace: Optional namespace filter
             topics: Optional topics filter
@@ -593,6 +594,7 @@ async def search_long_term_memory(
             memory_type: Optional memory type filter
             limit: Maximum number of results to return (default: 10)
             offset: Offset for pagination (default: 0)
+            optimize_query: Whether to optimize the query for vector search using a fast model (default: True)
 
         Returns:
             MemoryRecordResults with matching memories and metadata
@@ -694,10 +696,14 @@ async def search_long_term_memory(
             if recency.server_side_recency is not None:
                 payload["server_side_recency"] = recency.server_side_recency
 
+        # Add optimize_query as query parameter
+        params = {"optimize_query": str(optimize_query).lower()}
+
         try:
             response = await self._client.post(
                 "/v1/long-term-memory/search",
                 json=payload,
+                params=params,
             )
             response.raise_for_status()
             data = response.json()
@@ -725,6 +731,7 @@ async def search_memory_tool(
         max_results: int = 5,
         min_relevance: float | None = None,
         user_id: str | None = None,
+        optimize_query: bool = False,
     ) -> dict[str, Any]:
         """
         Simplified long-term memory search designed for LLM tool use.
@@ -735,13 +742,14 @@ async def search_memory_tool(
         searches long-term memory, not working memory.
 
         Args:
-            query: The search query text
+            query: The query for vector search
             topics: Optional list of topic strings to filter by
             entities: Optional list of entity strings to filter by
             memory_type: Optional memory type ("episodic", "semantic", "message")
             max_results: Maximum results to return (default: 5)
             min_relevance: Optional minimum relevance score (0.0-1.0)
             user_id: Optional user ID to filter memories by
+            optimize_query: Whether to optimize the query for vector search (default: False - LLMs typically provide already optimized queries)
 
         Returns:
             Dict with 'memories' list and 'summary' for LLM consumption
@@ -793,6 +801,7 @@ async def search_memory_tool(
             distance_threshold=distance_threshold,
             limit=max_results,
             user_id=user_id_filter,
+            optimize_query=optimize_query,
         )
 
         # Format for LLM consumption
@@ -862,13 +871,13 @@ async def handle_tool_calls(client, tool_calls):
             "type": "function",
             "function": {
                 "name": "search_memory",
-                "description": "Search long-term memory for relevant information based on a query. Use this when you need to recall past conversations, user preferences, or previously stored information. Note: This searches only long-term memory, not current working memory.",
+                "description": "Search long-term memory for relevant information using a query for vector search. Use this when you need to recall past conversations, user preferences, or previously stored information. Note: This searches only long-term memory, not current working memory.",
                 "parameters": {
                     "type": "object",
                     "properties": {
                         "query": {
                             "type": "string",
-                            "description": "The search query describing what information you're looking for",
+                            "description": "The query for vector search describing what information you're looking for",
                         },
                         "topics": {
                             "type": "array",
@@ -902,6 +911,11 @@ async def handle_tool_calls(client, tool_calls):
                             "type": "string",
                             "description": "Optional user ID to filter memories by (e.g., 'user123')",
                         },
+                        "optimize_query": {
+                            "type": "boolean",
+                            "default": False,
+                            "description": "Whether to optimize the query for vector search (default: False - LLMs typically provide already optimized queries)",
+                        },
                     },
                     "required": ["query"],
                 },
@@ -2172,20 +2186,22 @@ async def memory_prompt(
         context_window_max: int | None = None,
         long_term_search: dict[str, Any] | None = None,
         user_id: str | None = None,
+        optimize_query: bool = True,
     ) -> dict[str, Any]:
         """
         Hydrate a user query with memory context and return a prompt ready to send to an LLM.
 
         NOTE: `long_term_search` uses the same filter options as `search_long_term_memories`.
 
         Args:
-            query: The input text to find relevant context for
+            query: The query for vector search to find relevant context for
             session_id: Optional session ID to include session messages
             namespace: Optional namespace for the session
             model_name: Optional model name to determine context window size
             context_window_max: Optional direct specification of context window tokens
             long_term_search: Optional search parameters for long-term memory
             user_id: Optional user ID for the session
+            optimize_query: Whether to optimize the query for vector search using a fast model (default: True)
 
         Returns:
             Dict with messages hydrated with relevant memory context
@@ -2242,10 +2258,14 @@ async def memory_prompt(
                 }
             payload["long_term_search"] = long_term_search
 
+        # Add optimize_query as query parameter
+        params = {"optimize_query": str(optimize_query).lower()}
+
         try:
             response = await self._client.post(
                 "/v1/memory/prompt",
                 json=payload,
+                params=params,
             )
             response.raise_for_status()
             result = response.json()
@@ -2269,6 +2289,7 @@ async def hydrate_memory_prompt(
         distance_threshold: float | None = None,
         memory_type: dict[str, Any] | None = None,
         limit: int = 10,
+        optimize_query: bool = True,
     ) -> dict[str, Any]:
         """
         Hydrate a user query with long-term memory context using filters.
@@ -2277,7 +2298,7 @@ async def hydrate_memory_prompt(
         long-term memory search with the specified filters.
 
         Args:
-            query: The input text to find relevant context for
+            query: The query for vector search to find relevant context for
             session_id: Optional session ID filter (as dict)
             namespace: Optional namespace filter (as dict)
             topics: Optional topics filter (as dict)
@@ -2288,6 +2309,7 @@ async def hydrate_memory_prompt(
             distance_threshold: Optional distance threshold
             memory_type: Optional memory type filter (as dict)
             limit: Maximum number of long-term memories to include
+            optimize_query: Whether to optimize the query for vector search using a fast model (default: True)
 
         Returns:
             Dict with messages hydrated with relevant long-term memories
@@ -2319,6 +2341,7 @@ async def hydrate_memory_prompt(
         return await self.memory_prompt(
             query=query,
             long_term_search=long_term_search,
+            optimize_query=optimize_query,
         )
 
     def _deep_merge_dicts(
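
The new flag rides along on existing call sites without breaking them, since it defaults to True. A minimal usage sketch (assumptions: a server running at http://localhost:8000 and that MemoryClientConfig takes a base_url field; adapt to your deployment):

import asyncio

from agent_memory_client import MemoryAPIClient, MemoryClientConfig


async def main() -> None:
    client = MemoryAPIClient(config=MemoryClientConfig(base_url="http://localhost:8000"))

    # Conversational phrasing: let the server's fast model rewrite it (default behavior)
    results = await client.search_long_term_memory(
        text="can you tell me what the user said about their travel plans?",
        limit=5,
    )

    # Already-terse query (e.g. from an LLM tool call): skip the rewrite round trip
    results = await client.search_long_term_memory(
        text="user travel plans destinations",
        optimize_query=False,
        limit=5,
    )
    print(results)


asyncio.run(main())

Note the asymmetric defaults: search_long_term_memory defaults optimize_query to True for human-phrased input, while search_memory_tool defaults it to False because LLM tool callers usually emit already-compact queries.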

agent_memory_server/api.py

Lines changed: 7 additions & 1 deletion
@@ -558,13 +558,15 @@ async def create_long_term_memory(
 @router.post("/v1/long-term-memory/search", response_model=MemoryRecordResultsResponse)
 async def search_long_term_memory(
     payload: SearchRequest,
+    optimize_query: bool = True,
     current_user: UserInfo = Depends(get_current_user),
 ):
     """
     Run a semantic search on long-term memory with filtering options.
 
     Args:
         payload: Search payload with filter objects for precise queries
+        optimize_query: Whether to optimize the query for vector search using a fast model (default: True)
 
     Returns:
         List of search results
@@ -581,6 +583,7 @@ async def search_long_term_memory(
         "distance_threshold": payload.distance_threshold,
         "limit": payload.limit,
         "offset": payload.offset,
+        "optimize_query": optimize_query,
         **filters,
     }
 
@@ -651,13 +654,14 @@ async def delete_long_term_memory(
 @router.post("/v1/memory/prompt", response_model=MemoryPromptResponse)
 async def memory_prompt(
     params: MemoryPromptRequest,
+    optimize_query: bool = True,
     current_user: UserInfo = Depends(get_current_user),
 ) -> MemoryPromptResponse:
     """
     Hydrate a user query with memory context and return a prompt
     ready to send to an LLM.
 
-    `query` is the input text that the caller of this API wants to use to find
+    `query` is the query for vector search that the caller of this API wants to use to find
     relevant context. If `session_id` is provided and matches an existing
     session, the resulting prompt will include those messages as the immediate
     history of messages leading to a message containing `query`.
@@ -668,6 +672,7 @@ async def memory_prompt(
 
     Args:
         params: MemoryPromptRequest
+        optimize_query: Whether to optimize the query for vector search using a fast model (default: True)
 
     Returns:
         List of messages to send to an LLM, hydrated with relevant memory context
@@ -773,6 +778,7 @@ async def memory_prompt(
     logger.debug(f"[memory_prompt] Search payload: {search_payload}")
     long_term_memories = await search_long_term_memory(
         search_payload,
+        optimize_query=optimize_query,
     )
 
     logger.debug(f"[memory_prompt] Long-term memories: {long_term_memories}")

agent_memory_server/config.py

Lines changed: 21 additions & 0 deletions
@@ -56,6 +56,12 @@ class Settings(BaseSettings):
     anthropic_api_base: str | None = None
     generation_model: str = "gpt-4o"
     embedding_model: str = "text-embedding-3-small"
+
+    # Model selection for query optimization
+    slow_model: str = "gpt-4o"  # Slower, more capable model for complex tasks
+    fast_model: str = (
+        "gpt-4o-mini"  # Faster, smaller model for quick tasks like query optimization
+    )
     port: int = 8000
     mcp_port: int = 9000
 
@@ -124,6 +130,21 @@ class Settings(BaseSettings):
         0.7  # Fraction of context window that triggers summarization
     )
 
+    # Query optimization settings
+    query_optimization_prompt_template: str = """Transform this natural language query into an optimized version for semantic search. The goal is to make it more effective for finding semantically similar content while preserving the original intent.
+
+Guidelines:
+- Keep the core meaning and intent
+- Use more specific and descriptive terms
+- Remove unnecessary words like "tell me", "I want to know", "can you"
+- Focus on the key concepts and topics
+- Make it concise but comprehensive
+
+Original query: {query}
+
+Optimized query:"""
+    min_optimized_query_length: int = 2
+
     # Other Application settings
     log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"
     default_mcp_user_id: str | None = None
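
Since Settings extends pydantic's BaseSettings, the new fields accept keyword overrides at construction time and, assuming the project uses the default env-var mapping with no prefix, should also be settable through FAST_MODEL / SLOW_MODEL environment variables. A small sketch under those assumptions:

from agent_memory_server.config import Settings

# Keyword overrides always work with BaseSettings; environment variables are
# read when the corresponding keyword is absent (default pydantic-settings
# behavior, assumed here).
settings = Settings(fast_model="gpt-4o-mini", slow_model="gpt-4o")
assert settings.min_optimized_query_length == 2  # default from this diff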

agent_memory_server/llms.py

Lines changed: 69 additions & 0 deletions
@@ -423,3 +423,72 @@ async def get_model_client(
         raise ValueError(f"Unsupported model provider: {model_config.provider}")
 
     return _model_clients[model_name]
+
+
+async def optimize_query_for_vector_search(
+    query: str,
+    model_name: str | None = None,
+) -> str:
+    """
+    Optimize a user query for vector search using a fast model.
+
+    This function takes a natural language query and rewrites it to be more effective
+    for semantic similarity search. It uses a fast, small model to improve search
+    performance while maintaining query intent.
+
+    Args:
+        query: The original user query to optimize
+        model_name: Model to use for optimization (defaults to settings.fast_model)
+
+    Returns:
+        Optimized query string better suited for vector search
+    """
+    if not query or not query.strip():
+        return query
+
+    # Use fast model from settings if not specified
+    effective_model = model_name or settings.fast_model
+
+    # Create optimization prompt from config template
+    optimization_prompt = settings.query_optimization_prompt_template.format(
+        query=query
+    )
+
+    try:
+        client = await get_model_client(effective_model)
+
+        response = await client.create_chat_completion(
+            model=effective_model,
+            prompt=optimization_prompt,
+        )
+
+        if (
+            hasattr(response, "choices")
+            and response.choices
+            and len(response.choices) > 0
+        ):
+            optimized = ""
+            if hasattr(response.choices[0], "message"):
+                optimized = response.choices[0].message.content
+            elif hasattr(response.choices[0], "text"):
+                optimized = response.choices[0].text
+            else:
+                optimized = str(response.choices[0])
+
+            # Clean up the response
+            optimized = optimized.strip()
+
+            # Fallback to original if optimization failed
+            if not optimized or len(optimized) < settings.min_optimized_query_length:
+                logger.warning(f"Query optimization failed for: {query}")
+                return query
+
+            logger.debug(f"Optimized query: '{query}' -> '{optimized}'")
+            return optimized
+
+    except Exception as e:
+        logger.warning(f"Failed to optimize query '{query}': {e}")
+        # Return original query if optimization fails
+        return query
+
+    return query
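
A hedged usage sketch of the new helper (requires whatever API key the configured fast model needs; the sample query and printed output are illustrative only):

import asyncio

from agent_memory_server.llms import optimize_query_for_vector_search


async def demo() -> None:
    # Errors, empty queries, and too-short completions all fall back to the
    # original text, so callers never need their own failure handling.
    optimized = await optimize_query_for_vector_search(
        "can you tell me which restaurants the user liked in Paris?"
    )
    print(optimized)  # e.g. "user favorite restaurants Paris" (model-dependent)


asyncio.run(demo())

The fallback design is worth noting: a failed optimization degrades to the unmodified query rather than failing the search.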

agent_memory_server/long_term_memory.py

Lines changed: 10 additions & 3 deletions
@@ -29,6 +29,7 @@
     AnthropicClientWrapper,
     OpenAIClientWrapper,
     get_model_client,
+    optimize_query_for_vector_search,
 )
 from agent_memory_server.models import (
     ExtractedMemoryRecord,
@@ -704,13 +705,13 @@ async def search_long_term_memories(
     recency_params: dict | None = None,
     limit: int = 10,
     offset: int = 0,
+    optimize_query: bool = True,
 ) -> MemoryRecordResults:
     """
     Search for long-term memories using the pluggable VectorStore adapter.
 
     Args:
-        text: Search query text
-        redis: Redis client (kept for compatibility but may be unused depending on backend)
+        text: Query for vector search - will be used for semantic similarity matching
         session_id: Optional session ID filter
         user_id: Optional user ID filter
         namespace: Optional namespace filter
@@ -724,16 +725,22 @@ async def search_long_term_memories(
         memory_hash: Optional memory hash filter
         limit: Maximum number of results
         offset: Offset for pagination
+        optimize_query: Whether to optimize the query for vector search using a fast model (default: True)
 
     Returns:
         MemoryRecordResults containing matching memories
     """
+    # Optimize query for vector search if requested
+    search_query = text
+    if optimize_query and text:
+        search_query = await optimize_query_for_vector_search(text)
+
     # Get the VectorStore adapter
     adapter = await get_vectorstore_adapter()
 
     # Delegate search to the adapter
     return await adapter.search_memories(
-        query=text,
+        query=search_query,
         session_id=session_id,
         user_id=user_id,
         namespace=namespace,
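
End to end, then, a default search now costs one extra fast-model round trip before the vector store is queried. Opting out at the server-function level is a one-argument change; a fragment (filters omitted, assumed to run inside an async context with the server configured):

from agent_memory_server.long_term_memory import search_long_term_memories

# optimize_query=False sends the raw text straight to the vectorstore
# adapter: no fast-model rewrite, no extra latency or token cost.
results = await search_long_term_memories(
    text="user travel plans",
    optimize_query=False,
    limit=10,
)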
