
Commit a930acc

Merge pull request #44 from redis/feature/look-at-times
Feat: Add query optimization for vector search with configurable models
2 parents da35c4e + 812ca86

File tree

13 files changed: +1221 −30 lines


agent-memory-client/agent_memory_client/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 memory management capabilities for AI agents and applications.
 """
 
-__version__ = "0.9.2"
+__version__ = "0.10.0"
 
 from .client import MemoryAPIClient, MemoryClientConfig, create_memory_client
 from .exceptions import (

agent-memory-client/agent_memory_client/client.py

Lines changed: 29 additions & 6 deletions
@@ -574,12 +574,13 @@ async def search_long_term_memory(
         memory_type: MemoryType | dict[str, Any] | None = None,
         limit: int = 10,
         offset: int = 0,
+        optimize_query: bool = True,
     ) -> MemoryRecordResults:
         """
         Search long-term memories using semantic search and filters.
 
         Args:
-            text: Search query text for semantic similarity
+            text: Query for vector search - will be used for semantic similarity matching
             session_id: Optional session ID filter
             namespace: Optional namespace filter
             topics: Optional topics filter
@@ -591,6 +592,7 @@ async def search_long_term_memory(
             memory_type: Optional memory type filter
             limit: Maximum number of results to return (default: 10)
             offset: Offset for pagination (default: 0)
+            optimize_query: Whether to optimize the query for vector search using a fast model (default: True)
 
         Returns:
             MemoryRecordResults with matching memories and metadata
@@ -669,10 +671,14 @@ async def search_long_term_memory(
         if distance_threshold is not None:
             payload["distance_threshold"] = distance_threshold
 
+        # Add optimize_query as query parameter
+        params = {"optimize_query": str(optimize_query).lower()}
+
         try:
             response = await self._client.post(
                 "/v1/long-term-memory/search",
                 json=payload,
+                params=params,
             )
             response.raise_for_status()
             return MemoryRecordResults(**response.json())
@@ -691,6 +697,7 @@ async def search_memory_tool(
         max_results: int = 5,
         min_relevance: float | None = None,
         user_id: str | None = None,
+        optimize_query: bool = False,
     ) -> dict[str, Any]:
         """
         Simplified long-term memory search designed for LLM tool use.
@@ -701,13 +708,14 @@ async def search_memory_tool(
         searches long-term memory, not working memory.
 
         Args:
-            query: The search query text
+            query: The query for vector search
             topics: Optional list of topic strings to filter by
             entities: Optional list of entity strings to filter by
             memory_type: Optional memory type ("episodic", "semantic", "message")
             max_results: Maximum results to return (default: 5)
             min_relevance: Optional minimum relevance score (0.0-1.0)
             user_id: Optional user ID to filter memories by
+            optimize_query: Whether to optimize the query for vector search (default: False - LLMs typically provide already optimized queries)
 
         Returns:
             Dict with 'memories' list and 'summary' for LLM consumption
@@ -759,6 +767,7 @@ async def search_memory_tool(
             distance_threshold=distance_threshold,
             limit=max_results,
             user_id=user_id_filter,
+            optimize_query=optimize_query,
         )
 
         # Format for LLM consumption
@@ -828,13 +837,13 @@ async def handle_tool_calls(client, tool_calls):
             "type": "function",
             "function": {
                 "name": "search_memory",
-                "description": "Search long-term memory for relevant information based on a query. Use this when you need to recall past conversations, user preferences, or previously stored information. Note: This searches only long-term memory, not current working memory.",
+                "description": "Search long-term memory for relevant information using a query for vector search. Use this when you need to recall past conversations, user preferences, or previously stored information. Note: This searches only long-term memory, not current working memory.",
                 "parameters": {
                     "type": "object",
                     "properties": {
                         "query": {
                             "type": "string",
-                            "description": "The search query describing what information you're looking for",
+                            "description": "The query for vector search describing what information you're looking for",
                         },
                         "topics": {
                             "type": "array",
@@ -868,6 +877,11 @@ async def handle_tool_calls(client, tool_calls):
                             "type": "string",
                             "description": "Optional user ID to filter memories by (e.g., 'user123')",
                         },
+                        "optimize_query": {
+                            "type": "boolean",
+                            "default": False,
+                            "description": "Whether to optimize the query for vector search (default: False - LLMs typically provide already optimized queries)",
+                        },
                     },
                     "required": ["query"],
                 },
@@ -2138,20 +2152,22 @@ async def memory_prompt(
         context_window_max: int | None = None,
         long_term_search: dict[str, Any] | None = None,
         user_id: str | None = None,
+        optimize_query: bool = True,
     ) -> dict[str, Any]:
         """
         Hydrate a user query with memory context and return a prompt ready to send to an LLM.
 
         NOTE: `long_term_search` uses the same filter options as `search_long_term_memories`.
 
         Args:
-            query: The input text to find relevant context for
+            query: The query for vector search to find relevant context for
             session_id: Optional session ID to include session messages
             namespace: Optional namespace for the session
             model_name: Optional model name to determine context window size
             context_window_max: Optional direct specification of context window tokens
             long_term_search: Optional search parameters for long-term memory
             user_id: Optional user ID for the session
+            optimize_query: Whether to optimize the query for vector search using a fast model (default: True)
 
         Returns:
             Dict with messages hydrated with relevant memory context
@@ -2208,10 +2224,14 @@ async def memory_prompt(
             }
             payload["long_term_search"] = long_term_search
 
+        # Add optimize_query as query parameter
+        params = {"optimize_query": str(optimize_query).lower()}
+
         try:
             response = await self._client.post(
                 "/v1/memory/prompt",
                 json=payload,
+                params=params,
             )
             response.raise_for_status()
             result = response.json()
@@ -2235,6 +2255,7 @@ async def hydrate_memory_prompt(
         distance_threshold: float | None = None,
         memory_type: dict[str, Any] | None = None,
         limit: int = 10,
+        optimize_query: bool = True,
     ) -> dict[str, Any]:
         """
         Hydrate a user query with long-term memory context using filters.
@@ -2243,7 +2264,7 @@ async def hydrate_memory_prompt(
         long-term memory search with the specified filters.
 
         Args:
-            query: The input text to find relevant context for
+            query: The query for vector search to find relevant context for
             session_id: Optional session ID filter (as dict)
             namespace: Optional namespace filter (as dict)
             topics: Optional topics filter (as dict)
@@ -2254,6 +2275,7 @@ async def hydrate_memory_prompt(
             distance_threshold: Optional distance threshold
             memory_type: Optional memory type filter (as dict)
             limit: Maximum number of long-term memories to include
+            optimize_query: Whether to optimize the query for vector search using a fast model (default: True)
 
         Returns:
             Dict with messages hydrated with relevant long-term memories
@@ -2285,6 +2307,7 @@ async def hydrate_memory_prompt(
         return await self.memory_prompt(
             query=query,
             long_term_search=long_term_search,
+            optimize_query=optimize_query,
         )
 
     def _deep_merge_dicts(
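
For reference, a minimal client-side usage sketch of the new flag (not part of this diff; it assumes a running agent-memory-server at http://localhost:8000 and that create_memory_client accepts a base_url keyword):

import asyncio

from agent_memory_client import create_memory_client


async def main() -> None:
    # create_memory_client is assumed to return a configured MemoryAPIClient
    client = await create_memory_client(base_url="http://localhost:8000")

    # Default path: the server rewrites the raw query with the fast model
    # before embedding it for the vector search.
    results = await client.search_long_term_memory(
        text="can you tell me what the user said about their travel plans?",
        limit=5,
    )

    # Opt out when the query is already search-ready (e.g. produced by an LLM,
    # which is why search_memory_tool defaults to optimize_query=False).
    raw_results = await client.search_long_term_memory(
        text="user travel plans",
        optimize_query=False,
        limit=5,
    )
    # MemoryRecordResults is assumed to expose a .total count
    print(results.total, raw_results.total)


asyncio.run(main())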

agent_memory_server/api.py

Lines changed: 7 additions & 1 deletion
@@ -494,13 +494,15 @@ async def create_long_term_memory(
 @router.post("/v1/long-term-memory/search", response_model=MemoryRecordResultsResponse)
 async def search_long_term_memory(
     payload: SearchRequest,
+    optimize_query: bool = True,
     current_user: UserInfo = Depends(get_current_user),
 ):
     """
     Run a semantic search on long-term memory with filtering options.
 
     Args:
         payload: Search payload with filter objects for precise queries
+        optimize_query: Whether to optimize the query for vector search using a fast model (default: True)
 
     Returns:
         List of search results
@@ -517,6 +519,7 @@ async def search_long_term_memory(
         "distance_threshold": payload.distance_threshold,
         "limit": payload.limit,
         "offset": payload.offset,
+        "optimize_query": optimize_query,
         **filters,
     }
 
@@ -549,13 +552,14 @@ async def delete_long_term_memory(
 @router.post("/v1/memory/prompt", response_model=MemoryPromptResponse)
 async def memory_prompt(
     params: MemoryPromptRequest,
+    optimize_query: bool = True,
     current_user: UserInfo = Depends(get_current_user),
 ) -> MemoryPromptResponse:
     """
     Hydrate a user query with memory context and return a prompt
     ready to send to an LLM.
 
-    `query` is the input text that the caller of this API wants to use to find
+    `query` is the query for vector search that the caller of this API wants to use to find
     relevant context. If `session_id` is provided and matches an existing
     session, the resulting prompt will include those messages as the immediate
     history of messages leading to a message containing `query`.
@@ -566,6 +570,7 @@ async def memory_prompt(
 
     Args:
         params: MemoryPromptRequest
+        optimize_query: Whether to optimize the query for vector search using a fast model (default: True)
 
     Returns:
         List of messages to send to an LLM, hydrated with relevant memory context
@@ -671,6 +676,7 @@ async def memory_prompt(
     logger.debug(f"[memory_prompt] Search payload: {search_payload}")
     long_term_memories = await search_long_term_memory(
         search_payload,
+        optimize_query=optimize_query,
     )
 
     logger.debug(f"[memory_prompt] Long-term memories: {long_term_memories}")

agent_memory_server/config.py

Lines changed: 21 additions & 0 deletions
@@ -56,6 +56,12 @@ class Settings(BaseSettings):
     anthropic_api_base: str | None = None
     generation_model: str = "gpt-4o"
     embedding_model: str = "text-embedding-3-small"
+
+    # Model selection for query optimization
+    slow_model: str = "gpt-4o"  # Slower, more capable model for complex tasks
+    fast_model: str = (
+        "gpt-4o-mini"  # Faster, smaller model for quick tasks like query optimization
+    )
     port: int = 8000
     mcp_port: int = 9000
 
@@ -124,6 +130,21 @@ class Settings(BaseSettings):
         0.7  # Fraction of context window that triggers summarization
     )
 
+    # Query optimization settings
+    query_optimization_prompt_template: str = """Transform this natural language query into an optimized version for semantic search. The goal is to make it more effective for finding semantically similar content while preserving the original intent.
+
+Guidelines:
+- Keep the core meaning and intent
+- Use more specific and descriptive terms
+- Remove unnecessary words like "tell me", "I want to know", "can you"
+- Focus on the key concepts and topics
+- Make it concise but comprehensive
+
+Original query: {query}
+
+Optimized query:"""
+    min_optimized_query_length: int = 2
+
     # Other Application settings
     log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"
     default_mcp_user_id: str | None = None
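
Since Settings is a pydantic BaseSettings subclass, the new fields can presumably be overridden through environment variables, letting deployments swap the optimization model without code changes. A hedged sketch (the exact variable names depend on the Settings env prefix and config, which this diff doesn't show):

import os

# Assumed env names with no prefix; adjust to the project's actual convention.
os.environ["FAST_MODEL"] = "gpt-4o-mini"
os.environ["SLOW_MODEL"] = "gpt-4o"

from agent_memory_server.config import Settings

settings = Settings()
print(settings.fast_model, settings.min_optimized_query_length)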

agent_memory_server/llms.py

Lines changed: 69 additions & 0 deletions
@@ -423,3 +423,72 @@ async def get_model_client(
         raise ValueError(f"Unsupported model provider: {model_config.provider}")
 
     return _model_clients[model_name]
+
+
+async def optimize_query_for_vector_search(
+    query: str,
+    model_name: str | None = None,
+) -> str:
+    """
+    Optimize a user query for vector search using a fast model.
+
+    This function takes a natural language query and rewrites it to be more effective
+    for semantic similarity search. It uses a fast, small model to improve search
+    performance while maintaining query intent.
+
+    Args:
+        query: The original user query to optimize
+        model_name: Model to use for optimization (defaults to settings.fast_model)
+
+    Returns:
+        Optimized query string better suited for vector search
+    """
+    if not query or not query.strip():
+        return query
+
+    # Use fast model from settings if not specified
+    effective_model = model_name or settings.fast_model
+
+    # Create optimization prompt from config template
+    optimization_prompt = settings.query_optimization_prompt_template.format(
+        query=query
+    )
+
+    try:
+        client = await get_model_client(effective_model)
+
+        response = await client.create_chat_completion(
+            model=effective_model,
+            prompt=optimization_prompt,
+        )
+
+        if (
+            hasattr(response, "choices")
+            and response.choices
+            and len(response.choices) > 0
+        ):
+            optimized = ""
+            if hasattr(response.choices[0], "message"):
+                optimized = response.choices[0].message.content
+            elif hasattr(response.choices[0], "text"):
+                optimized = response.choices[0].text
+            else:
+                optimized = str(response.choices[0])
+
+            # Clean up the response
+            optimized = optimized.strip()
+
+            # Fallback to original if optimization failed
+            if not optimized or len(optimized) < settings.min_optimized_query_length:
+                logger.warning(f"Query optimization failed for: {query}")
+                return query
+
+            logger.debug(f"Optimized query: '{query}' -> '{optimized}'")
+            return optimized
+
+    except Exception as e:
+        logger.warning(f"Failed to optimize query '{query}': {e}")
+        # Return original query if optimization fails
+        return query
+
+    return query
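
A hedged usage sketch for the new helper (not part of this diff; requires provider credentials such as OPENAI_API_KEY so get_model_client can construct a real client):

import asyncio

from agent_memory_server.llms import optimize_query_for_vector_search


async def main() -> None:
    optimized = await optimize_query_for_vector_search(
        "can you tell me what the user prefers for breakfast?"
    )
    # On empty input, a too-short completion, a response without choices, or
    # any exception, the helper falls back to returning the original query,
    # so callers never lose the search text.
    print(optimized)  # e.g. "user breakfast preferences" (model-dependent)


asyncio.run(main())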

agent_memory_server/long_term_memory.py

Lines changed: 10 additions & 3 deletions
@@ -28,6 +28,7 @@
     AnthropicClientWrapper,
     OpenAIClientWrapper,
     get_model_client,
+    optimize_query_for_vector_search,
 )
 from agent_memory_server.models import (
     ExtractedMemoryRecord,
@@ -718,13 +719,13 @@ async def search_long_term_memories(
     memory_hash: MemoryHash | None = None,
     limit: int = 10,
     offset: int = 0,
+    optimize_query: bool = True,
 ) -> MemoryRecordResults:
     """
     Search for long-term memories using the pluggable VectorStore adapter.
 
     Args:
-        text: Search query text
-        redis: Redis client (kept for compatibility but may be unused depending on backend)
+        text: Query for vector search - will be used for semantic similarity matching
         session_id: Optional session ID filter
         user_id: Optional user ID filter
         namespace: Optional namespace filter
@@ -738,16 +739,22 @@ async def search_long_term_memories(
         memory_hash: Optional memory hash filter
         limit: Maximum number of results
         offset: Offset for pagination
+        optimize_query: Whether to optimize the query for vector search using a fast model (default: True)
 
     Returns:
         MemoryRecordResults containing matching memories
     """
+    # Optimize query for vector search if requested
+    search_query = text
+    if optimize_query and text:
+        search_query = await optimize_query_for_vector_search(text)
+
     # Get the VectorStore adapter
    adapter = await get_vectorstore_adapter()
 
     # Delegate search to the adapter
     return await adapter.search_memories(
-        query=text,
+        query=search_query,
         session_id=session_id,
         user_id=user_id,
         namespace=namespace,
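
Only the text handed to the embedding step changes; stored memories and all filters are untouched, so disabling optimization is the safe choice for exact-phrase lookups. A hedged server-side sketch (not part of this diff; the full search_long_term_memories signature isn't shown here, so the remaining parameters are assumed to default sensibly):

import asyncio

from agent_memory_server.long_term_memory import search_long_term_memories


async def main() -> None:
    results = await search_long_term_memories(
        text='find the note containing the exact phrase "Q3 budget freeze"',
        optimize_query=False,  # keep the literal wording for the vector query
        limit=3,
    )
    # MemoryRecordResults is assumed to expose a .memories list
    for memory in results.memories:
        print(memory.text)


asyncio.run(main())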
