
Commit b324f48

abrookins and claude committed
merge: resolve conflicts with main branch
- Merged recency configuration and optimize_query parameter in client
- Updated MCP search to use optimize_query while preserving Pydantic returns

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
2 parents 58ee06b + a930acc commit b324f48

File tree

13 files changed: +1221 −30 lines changed


agent-memory-client/agent_memory_client/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 memory management capabilities for AI agents and applications.
 """
 
-__version__ = "0.9.2"
+__version__ = "0.10.0"
 
 from .client import MemoryAPIClient, MemoryClientConfig, create_memory_client
 from .exceptions import (

agent-memory-client/agent_memory_client/client.py

Lines changed: 29 additions & 6 deletions
@@ -576,12 +576,13 @@ async def search_long_term_memory(
         recency: RecencyConfig | None = None,
         limit: int = 10,
         offset: int = 0,
+        optimize_query: bool = True,
     ) -> MemoryRecordResults:
         """
         Search long-term memories using semantic search and filters.
 
         Args:
-            text: Search query text for semantic similarity
+            text: Query for vector search - will be used for semantic similarity matching
             session_id: Optional session ID filter
             namespace: Optional namespace filter
             topics: Optional topics filter
@@ -593,6 +594,7 @@ async def search_long_term_memory(
             memory_type: Optional memory type filter
             limit: Maximum number of results to return (default: 10)
             offset: Offset for pagination (default: 0)
+            optimize_query: Whether to optimize the query for vector search using a fast model (default: True)
 
         Returns:
             MemoryRecordResults with matching memories and metadata
@@ -694,10 +696,14 @@ async def search_long_term_memory(
             if recency.server_side_recency is not None:
                 payload["server_side_recency"] = recency.server_side_recency
 
+        # Add optimize_query as query parameter
+        params = {"optimize_query": str(optimize_query).lower()}
+
         try:
             response = await self._client.post(
                 "/v1/long-term-memory/search",
                 json=payload,
+                params=params,
             )
             response.raise_for_status()
             data = response.json()
@@ -725,6 +731,7 @@ async def search_memory_tool(
         max_results: int = 5,
         min_relevance: float | None = None,
         user_id: str | None = None,
+        optimize_query: bool = False,
     ) -> dict[str, Any]:
         """
         Simplified long-term memory search designed for LLM tool use.
@@ -735,13 +742,14 @@ async def search_memory_tool(
         searches long-term memory, not working memory.
 
         Args:
-            query: The search query text
+            query: The query for vector search
             topics: Optional list of topic strings to filter by
             entities: Optional list of entity strings to filter by
             memory_type: Optional memory type ("episodic", "semantic", "message")
             max_results: Maximum results to return (default: 5)
             min_relevance: Optional minimum relevance score (0.0-1.0)
             user_id: Optional user ID to filter memories by
+            optimize_query: Whether to optimize the query for vector search (default: False - LLMs typically provide already optimized queries)
 
         Returns:
             Dict with 'memories' list and 'summary' for LLM consumption
@@ -793,6 +801,7 @@ async def search_memory_tool(
             distance_threshold=distance_threshold,
             limit=max_results,
             user_id=user_id_filter,
+            optimize_query=optimize_query,
         )
 
         # Format for LLM consumption
@@ -862,13 +871,13 @@ async def handle_tool_calls(client, tool_calls):
             "type": "function",
             "function": {
                 "name": "search_memory",
-                "description": "Search long-term memory for relevant information based on a query. Use this when you need to recall past conversations, user preferences, or previously stored information. Note: This searches only long-term memory, not current working memory.",
+                "description": "Search long-term memory for relevant information using a query for vector search. Use this when you need to recall past conversations, user preferences, or previously stored information. Note: This searches only long-term memory, not current working memory.",
                 "parameters": {
                     "type": "object",
                     "properties": {
                         "query": {
                             "type": "string",
-                            "description": "The search query describing what information you're looking for",
+                            "description": "The query for vector search describing what information you're looking for",
                         },
                         "topics": {
                             "type": "array",
@@ -902,6 +911,11 @@ async def handle_tool_calls(client, tool_calls):
                             "type": "string",
                             "description": "Optional user ID to filter memories by (e.g., 'user123')",
                         },
+                        "optimize_query": {
+                            "type": "boolean",
+                            "default": False,
+                            "description": "Whether to optimize the query for vector search (default: False - LLMs typically provide already optimized queries)",
+                        },
                     },
                     "required": ["query"],
                 },
@@ -2172,20 +2186,22 @@ async def memory_prompt(
         context_window_max: int | None = None,
         long_term_search: dict[str, Any] | None = None,
         user_id: str | None = None,
+        optimize_query: bool = True,
     ) -> dict[str, Any]:
         """
         Hydrate a user query with memory context and return a prompt ready to send to an LLM.
 
         NOTE: `long_term_search` uses the same filter options as `search_long_term_memories`.
 
         Args:
-            query: The input text to find relevant context for
+            query: The query for vector search to find relevant context for
             session_id: Optional session ID to include session messages
             namespace: Optional namespace for the session
             model_name: Optional model name to determine context window size
             context_window_max: Optional direct specification of context window tokens
             long_term_search: Optional search parameters for long-term memory
             user_id: Optional user ID for the session
+            optimize_query: Whether to optimize the query for vector search using a fast model (default: True)
 
         Returns:
             Dict with messages hydrated with relevant memory context
@@ -2242,10 +2258,14 @@ async def memory_prompt(
                 }
             payload["long_term_search"] = long_term_search
 
+        # Add optimize_query as query parameter
+        params = {"optimize_query": str(optimize_query).lower()}
+
         try:
             response = await self._client.post(
                 "/v1/memory/prompt",
                 json=payload,
+                params=params,
             )
             response.raise_for_status()
             result = response.json()
@@ -2269,6 +2289,7 @@ async def hydrate_memory_prompt(
         distance_threshold: float | None = None,
         memory_type: dict[str, Any] | None = None,
         limit: int = 10,
+        optimize_query: bool = True,
     ) -> dict[str, Any]:
         """
         Hydrate a user query with long-term memory context using filters.
@@ -2277,7 +2298,7 @@ async def hydrate_memory_prompt(
         long-term memory search with the specified filters.
 
         Args:
-            query: The input text to find relevant context for
+            query: The query for vector search to find relevant context for
             session_id: Optional session ID filter (as dict)
             namespace: Optional namespace filter (as dict)
             topics: Optional topics filter (as dict)
@@ -2288,6 +2309,7 @@ async def hydrate_memory_prompt(
             distance_threshold: Optional distance threshold
             memory_type: Optional memory type filter (as dict)
             limit: Maximum number of long-term memories to include
+            optimize_query: Whether to optimize the query for vector search using a fast model (default: True)
 
         Returns:
             Dict with messages hydrated with relevant long-term memories
@@ -2319,6 +2341,7 @@ async def hydrate_memory_prompt(
         return await self.memory_prompt(
             query=query,
             long_term_search=long_term_search,
+            optimize_query=optimize_query,
         )
 
     def _deep_merge_dicts(
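
The new flag rides along on existing call sites without breaking them, since it defaults to True. A minimal usage sketch (assumptions: a server running at http://localhost:8000 and that MemoryClientConfig takes a base_url field; adapt to your deployment):

import asyncio

from agent_memory_client import MemoryAPIClient, MemoryClientConfig


async def main() -> None:
    client = MemoryAPIClient(config=MemoryClientConfig(base_url="http://localhost:8000"))

    # Conversational phrasing: let the server's fast model rewrite it (default behavior)
    results = await client.search_long_term_memory(
        text="can you tell me what the user said about their travel plans?",
        limit=5,
    )

    # Already-terse query (e.g. from an LLM tool call): skip the rewrite round trip
    results = await client.search_long_term_memory(
        text="user travel plans destinations",
        optimize_query=False,
        limit=5,
    )
    print(results)


asyncio.run(main())

Note the asymmetric defaults: search_long_term_memory defaults optimize_query to True for human-phrased input, while search_memory_tool defaults it to False because LLM tool callers usually emit already-compact queries.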

agent_memory_server/api.py

Lines changed: 7 additions & 1 deletion
@@ -558,13 +558,15 @@ async def create_long_term_memory(
 @router.post("/v1/long-term-memory/search", response_model=MemoryRecordResultsResponse)
 async def search_long_term_memory(
     payload: SearchRequest,
+    optimize_query: bool = True,
     current_user: UserInfo = Depends(get_current_user),
 ):
     """
     Run a semantic search on long-term memory with filtering options.
 
     Args:
         payload: Search payload with filter objects for precise queries
+        optimize_query: Whether to optimize the query for vector search using a fast model (default: True)
 
     Returns:
         List of search results
@@ -581,6 +583,7 @@ async def search_long_term_memory(
         "distance_threshold": payload.distance_threshold,
         "limit": payload.limit,
         "offset": payload.offset,
+        "optimize_query": optimize_query,
         **filters,
     }
 
@@ -651,13 +654,14 @@ async def delete_long_term_memory(
 @router.post("/v1/memory/prompt", response_model=MemoryPromptResponse)
 async def memory_prompt(
     params: MemoryPromptRequest,
+    optimize_query: bool = True,
     current_user: UserInfo = Depends(get_current_user),
 ) -> MemoryPromptResponse:
     """
     Hydrate a user query with memory context and return a prompt
     ready to send to an LLM.
 
-    `query` is the input text that the caller of this API wants to use to find
+    `query` is the query for vector search that the caller of this API wants to use to find
     relevant context. If `session_id` is provided and matches an existing
     session, the resulting prompt will include those messages as the immediate
     history of messages leading to a message containing `query`.
@@ -668,6 +672,7 @@ async def memory_prompt(
 
     Args:
         params: MemoryPromptRequest
+        optimize_query: Whether to optimize the query for vector search using a fast model (default: True)
 
     Returns:
         List of messages to send to an LLM, hydrated with relevant memory context
@@ -773,6 +778,7 @@ async def memory_prompt(
     logger.debug(f"[memory_prompt] Search payload: {search_payload}")
     long_term_memories = await search_long_term_memory(
         search_payload,
+        optimize_query=optimize_query,
     )
 
     logger.debug(f"[memory_prompt] Long-term memories: {long_term_memories}")

agent_memory_server/config.py

Lines changed: 21 additions & 0 deletions
@@ -56,6 +56,12 @@ class Settings(BaseSettings):
     anthropic_api_base: str | None = None
     generation_model: str = "gpt-4o"
     embedding_model: str = "text-embedding-3-small"
+
+    # Model selection for query optimization
+    slow_model: str = "gpt-4o"  # Slower, more capable model for complex tasks
+    fast_model: str = (
+        "gpt-4o-mini"  # Faster, smaller model for quick tasks like query optimization
+    )
     port: int = 8000
     mcp_port: int = 9000
 
@@ -124,6 +130,21 @@ class Settings(BaseSettings):
         0.7  # Fraction of context window that triggers summarization
     )
 
+    # Query optimization settings
+    query_optimization_prompt_template: str = """Transform this natural language query into an optimized version for semantic search. The goal is to make it more effective for finding semantically similar content while preserving the original intent.
+
+Guidelines:
+- Keep the core meaning and intent
+- Use more specific and descriptive terms
+- Remove unnecessary words like "tell me", "I want to know", "can you"
+- Focus on the key concepts and topics
+- Make it concise but comprehensive
+
+Original query: {query}
+
+Optimized query:"""
+    min_optimized_query_length: int = 2
+
     # Other Application settings
     log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"
     default_mcp_user_id: str | None = None
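
Since Settings extends pydantic's BaseSettings, the new fields accept keyword overrides at construction time and, assuming the project uses the default env-var mapping with no prefix, should also be settable through FAST_MODEL / SLOW_MODEL environment variables. A small sketch under those assumptions:

from agent_memory_server.config import Settings

# Keyword overrides always work with BaseSettings; environment variables are
# read when the corresponding keyword is absent (default pydantic-settings
# behavior, assumed here).
settings = Settings(fast_model="gpt-4o-mini", slow_model="gpt-4o")
assert settings.min_optimized_query_length == 2  # default from this diff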

agent_memory_server/llms.py

Lines changed: 69 additions & 0 deletions
@@ -423,3 +423,72 @@ async def get_model_client(
         raise ValueError(f"Unsupported model provider: {model_config.provider}")
 
     return _model_clients[model_name]
+
+
+async def optimize_query_for_vector_search(
+    query: str,
+    model_name: str | None = None,
+) -> str:
+    """
+    Optimize a user query for vector search using a fast model.
+
+    This function takes a natural language query and rewrites it to be more effective
+    for semantic similarity search. It uses a fast, small model to improve search
+    performance while maintaining query intent.
+
+    Args:
+        query: The original user query to optimize
+        model_name: Model to use for optimization (defaults to settings.fast_model)
+
+    Returns:
+        Optimized query string better suited for vector search
+    """
+    if not query or not query.strip():
+        return query
+
+    # Use fast model from settings if not specified
+    effective_model = model_name or settings.fast_model
+
+    # Create optimization prompt from config template
+    optimization_prompt = settings.query_optimization_prompt_template.format(
+        query=query
+    )
+
+    try:
+        client = await get_model_client(effective_model)
+
+        response = await client.create_chat_completion(
+            model=effective_model,
+            prompt=optimization_prompt,
+        )
+
+        if (
+            hasattr(response, "choices")
+            and response.choices
+            and len(response.choices) > 0
+        ):
+            optimized = ""
+            if hasattr(response.choices[0], "message"):
+                optimized = response.choices[0].message.content
+            elif hasattr(response.choices[0], "text"):
+                optimized = response.choices[0].text
+            else:
+                optimized = str(response.choices[0])
+
+            # Clean up the response
+            optimized = optimized.strip()
+
+            # Fallback to original if optimization failed
+            if not optimized or len(optimized) < settings.min_optimized_query_length:
+                logger.warning(f"Query optimization failed for: {query}")
+                return query
+
+            logger.debug(f"Optimized query: '{query}' -> '{optimized}'")
+            return optimized
+
+    except Exception as e:
+        logger.warning(f"Failed to optimize query '{query}': {e}")
+        # Return original query if optimization fails
+        return query
+
+    return query
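
A hedged usage sketch of the new helper (requires whatever API key the configured fast model needs; the sample query and printed output are illustrative only):

import asyncio

from agent_memory_server.llms import optimize_query_for_vector_search


async def demo() -> None:
    # Errors, empty queries, and too-short completions all fall back to the
    # original text, so callers never need their own failure handling.
    optimized = await optimize_query_for_vector_search(
        "can you tell me which restaurants the user liked in Paris?"
    )
    print(optimized)  # e.g. "user favorite restaurants Paris" (model-dependent)


asyncio.run(demo())

The fallback design is worth noting: a failed optimization degrades to the unmodified query rather than failing the search.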

agent_memory_server/long_term_memory.py

Lines changed: 10 additions & 3 deletions
@@ -29,6 +29,7 @@
     AnthropicClientWrapper,
     OpenAIClientWrapper,
     get_model_client,
+    optimize_query_for_vector_search,
 )
 from agent_memory_server.models import (
     ExtractedMemoryRecord,
@@ -704,13 +705,13 @@ async def search_long_term_memories(
     recency_params: dict | None = None,
     limit: int = 10,
     offset: int = 0,
+    optimize_query: bool = True,
 ) -> MemoryRecordResults:
     """
     Search for long-term memories using the pluggable VectorStore adapter.
 
     Args:
-        text: Search query text
-        redis: Redis client (kept for compatibility but may be unused depending on backend)
+        text: Query for vector search - will be used for semantic similarity matching
         session_id: Optional session ID filter
         user_id: Optional user ID filter
         namespace: Optional namespace filter
@@ -724,16 +725,22 @@ async def search_long_term_memories(
         memory_hash: Optional memory hash filter
         limit: Maximum number of results
         offset: Offset for pagination
+        optimize_query: Whether to optimize the query for vector search using a fast model (default: True)
 
     Returns:
         MemoryRecordResults containing matching memories
     """
+    # Optimize query for vector search if requested
+    search_query = text
+    if optimize_query and text:
+        search_query = await optimize_query_for_vector_search(text)
+
     # Get the VectorStore adapter
     adapter = await get_vectorstore_adapter()
 
     # Delegate search to the adapter
     return await adapter.search_memories(
-        query=text,
+        query=search_query,
         session_id=session_id,
         user_id=user_id,
         namespace=namespace,
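
End to end, then, a default search now costs one extra fast-model round trip before the vector store is queried. Opting out at the server-function level is a one-argument change; a fragment (filters omitted, assumed to run inside an async context with the server configured):

from agent_memory_server.long_term_memory import search_long_term_memories

# optimize_query=False sends the raw text straight to the vectorstore
# adapter: no fast-model rewrite, no extra latency or token cost.
results = await search_long_term_memories(
    text="user travel plans",
    optimize_query=False,
    limit=10,
)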
