Commit 19d5af5

Add context usage percentage to working memory endpoints
- Add context_usage_percentage field to WorkingMemoryResponse model
- Add _calculate_context_usage_percentage() helper function
- Update GET /v1/working-memory/{session_id} to return percentage
- Update PUT /v1/working-memory/{session_id} to return percentage based on final state (after potential summarization)
- Percentage calculated as (current_tokens / token_threshold) * 100 where token_threshold = context_window * 0.7
- Returns None when no model info provided, otherwise 0-100% value

Resolves #37

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-authored-by: Andrew Brookins <[email protected]>
1 parent: c3fd604 · commit: 19d5af5
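The formula in the commit message is easy to sanity-check in isolation. A minimal standalone sketch of the calculation — the function name and example numbers below are illustrative, not taken from this diff:

def context_usage_percentage(current_tokens: int, context_window: int) -> float:
    # Same 70%-of-context threshold the server uses before auto-summarization
    token_threshold = int(context_window * 0.7)
    percentage = (current_tokens / token_threshold) * 100.0
    return min(percentage, 100.0)  # capped at 100 for display

# Example: 3,500 tokens against a 10,000-token window
# -> threshold = 7,000 -> 50.0%
print(context_usage_percentage(3_500, 10_000))  # 50.0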

3 files changed: +73 additions, -11 deletions

agent_memory_server/api.py

Lines changed: 59 additions & 2 deletions
@@ -63,6 +63,41 @@ def _calculate_messages_token_count(messages: list[MemoryMessage]) -> int:
     return total_tokens
 
 
+def _calculate_context_usage_percentage(
+    messages: list[MemoryMessage],
+    model_name: ModelNameLiteral | None,
+    context_window_max: int | None,
+) -> float | None:
+    """
+    Calculate the percentage of context window used before auto-summarization triggers.
+
+    Args:
+        messages: List of messages to calculate token count for
+        model_name: The client's LLM model name for context window determination
+        context_window_max: Direct specification of context window max tokens
+
+    Returns:
+        Percentage (0-100) of context used, or None if no model info provided
+    """
+    if not messages or (not model_name and not context_window_max):
+        return None
+
+    # Calculate current token usage
+    current_tokens = _calculate_messages_token_count(messages)
+
+    # Get effective token limit for the client's model
+    max_tokens = _get_effective_token_limit(model_name, context_window_max)
+
+    # Use the same threshold as _summarize_working_memory (70% of context window)
+    token_threshold = int(max_tokens * 0.7)
+
+    # Calculate percentage of threshold used
+    percentage = (current_tokens / token_threshold) * 100.0
+
+    # Cap at 100% for display purposes
+    return min(percentage, 100.0)
+
+
 async def _summarize_working_memory(
     memory: WorkingMemory,
     model_name: ModelNameLiteral | None = None,
@@ -269,7 +304,18 @@ async def get_working_memory(
 
     logger.debug(f"Working mem: {working_mem}")
 
-    return working_mem
+    # Calculate context usage percentage
+    context_usage_percentage = _calculate_context_usage_percentage(
+        messages=working_mem.messages,
+        model_name=model_name,
+        context_window_max=context_window_max,
+    )
+
+    # Return WorkingMemoryResponse with percentage
+    return WorkingMemoryResponse(
+        **working_mem.model_dump(),
+        context_usage_percentage=context_usage_percentage,
+    )
 
 
 @router.put("/v1/working-memory/{session_id}", response_model=WorkingMemoryResponse)
@@ -348,7 +394,18 @@ async def put_working_memory(
         namespace=updated_memory.namespace,
     )
 
-    return updated_memory
+    # Calculate context usage percentage based on the final state (after potential summarization)
+    context_usage_percentage = _calculate_context_usage_percentage(
+        messages=updated_memory.messages,
+        model_name=model_name,
+        context_window_max=context_window_max,
+    )
+
+    # Return WorkingMemoryResponse with percentage
+    return WorkingMemoryResponse(
+        **updated_memory.model_dump(),
+        context_usage_percentage=context_usage_percentage,
+    )
 
 
 @router.delete("/v1/working-memory/{session_id}", response_model=AckResponse)
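For orientation, a hypothetical client-side sketch of consuming the new field. Nothing below is part of the commit: the base URL, port, and the assumption that model_name travels as a query parameter are guesses based on the endpoint signature.

import httpx

# Fetch working memory for a session, supplying model info so the
# server can compute context_usage_percentage (otherwise it is None).
resp = httpx.get(
    "http://localhost:8000/v1/working-memory/my-session",  # assumed host/port
    params={"model_name": "gpt-4o"},  # assumed query-parameter transport
)
resp.raise_for_status()
usage = resp.json().get("context_usage_percentage")

if usage is not None and usage >= 80:
    # Nearing the 70%-of-context threshold, so summarization will trigger soon
    print(f"Context usage at {usage:.0f}% of the summarization threshold")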

agent_memory_server/models.py

Lines changed: 5 additions & 0 deletions
@@ -222,6 +222,11 @@ class WorkingMemory(BaseModel):
 class WorkingMemoryResponse(WorkingMemory):
     """Response containing working memory"""
 
+    context_usage_percentage: float | None = Field(
+        default=None,
+        description="Percentage of context window used before auto-summarization triggers (0-100)",
+    )
+
 
 class WorkingMemoryRequest(BaseModel):
     """Request parameters for working memory operations"""

tests/test_full_integration.py

Lines changed: 9 additions & 9 deletions
@@ -773,9 +773,9 @@ async def test_memory_prompt_with_long_term_search(
         )
         for msg in messages
     )
-    assert (
-        relevant_context_found
-    ), f"No relevant memory context found in messages: {messages}"
+    assert relevant_context_found, (
+        f"No relevant memory context found in messages: {messages}"
+    )
 
     # Cleanup
     await client.delete_long_term_memories([m.id for m in test_memories])
@@ -1079,9 +1079,9 @@ async def test_full_workflow_integration(
     )
     print(f"No topic filter search results: {no_topic_search}")
 
-    assert (
-        len(search_results["memories"]) > 0
-    ), f"No memories found in search results: {search_results}"
+    assert len(search_results["memories"]) > 0, (
+        f"No memories found in search results: {search_results}"
+    )
 
     # 6. Test tool integration with a realistic scenario
     tool_call = {
@@ -1126,9 +1126,9 @@ async def test_full_workflow_integration(
         m for m in long_term_memories.memories if m.id.startswith(memory_id_prefix)
     ]
 
-    assert (
-        len(our_memories) == 0
-    ), f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}"
+    assert len(our_memories) == 0, (
+        f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}"
+    )
 
 
 @pytest.mark.integration
