Skip to content

Commit abb0fff

Browse files
authored
Merge pull request #38 from redis/claude/issue-37-20250722-2011
Add dual context percentage fields to working memory endpoints
2 parents a2fcd95 + 2acd27b commit abb0fff

File tree

7 files changed

+221
-9
lines changed

7 files changed

+221
-9
lines changed

agent-memory-client/agent_memory_client/models.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,14 @@ class SessionListResponse(BaseModel):
215215
class WorkingMemoryResponse(WorkingMemory):
    """Response from working memory operations"""

    # Share of the model's total context window consumed by the current
    # messages, as reported by the server; None when the server had no
    # model info to compute it.
    context_percentage_total_used: float | None = Field(
        None,
        description="Percentage of total context window currently used (0-100)",
    )
    # Progress toward the auto-summarization threshold; reaches 100 exactly
    # when summarization would trigger. None when not computable.
    context_percentage_until_summarization: float | None = Field(
        None,
        description="Percentage until auto-summarization triggers (0-100, reaches 100% at summarization threshold)",
    )
219226

220227

221228
class MemoryRecordResult(MemoryRecord):

agent-memory-client/tests/test_client.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -653,3 +653,130 @@ def test_validation_with_none_values(self, enhanced_test_client):
653653

654654
# Should not raise
655655
enhanced_test_client.validate_memory_record(memory)
656+
657+
658+
class TestContextUsagePercentage:
    """Tests for context usage percentage functionality.

    NOTE(review): these tests were previously declared ``async`` with
    ``@pytest.mark.asyncio`` although none of them awaited anything — they
    are plain synchronous model tests, so the event-loop machinery was pure
    overhead. The unused ``enhanced_test_client`` fixture parameters were
    dropped for the same reason, and the repeated minimal-response
    boilerplate was extracted into ``_make_response``.
    """

    @staticmethod
    def _make_response(**overrides):
        """Build a minimal WorkingMemoryResponse, applying field overrides.

        Centralizes the construction boilerplate shared by every test below.
        """
        payload = {
            "session_id": "test-session",
            "messages": [],
            "memories": [],
            "data": {},
            "context": None,
            "user_id": None,
        }
        payload.update(overrides)
        return WorkingMemoryResponse(**payload)

    def test_working_memory_response_with_context_percentages(self):
        """Test that WorkingMemoryResponse properly handles both context percentage fields."""
        # Both context percentages explicitly set
        response = self._make_response(
            context_percentage_total_used=45.5,
            context_percentage_until_summarization=65.0,
        )

        assert response.context_percentage_total_used == 45.5
        assert response.context_percentage_until_summarization == 65.0
        assert response.session_id == "test-session"

        # Both fields default to None when omitted
        response_none = self._make_response()

        assert response_none.context_percentage_total_used is None
        assert response_none.context_percentage_until_summarization is None

    def test_context_percentages_serialization(self):
        """Test that both context percentage fields are properly serialized."""
        response = self._make_response(
            context_percentage_total_used=75.0,
            context_percentage_until_summarization=85.5,
        )

        # model_dump must include both fields with their exact values
        dumped = response.model_dump()
        assert "context_percentage_total_used" in dumped
        assert "context_percentage_until_summarization" in dumped
        assert dumped["context_percentage_total_used"] == 75.0
        assert dumped["context_percentage_until_summarization"] == 85.5

        # JSON serialization must carry both field names and values
        json_data = response.model_dump_json()
        assert "context_percentage_total_used" in json_data
        assert "context_percentage_until_summarization" in json_data
        assert "75.0" in json_data
        assert "85.5" in json_data

    def test_context_percentages_validation(self):
        """Test that both context percentage fields accept valid values."""
        # Boundary and interior values, plus the None default
        for percentage in (0.0, 25.5, 50.0, 99.9, 100.0, None):
            response = self._make_response(
                context_percentage_total_used=percentage,
                context_percentage_until_summarization=percentage,
            )
            assert response.context_percentage_total_used == percentage
            assert response.context_percentage_until_summarization == percentage

    def test_working_memory_response_from_dict_with_context_percentages(self):
        """Test that WorkingMemoryResponse can be created from dict with both context percentage fields."""
        session_id = "test-session"

        # Simulates what happens when the API client parses the JSON response
        response_dict = {
            "session_id": session_id,
            "messages": [],
            "memories": [],
            "data": {},
            "context": None,
            "user_id": None,
            "context_percentage_total_used": 33.3,
            "context_percentage_until_summarization": 47.5,
            "tokens": 0,
            "namespace": None,
            "ttl_seconds": None,
            "last_accessed": "2024-01-01T00:00:00Z",
        }

        result = WorkingMemoryResponse(**response_dict)

        # Verify both context percentage fields round-trip from the dict
        assert isinstance(result, WorkingMemoryResponse)
        assert result.context_percentage_total_used == 33.3
        assert result.context_percentage_until_summarization == 47.5
        assert result.session_id == session_id

agent_memory_server/api.py

Lines changed: 73 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,45 @@ def _calculate_messages_token_count(messages: list[MemoryMessage]) -> int:
6363
return total_tokens
6464

6565

66+
def _calculate_context_usage_percentages(
    messages: list[MemoryMessage],
    model_name: ModelNameLiteral | None,
    context_window_max: int | None,
) -> tuple[float | None, float | None]:
    """
    Calculate context usage percentages for total usage and until summarization triggers.

    Args:
        messages: List of messages to calculate token count for
        model_name: The client's LLM model name for context window determination
        context_window_max: Direct specification of context window max tokens

    Returns:
        Tuple of (total_percentage, until_summarization_percentage)
        - total_percentage: Percentage (0-100) of total context window used
        - until_summarization_percentage: Percentage (0-100) until summarization triggers
        Both values are None if no model info provided or the window is unusable
    """
    if not messages or (not model_name and not context_window_max):
        return None, None

    # Calculate current token usage
    current_tokens = _calculate_messages_token_count(messages)

    # Get effective token limit for the client's model
    max_tokens = _get_effective_token_limit(model_name, context_window_max)
    if max_tokens <= 0:
        # Defensive: a non-positive window would divide by zero below;
        # treat it the same as "no model info available".
        return None, None

    # Calculate percentage of total context window used
    total_percentage = (current_tokens / max_tokens) * 100.0

    # Calculate percentage until summarization threshold
    token_threshold = int(max_tokens * settings.summarization_threshold)
    if token_threshold <= 0:
        # Threshold rounds to zero (tiny window or a threshold setting of 0):
        # summarization would always trigger, so report 100% rather than
        # raising ZeroDivisionError.
        until_summarization_percentage = 100.0
    else:
        until_summarization_percentage = (current_tokens / token_threshold) * 100.0

    # Cap both at 100% for display purposes
    return min(total_percentage, 100.0), min(until_summarization_percentage, 100.0)
103+
104+
66105
async def _summarize_working_memory(
67106
memory: WorkingMemory,
68107
model_name: ModelNameLiteral | None = None,
@@ -88,8 +127,8 @@ async def _summarize_working_memory(
88127
max_tokens = _get_effective_token_limit(model_name, context_window_max)
89128

90129
# Reserve space for new messages, function calls, and response generation
91-
# Use 70% of context window to leave room for new content
92-
token_threshold = int(max_tokens * 0.7)
130+
# Use configurable threshold to leave room for new content
131+
token_threshold = int(max_tokens * settings.summarization_threshold)
93132

94133
if current_tokens <= token_threshold:
95134
return memory
@@ -269,7 +308,22 @@ async def get_working_memory(
269308

270309
logger.debug(f"Working mem: {working_mem}")
271310

272-
return working_mem
311+
# Calculate context usage percentages
312+
total_percentage, until_summarization_percentage = (
313+
_calculate_context_usage_percentages(
314+
messages=working_mem.messages,
315+
model_name=model_name,
316+
context_window_max=context_window_max,
317+
)
318+
)
319+
320+
# Return WorkingMemoryResponse with both percentage values
321+
working_mem_data = working_mem.model_dump()
322+
working_mem_data["context_percentage_total_used"] = total_percentage
323+
working_mem_data["context_percentage_until_summarization"] = (
324+
until_summarization_percentage
325+
)
326+
return WorkingMemoryResponse(**working_mem_data)
273327

274328

275329
@router.put("/v1/working-memory/{session_id}", response_model=WorkingMemoryResponse)
@@ -348,7 +402,22 @@ async def put_working_memory(
348402
namespace=updated_memory.namespace,
349403
)
350404

351-
return updated_memory
405+
# Calculate context usage percentages based on the final state (after potential summarization)
406+
total_percentage, until_summarization_percentage = (
407+
_calculate_context_usage_percentages(
408+
messages=updated_memory.messages,
409+
model_name=model_name,
410+
context_window_max=context_window_max,
411+
)
412+
)
413+
414+
# Return WorkingMemoryResponse with both percentage values
415+
updated_memory_data = updated_memory.model_dump()
416+
updated_memory_data["context_percentage_total_used"] = total_percentage
417+
updated_memory_data["context_percentage_until_summarization"] = (
418+
until_summarization_percentage
419+
)
420+
return WorkingMemoryResponse(**updated_memory_data)
352421

353422

354423
@router.delete("/v1/working-memory/{session_id}", response_model=AckResponse)

agent_memory_server/config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,9 @@ class Settings(BaseSettings):
119119

120120
# Working memory settings
121121
window_size: int = 20 # Default number of recent messages to return
122+
summarization_threshold: float = (
123+
0.7 # Fraction of context window that triggers summarization
124+
)
122125

123126
# Other Application settings
124127
log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"

agent_memory_server/models.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,15 @@ class WorkingMemory(BaseModel):
222222
class WorkingMemoryResponse(WorkingMemory):
    """Response containing working memory"""

    # Share of the model's total context window consumed by the session's
    # messages; None when no model info was supplied to compute it.
    context_percentage_total_used: float | None = Field(
        None,
        description="Percentage of total context window currently used (0-100)",
    )
    # Progress toward the auto-summarization threshold; reaches 100 exactly
    # when summarization would trigger. None when not computable.
    context_percentage_until_summarization: float | None = Field(
        None,
        description="Percentage until auto-summarization triggers (0-100, reaches 100% at summarization threshold)",
    )
233+
225234

226235
class WorkingMemoryRequest(BaseModel):
227236
"""Request parameters for working memory operations"""

docs/memory-types.md

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -202,11 +202,8 @@ Long-term memory supports three types of memories:
202202
# Create long-term memories
203203
POST /v1/long-term-memory/
204204
205-
# Search long-term memories only
205+
# Search long-term memories
206206
POST /v1/long-term-memory/search
207-
208-
# Search across all memory types
209-
POST /v1/memory/search
210207
```
211208

212209
### Search Capabilities

dump.rdb

88 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)