Skip to content

Commit 2acd27b

Browse files
abrookins and claude committed
Implement dual context percentage fields for working memory
- Add context_percentage_total_used field showing actual context window usage (0-100%)
- Add context_percentage_until_summarization field showing percentage until auto-summarization triggers (0-100%)
- Update API calculation function to return both values as a tuple
- Update server and SDK models with new fields
- Update comprehensive test coverage for both fields
- Remove old single context_usage_percentage field
- Maintain configurable summarization threshold (default 70%)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent ab196df commit 2acd27b

File tree

4 files changed

+92
-53
lines changed

4 files changed

+92
-53
lines changed

agent-memory-client/agent_memory_client/models.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -215,9 +215,13 @@ class SessionListResponse(BaseModel):
215215
class WorkingMemoryResponse(WorkingMemory):
216216
"""Response from working memory operations"""
217217

218-
context_usage_percentage: float | None = Field(
218+
context_percentage_total_used: float | None = Field(
219219
default=None,
220-
description="Percentage of context window used before auto-summarization triggers (0-100)",
220+
description="Percentage of total context window currently used (0-100)",
221+
)
222+
context_percentage_until_summarization: float | None = Field(
223+
default=None,
224+
description="Percentage until auto-summarization triggers (0-100, reaches 100% at summarization threshold)",
221225
)
222226

223227

agent-memory-client/tests/test_client.py

Lines changed: 41 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -659,27 +659,29 @@ class TestContextUsagePercentage:
659659
"""Tests for context usage percentage functionality."""
660660

661661
@pytest.mark.asyncio
662-
async def test_working_memory_response_with_context_percentage(
662+
async def test_working_memory_response_with_context_percentages(
663663
self, enhanced_test_client
664664
):
665-
"""Test that WorkingMemoryResponse properly handles context_usage_percentage field."""
665+
"""Test that WorkingMemoryResponse properly handles both context percentage fields."""
666666
session_id = "test-session"
667667

668-
# Test with context percentage set
668+
# Test with both context percentages set
669669
working_memory_response = WorkingMemoryResponse(
670670
session_id=session_id,
671671
messages=[],
672672
memories=[],
673673
data={},
674674
context=None,
675675
user_id=None,
676-
context_usage_percentage=45.5,
676+
context_percentage_total_used=45.5,
677+
context_percentage_until_summarization=65.0,
677678
)
678679

679-
assert working_memory_response.context_usage_percentage == 45.5
680+
assert working_memory_response.context_percentage_total_used == 45.5
681+
assert working_memory_response.context_percentage_until_summarization == 65.0
680682
assert working_memory_response.session_id == session_id
681683

682-
# Test with None context percentage (default)
684+
# Test with None context percentages (default)
683685
working_memory_response_none = WorkingMemoryResponse(
684686
session_id=session_id,
685687
messages=[],
@@ -689,37 +691,45 @@ async def test_working_memory_response_with_context_percentage(
689691
user_id=None,
690692
)
691693

692-
assert working_memory_response_none.context_usage_percentage is None
694+
assert working_memory_response_none.context_percentage_total_used is None
695+
assert (
696+
working_memory_response_none.context_percentage_until_summarization is None
697+
)
693698

694699
@pytest.mark.asyncio
695-
async def test_context_percentage_serialization(self, enhanced_test_client):
696-
"""Test that context_usage_percentage is properly serialized."""
700+
async def test_context_percentages_serialization(self, enhanced_test_client):
701+
"""Test that both context percentage fields are properly serialized."""
697702
session_id = "test-session"
698703

699-
# Create response with context percentage
704+
# Create response with both context percentages
700705
working_memory_response = WorkingMemoryResponse(
701706
session_id=session_id,
702707
messages=[],
703708
memories=[],
704709
data={},
705710
context=None,
706711
user_id=None,
707-
context_usage_percentage=75.0,
712+
context_percentage_total_used=75.0,
713+
context_percentage_until_summarization=85.5,
708714
)
709715

710-
# Test model_dump includes the field
716+
# Test model_dump includes both fields
711717
dumped = working_memory_response.model_dump()
712-
assert "context_usage_percentage" in dumped
713-
assert dumped["context_usage_percentage"] == 75.0
718+
assert "context_percentage_total_used" in dumped
719+
assert "context_percentage_until_summarization" in dumped
720+
assert dumped["context_percentage_total_used"] == 75.0
721+
assert dumped["context_percentage_until_summarization"] == 85.5
714722

715723
# Test JSON serialization
716724
json_data = working_memory_response.model_dump_json()
717-
assert "context_usage_percentage" in json_data
725+
assert "context_percentage_total_used" in json_data
726+
assert "context_percentage_until_summarization" in json_data
718727
assert "75.0" in json_data
728+
assert "85.5" in json_data
719729

720730
@pytest.mark.asyncio
721-
async def test_context_percentage_validation(self, enhanced_test_client):
722-
"""Test that context_usage_percentage accepts valid values."""
731+
async def test_context_percentages_validation(self, enhanced_test_client):
732+
"""Test that both context percentage fields accept valid values."""
723733
session_id = "test-session"
724734

725735
# Test valid percentages
@@ -733,12 +743,17 @@ async def test_context_percentage_validation(self, enhanced_test_client):
733743
data={},
734744
context=None,
735745
user_id=None,
736-
context_usage_percentage=percentage,
746+
context_percentage_total_used=percentage,
747+
context_percentage_until_summarization=percentage,
748+
)
749+
assert working_memory_response.context_percentage_total_used == percentage
750+
assert (
751+
working_memory_response.context_percentage_until_summarization
752+
== percentage
737753
)
738-
assert working_memory_response.context_usage_percentage == percentage
739754

740-
def test_working_memory_response_from_dict_with_context_percentage(self):
741-
"""Test that WorkingMemoryResponse can be created from dict with context_usage_percentage."""
755+
def test_working_memory_response_from_dict_with_context_percentages(self):
756+
"""Test that WorkingMemoryResponse can be created from dict with both context percentage fields."""
742757
session_id = "test-session"
743758

744759
# Test creating WorkingMemoryResponse from dict (simulating API response parsing)
@@ -749,7 +764,8 @@ def test_working_memory_response_from_dict_with_context_percentage(self):
749764
"data": {},
750765
"context": None,
751766
"user_id": None,
752-
"context_usage_percentage": 33.3,
767+
"context_percentage_total_used": 33.3,
768+
"context_percentage_until_summarization": 47.5,
753769
"tokens": 0,
754770
"namespace": None,
755771
"ttl_seconds": None,
@@ -759,7 +775,8 @@ def test_working_memory_response_from_dict_with_context_percentage(self):
759775
# This simulates what happens when the API client parses the JSON response
760776
result = WorkingMemoryResponse(**response_dict)
761777

762-
# Verify the context_usage_percentage is included
778+
# Verify both context percentage fields are included
763779
assert isinstance(result, WorkingMemoryResponse)
764-
assert result.context_usage_percentage == 33.3
780+
assert result.context_percentage_total_used == 33.3
781+
assert result.context_percentage_until_summarization == 47.5
765782
assert result.session_id == session_id

agent_memory_server/api.py

Lines changed: 39 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -63,39 +63,43 @@ def _calculate_messages_token_count(messages: list[MemoryMessage]) -> int:
6363
return total_tokens
6464

6565

66-
def _calculate_context_usage_percentage(
66+
def _calculate_context_usage_percentages(
6767
messages: list[MemoryMessage],
6868
model_name: ModelNameLiteral | None,
6969
context_window_max: int | None,
70-
) -> float | None:
70+
) -> tuple[float | None, float | None]:
7171
"""
72-
Calculate the percentage of context window used before auto-summarization triggers.
72+
Calculate context usage percentages for total usage and until summarization triggers.
7373
7474
Args:
7575
messages: List of messages to calculate token count for
7676
model_name: The client's LLM model name for context window determination
7777
context_window_max: Direct specification of context window max tokens
7878
7979
Returns:
80-
Percentage (0-100) of context used, or None if no model info provided
80+
Tuple of (total_percentage, until_summarization_percentage)
81+
- total_percentage: Percentage (0-100) of total context window used
82+
- until_summarization_percentage: Percentage (0-100) until summarization triggers
83+
Both values are None if no model info provided
8184
"""
8285
if not messages or (not model_name and not context_window_max):
83-
return None
86+
return None, None
8487

8588
# Calculate current token usage
8689
current_tokens = _calculate_messages_token_count(messages)
8790

8891
# Get effective token limit for the client's model
8992
max_tokens = _get_effective_token_limit(model_name, context_window_max)
9093

91-
# Use the same threshold as _summarize_working_memory (reserves space for new content)
92-
token_threshold = int(max_tokens * settings.summarization_threshold)
94+
# Calculate percentage of total context window used
95+
total_percentage = (current_tokens / max_tokens) * 100.0
9396

94-
# Calculate percentage of threshold used
95-
percentage = (current_tokens / token_threshold) * 100.0
97+
# Calculate percentage until summarization threshold
98+
token_threshold = int(max_tokens * settings.summarization_threshold)
99+
until_summarization_percentage = (current_tokens / token_threshold) * 100.0
96100

97-
# Cap at 100% for display purposes
98-
return min(percentage, 100.0)
101+
# Cap both at 100% for display purposes
102+
return min(total_percentage, 100.0), min(until_summarization_percentage, 100.0)
99103

100104

101105
async def _summarize_working_memory(
@@ -304,16 +308,21 @@ async def get_working_memory(
304308

305309
logger.debug(f"Working mem: {working_mem}")
306310

307-
# Calculate context usage percentage
308-
context_usage_percentage = _calculate_context_usage_percentage(
309-
messages=working_mem.messages,
310-
model_name=model_name,
311-
context_window_max=context_window_max,
311+
# Calculate context usage percentages
312+
total_percentage, until_summarization_percentage = (
313+
_calculate_context_usage_percentages(
314+
messages=working_mem.messages,
315+
model_name=model_name,
316+
context_window_max=context_window_max,
317+
)
312318
)
313319

314-
# Return WorkingMemoryResponse with percentage
320+
# Return WorkingMemoryResponse with both percentage values
315321
working_mem_data = working_mem.model_dump()
316-
working_mem_data["context_usage_percentage"] = context_usage_percentage
322+
working_mem_data["context_percentage_total_used"] = total_percentage
323+
working_mem_data["context_percentage_until_summarization"] = (
324+
until_summarization_percentage
325+
)
317326
return WorkingMemoryResponse(**working_mem_data)
318327

319328

@@ -393,16 +402,21 @@ async def put_working_memory(
393402
namespace=updated_memory.namespace,
394403
)
395404

396-
# Calculate context usage percentage based on the final state (after potential summarization)
397-
context_usage_percentage = _calculate_context_usage_percentage(
398-
messages=updated_memory.messages,
399-
model_name=model_name,
400-
context_window_max=context_window_max,
405+
# Calculate context usage percentages based on the final state (after potential summarization)
406+
total_percentage, until_summarization_percentage = (
407+
_calculate_context_usage_percentages(
408+
messages=updated_memory.messages,
409+
model_name=model_name,
410+
context_window_max=context_window_max,
411+
)
401412
)
402413

403-
# Return WorkingMemoryResponse with percentage
414+
# Return WorkingMemoryResponse with both percentage values
404415
updated_memory_data = updated_memory.model_dump()
405-
updated_memory_data["context_usage_percentage"] = context_usage_percentage
416+
updated_memory_data["context_percentage_total_used"] = total_percentage
417+
updated_memory_data["context_percentage_until_summarization"] = (
418+
until_summarization_percentage
419+
)
406420
return WorkingMemoryResponse(**updated_memory_data)
407421

408422

agent_memory_server/models.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -222,9 +222,13 @@ class WorkingMemory(BaseModel):
222222
class WorkingMemoryResponse(WorkingMemory):
223223
"""Response containing working memory"""
224224

225-
context_usage_percentage: float | None = Field(
225+
context_percentage_total_used: float | None = Field(
226226
default=None,
227-
description="Percentage of context window used before auto-summarization triggers (0-100)",
227+
description="Percentage of total context window currently used (0-100)",
228+
)
229+
context_percentage_until_summarization: float | None = Field(
230+
default=None,
231+
description="Percentage until auto-summarization triggers (0-100, reaches 100% at summarization threshold)",
228232
)
229233

230234

0 commit comments

Comments
 (0)