Implement dual context percentage fields for working memory

abrookins · claude · abrookins · commit 2acd27bc9216 · 2025-07-25T14:30:17.000-07:00
- Add context_percentage_total_used field showing actual context window usage (0-100%)
- Add context_percentage_until_summarization field showing percentage until auto-summarization triggers (0-100%)
- Update API calculation function to return both values as a tuple
- Update server and SDK models with new fields
- Update comprehensive test coverage for both fields
- Remove old single context_usage_percentage field
- Maintain configurable summarization threshold (default 70%)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
diff --git a/agent-memory-client/agent_memory_client/models.py b/agent-memory-client/agent_memory_client/models.py
@@ -215,9 +215,13 @@ class SessionListResponse(BaseModel):
 class WorkingMemoryResponse(WorkingMemory):
     """Response from working memory operations"""
 
-    context_usage_percentage: float | None = Field(
+    context_percentage_total_used: float | None = Field(
         default=None,
-        description="Percentage of context window used before auto-summarization triggers (0-100)",
+        description="Percentage of total context window currently used (0-100)",
+    )
+    context_percentage_until_summarization: float | None = Field(
+        default=None,
+        description="Percentage until auto-summarization triggers (0-100, reaches 100% at summarization threshold)",
     )
 
 
diff --git a/agent-memory-client/tests/test_client.py b/agent-memory-client/tests/test_client.py
@@ -659,27 +659,29 @@ class TestContextUsagePercentage:
     """Tests for context usage percentage functionality."""
 
     @pytest.mark.asyncio
-    async def test_working_memory_response_with_context_percentage(
+    async def test_working_memory_response_with_context_percentages(
         self, enhanced_test_client
     ):
-        """Test that WorkingMemoryResponse properly handles context_usage_percentage field."""
+        """Test that WorkingMemoryResponse properly handles both context percentage fields."""
         session_id = "test-session"
 
-        # Test with context percentage set
+        # Test with both context percentages set
         working_memory_response = WorkingMemoryResponse(
             session_id=session_id,
             messages=[],
             memories=[],
             data={},
             context=None,
             user_id=None,
-            context_usage_percentage=45.5,
+            context_percentage_total_used=45.5,
+            context_percentage_until_summarization=65.0,
         )
 
-        assert working_memory_response.context_usage_percentage == 45.5
+        assert working_memory_response.context_percentage_total_used == 45.5
+        assert working_memory_response.context_percentage_until_summarization == 65.0
         assert working_memory_response.session_id == session_id
 
-        # Test with None context percentage (default)
+        # Test with None context percentages (default)
         working_memory_response_none = WorkingMemoryResponse(
             session_id=session_id,
             messages=[],
@@ -689,37 +691,45 @@ async def test_working_memory_response_with_context_percentage(
             user_id=None,
         )
 
-        assert working_memory_response_none.context_usage_percentage is None
+        assert working_memory_response_none.context_percentage_total_used is None
+        assert (
+            working_memory_response_none.context_percentage_until_summarization is None
+        )
 
     @pytest.mark.asyncio
-    async def test_context_percentage_serialization(self, enhanced_test_client):
-        """Test that context_usage_percentage is properly serialized."""
+    async def test_context_percentages_serialization(self, enhanced_test_client):
+        """Test that both context percentage fields are properly serialized."""
         session_id = "test-session"
 
-        # Create response with context percentage
+        # Create response with both context percentages
         working_memory_response = WorkingMemoryResponse(
             session_id=session_id,
             messages=[],
             memories=[],
             data={},
             context=None,
             user_id=None,
-            context_usage_percentage=75.0,
+            context_percentage_total_used=75.0,
+            context_percentage_until_summarization=85.5,
         )
 
-        # Test model_dump includes the field
+        # Test model_dump includes both fields
         dumped = working_memory_response.model_dump()
-        assert "context_usage_percentage" in dumped
-        assert dumped["context_usage_percentage"] == 75.0
+        assert "context_percentage_total_used" in dumped
+        assert "context_percentage_until_summarization" in dumped
+        assert dumped["context_percentage_total_used"] == 75.0
+        assert dumped["context_percentage_until_summarization"] == 85.5
 
         # Test JSON serialization
         json_data = working_memory_response.model_dump_json()
-        assert "context_usage_percentage" in json_data
+        assert "context_percentage_total_used" in json_data
+        assert "context_percentage_until_summarization" in json_data
         assert "75.0" in json_data
+        assert "85.5" in json_data
 
     @pytest.mark.asyncio
-    async def test_context_percentage_validation(self, enhanced_test_client):
-        """Test that context_usage_percentage accepts valid values."""
+    async def test_context_percentages_validation(self, enhanced_test_client):
+        """Test that both context percentage fields accept valid values."""
         session_id = "test-session"
 
         # Test valid percentages
@@ -733,12 +743,17 @@ async def test_context_percentage_validation(self, enhanced_test_client):
                 data={},
                 context=None,
                 user_id=None,
-                context_usage_percentage=percentage,
+                context_percentage_total_used=percentage,
+                context_percentage_until_summarization=percentage,
+            )
+            assert working_memory_response.context_percentage_total_used == percentage
+            assert (
+                working_memory_response.context_percentage_until_summarization
+                == percentage
             )
-            assert working_memory_response.context_usage_percentage == percentage
 
-    def test_working_memory_response_from_dict_with_context_percentage(self):
-        """Test that WorkingMemoryResponse can be created from dict with context_usage_percentage."""
+    def test_working_memory_response_from_dict_with_context_percentages(self):
+        """Test that WorkingMemoryResponse can be created from dict with both context percentage fields."""
         session_id = "test-session"
 
         # Test creating WorkingMemoryResponse from dict (simulating API response parsing)
@@ -749,7 +764,8 @@ def test_working_memory_response_from_dict_with_context_percentage(self):
             "data": {},
             "context": None,
             "user_id": None,
-            "context_usage_percentage": 33.3,
+            "context_percentage_total_used": 33.3,
+            "context_percentage_until_summarization": 47.5,
             "tokens": 0,
             "namespace": None,
             "ttl_seconds": None,
@@ -759,7 +775,8 @@ def test_working_memory_response_from_dict_with_context_percentage(self):
         # This simulates what happens when the API client parses the JSON response
         result = WorkingMemoryResponse(**response_dict)
 
-        # Verify the context_usage_percentage is included
+        # Verify both context percentage fields are included
         assert isinstance(result, WorkingMemoryResponse)
-        assert result.context_usage_percentage == 33.3
+        assert result.context_percentage_total_used == 33.3
+        assert result.context_percentage_until_summarization == 47.5
         assert result.session_id == session_id
diff --git a/agent_memory_server/api.py b/agent_memory_server/api.py
@@ -63,39 +63,43 @@ def _calculate_messages_token_count(messages: list[MemoryMessage]) -> int:
     return total_tokens
 
 
-def _calculate_context_usage_percentage(
+def _calculate_context_usage_percentages(
     messages: list[MemoryMessage],
     model_name: ModelNameLiteral | None,
     context_window_max: int | None,
-) -> float | None:
+) -> tuple[float | None, float | None]:
     """
-    Calculate the percentage of context window used before auto-summarization triggers.
+    Calculate context usage percentages for total usage and until summarization triggers.
 
     Args:
         messages: List of messages to calculate token count for
         model_name: The client's LLM model name for context window determination
         context_window_max: Direct specification of context window max tokens
 
     Returns:
-        Percentage (0-100) of context used, or None if no model info provided
+        Tuple of (total_percentage, until_summarization_percentage)
+        - total_percentage: Percentage (0-100) of total context window used
+        - until_summarization_percentage: Percentage (0-100) of the summarization threshold already used; reaches 100 at the summarization threshold
+        Both values are None if there are no messages or no model info is provided
     """
     if not messages or (not model_name and not context_window_max):
-        return None
+        return None, None
 
     # Calculate current token usage
     current_tokens = _calculate_messages_token_count(messages)
 
     # Get effective token limit for the client's model
     max_tokens = _get_effective_token_limit(model_name, context_window_max)
 
-    # Use the same threshold as _summarize_working_memory (reserves space for new content)
-    token_threshold = int(max_tokens * settings.summarization_threshold)
+    # Calculate percentage of total context window used
+    total_percentage = (current_tokens / max_tokens) * 100.0
 
-    # Calculate percentage of threshold used
-    percentage = (current_tokens / token_threshold) * 100.0
+    # Calculate percentage of the summarization threshold already used
+    token_threshold = int(max_tokens * settings.summarization_threshold)
+    until_summarization_percentage = (current_tokens / token_threshold) * 100.0
 
-    # Cap at 100% for display purposes
-    return min(percentage, 100.0)
+    # Cap both at 100% for display purposes
+    return min(total_percentage, 100.0), min(until_summarization_percentage, 100.0)
 
 
 async def _summarize_working_memory(
@@ -304,16 +308,21 @@ async def get_working_memory(
 
     logger.debug(f"Working mem: {working_mem}")
 
-    # Calculate context usage percentage
-    context_usage_percentage = _calculate_context_usage_percentage(
-        messages=working_mem.messages,
-        model_name=model_name,
-        context_window_max=context_window_max,
+    # Calculate context usage percentages
+    total_percentage, until_summarization_percentage = (
+        _calculate_context_usage_percentages(
+            messages=working_mem.messages,
+            model_name=model_name,
+            context_window_max=context_window_max,
+        )
     )
 
-    # Return WorkingMemoryResponse with percentage
+    # Return WorkingMemoryResponse with both percentage values
     working_mem_data = working_mem.model_dump()
-    working_mem_data["context_usage_percentage"] = context_usage_percentage
+    working_mem_data["context_percentage_total_used"] = total_percentage
+    working_mem_data["context_percentage_until_summarization"] = (
+        until_summarization_percentage
+    )
     return WorkingMemoryResponse(**working_mem_data)
 
 
@@ -393,16 +402,21 @@ async def put_working_memory(
             namespace=updated_memory.namespace,
         )
 
-    # Calculate context usage percentage based on the final state (after potential summarization)
-    context_usage_percentage = _calculate_context_usage_percentage(
-        messages=updated_memory.messages,
-        model_name=model_name,
-        context_window_max=context_window_max,
+    # Calculate context usage percentages based on the final state (after potential summarization)
+    total_percentage, until_summarization_percentage = (
+        _calculate_context_usage_percentages(
+            messages=updated_memory.messages,
+            model_name=model_name,
+            context_window_max=context_window_max,
+        )
     )
 
-    # Return WorkingMemoryResponse with percentage
+    # Return WorkingMemoryResponse with both percentage values
     updated_memory_data = updated_memory.model_dump()
-    updated_memory_data["context_usage_percentage"] = context_usage_percentage
+    updated_memory_data["context_percentage_total_used"] = total_percentage
+    updated_memory_data["context_percentage_until_summarization"] = (
+        until_summarization_percentage
+    )
     return WorkingMemoryResponse(**updated_memory_data)
 
 
diff --git a/agent_memory_server/models.py b/agent_memory_server/models.py
@@ -222,9 +222,13 @@ class WorkingMemory(BaseModel):
 class WorkingMemoryResponse(WorkingMemory):
     """Response containing working memory"""
 
-    context_usage_percentage: float | None = Field(
+    context_percentage_total_used: float | None = Field(
         default=None,
-        description="Percentage of context window used before auto-summarization triggers (0-100)",
+        description="Percentage of total context window currently used (0-100)",
+    )
+    context_percentage_until_summarization: float | None = Field(
+        default=None,
+        description="Percentage until auto-summarization triggers (0-100, reaches 100% at summarization threshold)",
     )