
Commit ab196df

abrookins and claude committed
Make summarization threshold configurable
Address review comments by making the 0.7 threshold configurable instead of hardcoded. Added summarization_threshold setting that can be configured via environment variable or config file.

- Added summarization_threshold to Settings (default: 0.7)
- Updated both _calculate_context_usage_percentage and _summarize_working_memory to use settings.summarization_threshold
- Improved maintainability and consistency between functions
- Allows users to customize when summarization is triggered

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent 8043c18 commit ab196df

2 files changed: +7 -4 lines changed

agent_memory_server/api.py

Lines changed: 4 additions & 4 deletions
@@ -88,8 +88,8 @@ def _calculate_context_usage_percentage(
     # Get effective token limit for the client's model
     max_tokens = _get_effective_token_limit(model_name, context_window_max)
 
-    # Use the same threshold as _summarize_working_memory (70% of context window)
-    token_threshold = int(max_tokens * 0.7)
+    # Use the same threshold as _summarize_working_memory (reserves space for new content)
+    token_threshold = int(max_tokens * settings.summarization_threshold)
 
     # Calculate percentage of threshold used
     percentage = (current_tokens / token_threshold) * 100.0

@@ -123,8 +123,8 @@ async def _summarize_working_memory(
     max_tokens = _get_effective_token_limit(model_name, context_window_max)
 
     # Reserve space for new messages, function calls, and response generation
-    # Use 70% of context window to leave room for new content
-    token_threshold = int(max_tokens * 0.7)
+    # Use configurable threshold to leave room for new content
+    token_threshold = int(max_tokens * settings.summarization_threshold)
 
     if current_tokens <= token_threshold:
         return memory
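
For reference, a minimal sketch of the threshold arithmetic both functions now share; the 128,000-token window and the token counts below are illustrative values, not figures from the diff:

    # Illustrative sketch: a model with a 128,000-token context window and the
    # default summarization_threshold of 0.7.
    max_tokens = 128_000
    summarization_threshold = 0.7

    # Summarization is considered once working memory exceeds this many tokens.
    token_threshold = int(max_tokens * summarization_threshold)  # 89_600

    # With 44,800 tokens already used, usage is reported as 50% of the threshold,
    # and _summarize_working_memory would return the memory unchanged.
    current_tokens = 44_800
    percentage = (current_tokens / token_threshold) * 100.0
    print(f"{percentage:.1f}% of threshold used")  # -> 50.0% of threshold used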

agent_memory_server/config.py

Lines changed: 3 additions & 0 deletions
@@ -119,6 +119,9 @@ class Settings(BaseSettings):
 
     # Working memory settings
     window_size: int = 20  # Default number of recent messages to return
+    summarization_threshold: float = (
+        0.7  # Fraction of context window that triggers summarization
+    )
 
     # Other Application settings
     log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"
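
The commit message notes the threshold can be set via environment variable or config file. A minimal sketch of the environment route, assuming pydantic's BaseSettings maps the field to an unprefixed SUMMARIZATION_THRESHOLD variable (an assumed name, not confirmed by the diff):

    # Minimal sketch, assuming BaseSettings reads the field from an unprefixed
    # environment variable by name (an assumption, not confirmed by the diff).
    import os

    os.environ["SUMMARIZATION_THRESHOLD"] = "0.8"  # summarize at 80% of the window

    from agent_memory_server.config import Settings

    settings = Settings()
    print(settings.summarization_threshold)  # -> 0.8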
