
Commit f65f622

feat: refactor ai code

Authored and committed by dori
1 parent 475da21

8 files changed (+107, -156 lines)

src/mcp_as_a_judge/constants.py

Lines changed: 2 additions & 3 deletions

@@ -15,6 +15,5 @@
 DATABASE_URL = "sqlite://:memory:"
 MAX_SESSION_RECORDS = 20  # Maximum records to keep per session (FIFO)
 MAX_TOTAL_SESSIONS = 50  # Maximum total sessions to keep (LRU cleanup)
-MAX_CONTEXT_TOKENS = (
-    50000  # Maximum tokens for conversation history context (1 token ≈ 4 characters)
-)
+MAX_CONTEXT_TOKENS = 50000  # Maximum tokens for conversation history context (1 token ≈ 4 characters)
+
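The trailing comment documents the sizing heuristic this commit relies on everywhere: token budgets are estimated at roughly 4 characters per token rather than computed with a real tokenizer. A minimal sketch of such an estimator, assuming simple integer division (the project's actual calculate_tokens() in token_utils.py may round differently):

# Hedged sketch of the "1 token ≈ 4 characters" estimate; rounding details are assumed.
def estimate_tokens(text: str) -> int:
    if not text:
        return 0
    return max(1, len(text) // 4)

# 200,000 characters ≈ 50,000 tokens, exactly the MAX_CONTEXT_TOKENS budget.
assert estimate_tokens("x" * 200_000) == 50_000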

src/mcp_as_a_judge/db/conversation_history_service.py

Lines changed: 24 additions & 28 deletions

@@ -14,7 +14,7 @@
 )
 from mcp_as_a_judge.db.db_config import Config
 from mcp_as_a_judge.logging_config import get_logger
-from mcp_as_a_judge.utils.token_utils import filter_records_by_token_limit
+from mcp_as_a_judge.db.token_utils import filter_records_by_token_limit
 
 # Set up logger
 logger = get_logger(__name__)

@@ -36,18 +36,17 @@ def __init__(
         self.config = config
         self.db = db_provider or create_database_provider(config)
 
-    async def load_context_for_enrichment(
-        self, session_id: str
-    ) -> list[ConversationRecord]:
+    async def load_context_for_enrichment(self, session_id: str, current_prompt: str = "") -> list[ConversationRecord]:
         """
         Load recent conversation records for LLM context enrichment.
 
         Two-level filtering approach:
         1. Database already enforces storage limits (record count + token limits)
-        2. Load-time filtering ensures history + current fits within LLM context limits
+        2. Load-time filtering ensures history + current prompt fits within LLM context limits
 
         Args:
             session_id: Session identifier
+            current_prompt: Current prompt that will be sent to LLM (for token calculation)
 
         Returns:
             List of conversation records for LLM context (filtered for LLM limits)

@@ -62,18 +61,23 @@ async def load_context_for_enrichment(
 
         # Apply LLM context filtering: ensure history + current prompt will fit within token limit
         # This filters the list without modifying the database (only token limit matters for LLM)
-        filtered_records = filter_records_by_token_limit(recent_records)
+        filtered_records = filter_records_by_token_limit(recent_records, current_prompt=current_prompt)
 
         logger.info(
             f"✅ Returning {len(filtered_records)} conversation records for LLM context"
         )
         return filtered_records
 
-    async def save_tool_interaction(
+    async def save_tool_interaction_and_cleanup(
         self, session_id: str, tool_name: str, tool_input: str, tool_output: str
     ) -> str:
         """
-        Save a tool interaction as a conversation record.
+        Save a tool interaction as a conversation record and perform automatic cleanup in the provider layer.
+
+        After saving, the database provider automatically performs cleanup to enforce limits:
+        - Removes old records if session exceeds MAX_SESSION_RECORDS (20)
+        - Removes old records if session exceeds MAX_CONTEXT_TOKENS (50,000)
+        - Removes least recently used sessions if total sessions exceed MAX_TOTAL_SESSIONS (50)
 
         Args:
             session_id: Session identifier from AI agent

@@ -98,31 +102,23 @@ async def save_tool_interaction(
         logger.info(f"✅ Saved conversation record with ID: {record_id}")
         return record_id
 
-    async def get_conversation_history(
-        self, session_id: str
-    ) -> list[ConversationRecord]:
+    async def save_tool_interaction(
+        self, session_id: str, tool_name: str, tool_input: str, tool_output: str
+    ) -> str:
         """
-        Get conversation history for a session to be injected into user prompts.
-
-        Args:
-            session_id: Session identifier
+        Save a tool interaction as a conversation record.
 
-        Returns:
-            List of conversation records for the session (most recent first)
+        DEPRECATED: Use save_tool_interaction_and_cleanup() instead.
+        This method is kept for backward compatibility.
         """
-        logger.info(f"🔄 Loading conversation history for session {session_id}")
-
-        context_records = await self.load_context_for_enrichment(session_id)
-
-        logger.info(
-            f"📝 Retrieved {len(context_records)} conversation records for session {session_id}"
+        logger.warning(
+            "save_tool_interaction() is deprecated. Use save_tool_interaction_and_cleanup() instead."
+        )
+        return await self.save_tool_interaction_and_cleanup(
+            session_id, tool_name, tool_input, tool_output
         )
 
-        return context_records
-
-    def format_conversation_history_as_json_array(
-        self, conversation_history: list[ConversationRecord]
-    ) -> list[dict]:
+    def format_conversation_history_as_json_array(self, conversation_history: list[ConversationRecord]) -> list[dict]:
         """
         Convert conversation history list to JSON array for prompt injection.
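Taken together, the refactor splits the service API into an explicit write path (save_tool_interaction_and_cleanup) and read path (load_context_for_enrichment), keeping the old write method as a deprecation shim. A minimal usage sketch, assuming the class is named ConversationHistoryService and that a default Config() can be constructed — neither detail is visible in this diff:

# Hedged sketch: class name, constructor arguments, and payloads are assumptions.
import asyncio

from mcp_as_a_judge.db.conversation_history_service import ConversationHistoryService
from mcp_as_a_judge.db.db_config import Config

async def main() -> None:
    service = ConversationHistoryService(config=Config())  # assumed constructor

    # Write path: the provider enforces MAX_SESSION_RECORDS / MAX_CONTEXT_TOKENS /
    # MAX_TOTAL_SESSIONS immediately after the save.
    record_id = await service.save_tool_interaction_and_cleanup(
        session_id="session-123",
        tool_name="judge_code_change",
        tool_input='{"diff": "..."}',
        tool_output='{"verdict": "approved"}',
    )

    # Read path: pass the prompt so history + prompt fit within MAX_CONTEXT_TOKENS.
    records = await service.load_context_for_enrichment(
        "session-123", current_prompt="Please review the next change."
    )
    print(record_id, len(records))

asyncio.run(main())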

src/mcp_as_a_judge/db/providers/sqlite_provider.py

Lines changed: 22 additions & 31 deletions

@@ -15,7 +15,7 @@
 from mcp_as_a_judge.db.cleanup_service import ConversationCleanupService
 from mcp_as_a_judge.db.interface import ConversationHistoryDB, ConversationRecord
 from mcp_as_a_judge.logging_config import get_logger
-from mcp_as_a_judge.utils.token_utils import calculate_record_tokens
+from mcp_as_a_judge.db.token_utils import calculate_record_tokens
 
 # Set up logger
 logger = get_logger(__name__)

@@ -101,12 +101,15 @@ def _cleanup_old_messages(self, session_id: str) -> int:
         Two-step process:
         1. If record count > max_records, remove oldest record
         2. If total tokens > max_tokens, remove oldest records until within limit
+
+        Optimization: Single DB query with ORDER BY, then in-memory list operations.
+        Eliminates 2 extra database queries compared to the naive implementation.
         """
         with Session(self.engine) as session:
-            # Get current record count
+            # Get current records ordered by timestamp DESC (newest first for token calculation)
             count_stmt = select(ConversationRecord).where(
                 ConversationRecord.session_id == session_id
-            )
+            ).order_by(desc(ConversationRecord.timestamp))
             current_records = session.exec(count_stmt).all()
             current_count = len(current_records)

@@ -121,37 +124,25 @@ def _cleanup_old_messages(self, session_id: str) -> int:
             if current_count > self._max_session_records:
                 logger.info(" 📊 Record limit exceeded, removing 1 oldest record")
 
-                # Get the oldest record to remove (since we add one by one, only need to remove one)
-                oldest_stmt = (
-                    select(ConversationRecord)
-                    .where(ConversationRecord.session_id == session_id)
-                    .order_by(asc(ConversationRecord.timestamp))
-                    .limit(1)
+                # Take the last record (oldest) since list is sorted by timestamp DESC (newest first)
+                oldest_record = current_records[-1]
+
+                logger.info(
+                    f" 🗑️ Removing oldest record: {oldest_record.source} | {oldest_record.tokens} tokens | {oldest_record.timestamp}"
                 )
-                oldest_record = session.exec(oldest_stmt).first()
+                session.delete(oldest_record)
+                removed_count += 1
+                session.commit()
+                logger.info(" ✅ Removed 1 record due to record limit")
 
-                if oldest_record:
-                    logger.info(
-                        f" 🗑️ Removing oldest record: {oldest_record.source} | {oldest_record.tokens} tokens | {oldest_record.timestamp}"
-                    )
-                    session.delete(oldest_record)
-                    removed_count += 1
-                    session.commit()
-                    logger.info(" ✅ Removed 1 record due to record limit")
+                # Update our in-memory list to reflect the deletion
+                current_records.remove(oldest_record)
 
-            # STEP 2: Handle token limit (check remaining records after step 1)
-            remaining_stmt = (
-                select(ConversationRecord)
-                .where(ConversationRecord.session_id == session_id)
-                .order_by(
-                    desc(ConversationRecord.timestamp)
-                )  # Newest first for token calculation
-            )
-            remaining_records = session.exec(remaining_stmt).all()
-            current_tokens = sum(record.tokens for record in remaining_records)
+            # STEP 2: Handle token limit (list is already sorted newest first - perfect for token calculation)
+            current_tokens = sum(record.tokens for record in current_records)
 
             logger.info(
-                f" 🔢 {len(remaining_records)} records, {current_tokens} tokens "
+                f" 🔢 {len(current_records)} records, {current_tokens} tokens "
                 f"(max: {MAX_CONTEXT_TOKENS})"
             )

@@ -164,15 +155,15 @@ def _cleanup_old_messages(self, session_id: str) -> int:
             records_to_keep = []
             running_tokens = 0
 
-            for record in remaining_records:  # Already ordered newest first
+            for record in current_records:  # Already ordered newest first
                 if running_tokens + record.tokens <= MAX_CONTEXT_TOKENS:
                     records_to_keep.append(record)
                     running_tokens += record.tokens
                 else:
                     break
 
             # Remove records that didn't make the cut
-            records_to_remove_for_tokens = remaining_records[len(records_to_keep) :]
+            records_to_remove_for_tokens = current_records[len(records_to_keep) :]
 
             if records_to_remove_for_tokens:
                 logger.info(
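STEP 2 is a greedy newest-first scan: accumulate record tokens until the MAX_CONTEXT_TOKENS budget would overflow, then drop every older record. The same logic in isolation (Record here is an illustrative stand-in for ConversationRecord, used only for its tokens field):

# Standalone illustration of the STEP 2 trimming logic; Record is a stand-in type.
from dataclasses import dataclass

@dataclass
class Record:
    tokens: int

def split_by_token_budget(newest_first: list[Record], max_tokens: int) -> tuple[list[Record], list[Record]]:
    keep: list[Record] = []
    running = 0
    for record in newest_first:  # newest first, matching the ORDER BY ... DESC query
        if running + record.tokens <= max_tokens:
            keep.append(record)
            running += record.tokens
        else:
            break  # first overflow: everything from here on is older, so drop it all
    return keep, newest_first[len(keep):]

keep, drop = split_by_token_budget([Record(30_000), Record(15_000), Record(10_000)], 50_000)
assert [r.tokens for r in keep] == [30_000, 15_000] and [r.tokens for r in drop] == [10_000]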

src/mcp_as_a_judge/utils/token_utils.py renamed to src/mcp_as_a_judge/db/token_utils.py

Lines changed: 19 additions & 21 deletions

@@ -7,6 +7,8 @@
 
 from mcp_as_a_judge.constants import MAX_CONTEXT_TOKENS
 
+from mcp_as_a_judge.db.interface import ConversationRecord
+
 
 def calculate_tokens(text: str) -> int:
     """

@@ -31,20 +33,18 @@ def calculate_tokens(text: str) -> int:
 
 def calculate_record_tokens(input_text: str, output_text: str) -> int:
     """
-    Calculate total token count for a conversation record.
+    Calculate total token count for input and output text.
 
     Combines the token counts of input and output text.
 
     Args:
-        input_text: Tool input text
-        output_text: Tool output text
+        input_text: Input text string
+        output_text: Output text string
 
     Returns:
         Combined token count for both input and output
     """
-    input_tokens = calculate_tokens(input_text)
-    output_tokens = calculate_tokens(output_text)
-    return input_tokens + output_tokens
+    return calculate_tokens(input_text) + calculate_tokens(output_text)
 
 
 def calculate_total_tokens(records: list) -> int:

@@ -61,7 +61,7 @@ def calculate_total_tokens(records: list) -> int:
 
 
 def filter_records_by_token_limit(
-    records: list, max_tokens: int | None = None, max_records: int | None = None
+    records: list, current_prompt: str = ""
 ) -> list:
     """
     Filter conversation records to stay within token and record limits.

@@ -71,36 +71,34 @@ def filter_records_by_token_limit(
 
     Args:
         records: List of ConversationRecord objects (assumed to be in reverse chronological order)
-        max_tokens: Maximum allowed token count (defaults to MAX_CONTEXT_TOKENS from constants)
         max_records: Maximum number of records to keep (optional)
+        current_prompt: Current prompt that will be sent to LLM (for token calculation)
 
     Returns:
         Filtered list of records that fit within the limits
     """
     if not records:
         return []
 
-    # Use default token limit if not specified
-    if max_tokens is None:
-        max_tokens = MAX_CONTEXT_TOKENS
+    # Calculate current prompt tokens
+    current_prompt_tokens = calculate_record_tokens(current_prompt, "") if current_prompt else 0
 
-    # Apply record count limit first if specified
-    if max_records is not None and len(records) > max_records:
-        records = records[:max_records]
+    # Calculate total tokens including current prompt
+    history_tokens = calculate_total_tokens(records)
+    total_tokens = history_tokens + current_prompt_tokens
 
-    # If total tokens are within limit, return all records
-    total_tokens = calculate_total_tokens(records)
-    if total_tokens <= max_tokens:
+    # If total tokens (history + current prompt) are within limit, return all records
+    if total_tokens <= MAX_CONTEXT_TOKENS:
        return records
 
     # Remove oldest records (from the end since records are in reverse chronological order)
-    # until we're within the token limit
+    # until history + current prompt fit within the token limit
     filtered_records = records.copy()
-    current_tokens = total_tokens
+    current_history_tokens = history_tokens
 
-    while current_tokens > max_tokens and len(filtered_records) > 1:
+    while (current_history_tokens + current_prompt_tokens) > MAX_CONTEXT_TOKENS and len(filtered_records) > 1:
         # Remove the oldest record (last in the list)
         removed_record = filtered_records.pop()
-        current_tokens -= getattr(removed_record, "tokens", 0)
+        current_history_tokens -= getattr(removed_record, "tokens", 0)
 
     return filtered_records
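A worked example of the new signature. The records below are stand-ins exposing only the tokens attribute (all the function reads, via getattr), and the expected outcome assumes the documented 1 token ≈ 4 characters estimate inside calculate_tokens():

# Hedged example: stand-in records; prompt math assumes 4 characters per token.
from types import SimpleNamespace

from mcp_as_a_judge.db.token_utils import filter_records_by_token_limit

history = [  # reverse chronological order (newest first), as the function assumes
    SimpleNamespace(tokens=20_000),
    SimpleNamespace(tokens=20_000),
    SimpleNamespace(tokens=15_000),
]

# ~10,000 prompt tokens + 55,000 history tokens exceed MAX_CONTEXT_TOKENS (50,000),
# so the oldest record (15,000 tokens) is dropped: 40,000 + 10,000 <= 50,000.
kept = filter_records_by_token_limit(history, current_prompt="x" * 40_000)
print([r.tokens for r in kept])  # expected: [20000, 20000]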
