From 074bd31f22b51d7a064cf11390325437fecc8b3a Mon Sep 17 00:00:00 2001 From: dori Date: Sun, 7 Sep 2025 16:45:44 +0300 Subject: [PATCH 01/28] feat: remain 2000 session in memory using LRU (to remain old active sessions) and remain 20 records per session with FIFO --- src/mcp_as_a_judge/constants.py | 3 +- src/mcp_as_a_judge/db/cleanup_service.py | 151 +++++++++++++++++- src/mcp_as_a_judge/db/db_config.py | 2 + .../db/providers/sqlite_provider.py | 23 ++- tests/test_conversation_history_lifecycle.py | 121 ++++++++++++++ 5 files changed, 289 insertions(+), 11 deletions(-) diff --git a/src/mcp_as_a_judge/constants.py b/src/mcp_as_a_judge/constants.py index a5b9b04..6e8770a 100644 --- a/src/mcp_as_a_judge/constants.py +++ b/src/mcp_as_a_judge/constants.py @@ -14,4 +14,5 @@ # Database Configuration DATABASE_URL = "sqlite://:memory:" MAX_SESSION_RECORDS = 20 # Maximum records to keep per session (FIFO) -RECORD_RETENTION_DAYS = 1 +MAX_TOTAL_SESSIONS = 2000 # Maximum total sessions to keep (LRU cleanup) +RECORD_RETENTION_DAYS = 1 # Optional time-based cleanup (fallback) diff --git a/src/mcp_as_a_judge/db/cleanup_service.py b/src/mcp_as_a_judge/db/cleanup_service.py index ceb291d..9a2d6ad 100644 --- a/src/mcp_as_a_judge/db/cleanup_service.py +++ b/src/mcp_as_a_judge/db/cleanup_service.py @@ -7,10 +7,10 @@ from datetime import datetime, timedelta -from sqlalchemy import Engine +from sqlalchemy import Engine, func from sqlmodel import Session, select -from mcp_as_a_judge.constants import RECORD_RETENTION_DAYS +from mcp_as_a_judge.constants import MAX_TOTAL_SESSIONS, RECORD_RETENTION_DAYS from mcp_as_a_judge.db.interface import ConversationRecord from mcp_as_a_judge.logging_config import get_logger @@ -22,8 +22,16 @@ class ConversationCleanupService: """ Service for cleaning up old conversation history records. - Handles time-based cleanup: Removes records older than retention period. - Note: LRU cleanup is handled by the SQLite provider during save operations. + Implements session-based LRU cleanup strategy: + - Maintains max 2000 sessions by removing least recently used sessions + - Runs once per day to avoid performance overhead + + LRU vs FIFO for Better User Experience: + - LRU (Least Recently Used): Keeps sessions that users are actively using, even if they're old + - FIFO (First In, First Out): Would remove oldest sessions regardless of recent activity + - LRU provides better UX because active conversations are preserved longer + + Note: Per-session FIFO cleanup (max 20 records) is handled by the SQLite provider. """ def __init__(self, engine: Engine) -> None: @@ -34,8 +42,10 @@ def __init__(self, engine: Engine) -> None: engine: SQLAlchemy engine for database operations """ self.engine = engine + self.max_total_sessions = MAX_TOTAL_SESSIONS self.retention_days = RECORD_RETENTION_DAYS self.last_cleanup_time = datetime.utcnow() + self.last_session_cleanup_time = datetime.utcnow() def cleanup_old_records(self) -> int: """ @@ -79,3 +89,136 @@ def cleanup_old_records(self) -> int: f"๐Ÿงน Daily cleanup: Deleted {old_count} records older than {self.retention_days} days" ) return old_count + + def get_session_count(self) -> int: + """ + Get the total number of unique sessions in the database. + + Returns: + Number of unique sessions + """ + with Session(self.engine) as session: + # Count distinct session_ids + count_stmt = select(func.count(func.distinct(ConversationRecord.session_id))) + result = session.exec(count_stmt).first() + return result or 0 + + def get_least_recently_used_sessions(self, limit: int) -> list[str]: + """ + Get session IDs of the least recently used sessions. + + Uses LRU strategy: finds sessions with the oldest "last activity" timestamp. + Last activity = MAX(timestamp) for each session (most recent record in session). + + Args: + limit: Number of session IDs to return + + Returns: + List of session IDs ordered by last activity (oldest first) + """ + with Session(self.engine) as session: + # Find sessions with oldest last activity (LRU) + # GROUP BY session_id, ORDER BY MAX(timestamp) ASC to get least recently used + lru_stmt = ( + select( + ConversationRecord.session_id, + func.max(ConversationRecord.timestamp).label("last_activity") + ) + .group_by(ConversationRecord.session_id) + .order_by(func.max(ConversationRecord.timestamp).asc()) + .limit(limit) + ) + + results = session.exec(lru_stmt).all() + return [result.session_id for result in results] + + def delete_sessions(self, session_ids: list[str]) -> int: + """ + Bulk delete all records for the given session IDs. + + Args: + session_ids: List of session IDs to delete + + Returns: + Number of records deleted + """ + if not session_ids: + return 0 + + with Session(self.engine) as session: + # Count records before deletion for logging + count_stmt = select(ConversationRecord).where( + ConversationRecord.session_id.in_(session_ids) + ) + records_to_delete = session.exec(count_stmt).all() + delete_count = len(records_to_delete) + + # Bulk delete all records for these sessions + for record in records_to_delete: + session.delete(record) + + session.commit() + + logger.info( + f"๐Ÿ—‘๏ธ Deleted {delete_count} records from {len(session_ids)} sessions: " + f"{', '.join(session_ids[:3])}{'...' if len(session_ids) > 3 else ''}" + ) + + return delete_count + + def cleanup_excess_sessions(self) -> int: + """ + Remove least recently used sessions when total sessions exceed MAX_TOTAL_SESSIONS. + + This implements LRU (Least Recently Used) cleanup strategy: + - Keeps sessions that users are actively using (better UX than FIFO) + - Only runs once per day to avoid excessive cleanup operations + - During the day, session count can exceed limit (e.g., 5000 sessions is not a memory issue) + - Daily cleanup brings it back to the target limit (2000 sessions) + - Removes entire sessions (all records for those session_ids) + + Returns: + Number of records deleted + """ + # Only run session cleanup once per day + if (datetime.utcnow() - self.last_session_cleanup_time).days < 1: + return 0 + + current_session_count = self.get_session_count() + + if current_session_count <= self.max_total_sessions: + logger.info( + f"๐Ÿงน Daily session LRU cleanup: {current_session_count} sessions " + f"(max: {self.max_total_sessions}) - no cleanup needed" + ) + self.last_session_cleanup_time = datetime.utcnow() + return 0 + + # Calculate how many sessions to remove + sessions_to_remove = current_session_count - self.max_total_sessions + + logger.info( + f"๐Ÿงน Daily session LRU cleanup: {current_session_count} sessions exceeds limit " + f"({self.max_total_sessions}), removing {sessions_to_remove} least recently used sessions" + ) + + # Get least recently used sessions + lru_session_ids = self.get_least_recently_used_sessions(sessions_to_remove) + + if not lru_session_ids: + logger.warning("๐Ÿงน No sessions found for LRU cleanup") + self.last_session_cleanup_time = datetime.utcnow() + return 0 + + # Delete all records for these sessions + deleted_count = self.delete_sessions(lru_session_ids) + + # Reset cleanup tracking + self.last_session_cleanup_time = datetime.utcnow() + + logger.info( + f"โœ… Daily session LRU cleanup completed: removed {sessions_to_remove} sessions, " + f"deleted {deleted_count} records" + ) + + return deleted_count diff --git a/src/mcp_as_a_judge/db/db_config.py b/src/mcp_as_a_judge/db/db_config.py index ad7d79f..52b74f8 100644 --- a/src/mcp_as_a_judge/db/db_config.py +++ b/src/mcp_as_a_judge/db/db_config.py @@ -8,6 +8,7 @@ from mcp_as_a_judge.constants import ( DATABASE_URL, MAX_SESSION_RECORDS, + MAX_TOTAL_SESSIONS, RECORD_RETENTION_DAYS, ) @@ -61,6 +62,7 @@ class DatabaseConfig: def __init__(self) -> None: self.url = DATABASE_URL self.max_session_records = MAX_SESSION_RECORDS + self.max_total_sessions = MAX_TOTAL_SESSIONS self.record_retention_days = RECORD_RETENTION_DAYS diff --git a/src/mcp_as_a_judge/db/providers/sqlite_provider.py b/src/mcp_as_a_judge/db/providers/sqlite_provider.py index 03935be..ba5cc4b 100644 --- a/src/mcp_as_a_judge/db/providers/sqlite_provider.py +++ b/src/mcp_as_a_judge/db/providers/sqlite_provider.py @@ -29,8 +29,9 @@ class SQLiteProvider(ConversationHistoryDB): Features: - SQLModel with SQLAlchemy for type safety - In-memory or file-based SQLite storage - - LRU cleanup per session - - Time-based cleanup (configurable retention) + - Two-level cleanup strategy: + 1. Daily session-based LRU cleanup (max 2000 sessions, removes least recently used) + 2. Per-session FIFO cleanup (max 20 records per session, runs on every save) - Session-based conversation retrieval """ @@ -58,7 +59,9 @@ def __init__(self, max_session_records: int = 20, url: str = "") -> None: logger.info( f"๐Ÿ—„๏ธ SQLModel SQLite provider initialized: {connection_string}, " - f"max_records={max_session_records}, retention_days={self._cleanup_service.retention_days}" + f"max_records_per_session={max_session_records}, " + f"max_total_sessions={self._cleanup_service.max_total_sessions}, " + f"retention_days={self._cleanup_service.retention_days}" ) def _parse_sqlite_url(self, url: str) -> str: @@ -86,6 +89,14 @@ def _cleanup_old_records(self) -> int: """ return self._cleanup_service.cleanup_old_records() + def _cleanup_excess_sessions(self) -> int: + """ + Remove least recently used sessions when total sessions exceed limit. + This implements daily LRU cleanup to maintain max 2000 sessions for better memory management. + Runs once per day - during the day session count can exceed limit without issues. + """ + return self._cleanup_service.cleanup_excess_sessions() + def _cleanup_old_messages(self, session_id: str) -> int: """ Remove old messages from a session using FIFO strategy. @@ -164,10 +175,10 @@ async def save_conversation( logger.info("โœ… Successfully inserted record into conversation_history table") - # Daily cleanup: run once per day to remove old records - self._cleanup_old_records() + # Daily session LRU cleanup: maintain max 2000 sessions (runs once per day) + self._cleanup_excess_sessions() - # Always perform LRU cleanup for this session (lightweight) + # Per-session FIFO cleanup: maintain max 20 records per session (runs on every save) self._cleanup_old_messages(session_id) return record_id diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index 3f20a78..914f953 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -198,6 +198,126 @@ async def test_time_based_cleanup_integration(self): f"โœ… After time-based cleanup: {len(records_after)} records (within retention)" ) + @pytest.mark.asyncio + async def test_lru_session_cleanup_lifecycle(self): + """Test LRU session cleanup: keeps most recently used sessions.""" + # Create provider with small session limit for testing + db = SQLiteProvider(max_session_records=20) + + # Override session limit for testing (normally 2000) + db._cleanup_service.max_total_sessions = 3 + + print("\n๐Ÿ”„ TESTING LRU SESSION CLEANUP LIFECYCLE") + print("=" * 60) + + # PHASE 1: Create sessions with different activity patterns + print("๐Ÿ“ PHASE 1: Creating sessions with different activity patterns...") + + # Session A: Created first, but will be most recently used + await db.save_conversation("session_A", "tool1", "input1", "output1") + print(" Session A: Created (oldest creation time)") + + # Session B: Created second + await db.save_conversation("session_B", "tool1", "input1", "output1") + print(" Session B: Created") + + # Session C: Created third + await db.save_conversation("session_C", "tool1", "input1", "output1") + print(" Session C: Created") + + # Session D: Created fourth + await db.save_conversation("session_D", "tool1", "input1", "output1") + print(" Session D: Created") + + # Session E: Created fifth + await db.save_conversation("session_E", "tool1", "input1", "output1") + print(" Session E: Created") + + # Add recent activity to Session A (making it most recently used) + await db.save_conversation("session_A", "tool2", "recent_input", "recent_output") + print(" Session A: Updated with recent activity (most recently used)") + + # Verify all sessions exist + current_count = db._cleanup_service.get_session_count() + assert current_count == 5, f"Expected 5 sessions, got {current_count}" + print(f"โœ… Phase 1: {current_count} sessions created successfully") + + # PHASE 2: Test LRU detection + print("\n๐Ÿ” PHASE 2: Testing LRU detection...") + + # Get least recently used sessions (should be B, C - oldest last activity) + lru_sessions = db._cleanup_service.get_least_recently_used_sessions(2) + print(f" LRU sessions to remove: {lru_sessions}") + + # Should be sessions B and C (oldest last activity) + assert "session_B" in lru_sessions, "Session B should be LRU" + assert "session_C" in lru_sessions, "Session C should be LRU" + assert "session_A" not in lru_sessions, "Session A should NOT be LRU (recent activity)" + print("โœ… Phase 2: LRU detection working correctly") + + # PHASE 3: Trigger LRU cleanup + print("\n๐Ÿงน PHASE 3: Triggering LRU session cleanup...") + + # Force cleanup by mocking old cleanup time + old_time = datetime.utcnow() - timedelta(days=2) + db._cleanup_service.last_session_cleanup_time = old_time + + # Trigger cleanup + deleted_count = db._cleanup_service.cleanup_excess_sessions() + print(f" Deleted {deleted_count} records") + + # Verify session count is now at limit + final_count = db._cleanup_service.get_session_count() + assert final_count == 3, f"Expected 3 sessions after cleanup, got {final_count}" + print(f"โœ… Phase 3: Session count reduced to {final_count}") + + # PHASE 4: Verify which sessions remain + print("\n๐Ÿ” PHASE 4: Verifying remaining sessions...") + + sessions_to_check = ["session_A", "session_B", "session_C", "session_D", "session_E"] + remaining_sessions = [] + deleted_sessions = [] + + for session_id in sessions_to_check: + records = await db.get_session_conversations(session_id) + if records: + remaining_sessions.append(session_id) + last_activity = max(r.timestamp for r in records) + print(f" โœ… {session_id}: {len(records)} records, last activity: {last_activity}") + else: + deleted_sessions.append(session_id) + print(f" โŒ {session_id}: DELETED (was least recently used)") + + # Verify correct sessions were kept/deleted + assert "session_A" in remaining_sessions, "Session A should remain (most recent activity)" + assert "session_D" in remaining_sessions, "Session D should remain (recent creation)" + assert "session_E" in remaining_sessions, "Session E should remain (most recent creation)" + assert "session_B" in deleted_sessions, "Session B should be deleted (LRU)" + assert "session_C" in deleted_sessions, "Session C should be deleted (LRU)" + + print("โœ… Phase 4: LRU cleanup preserved active sessions correctly") + + # PHASE 5: Verify Session A has both records (original + recent activity) + print("\n๐Ÿ“Š PHASE 5: Verifying session record preservation...") + + session_a_records = await db.get_session_conversations("session_A") + assert len(session_a_records) == 2, f"Session A should have 2 records, got {len(session_a_records)}" + + # Check that both records exist + sources = [r.source for r in session_a_records] + assert "tool1" in sources, "Session A should have original tool1 record" + assert "tool2" in sources, "Session A should have recent tool2 record" + + print(f" Session A has {len(session_a_records)} records: {sources}") + print("โœ… Phase 5: Session record preservation working correctly") + + print("\n๐ŸŽฏ LRU CLEANUP SUMMARY:") + print(" - Sessions B, C deleted (least recently used)") + print(" - Sessions A, D, E preserved (most recently used)") + print(" - Session A preserved despite being oldest (recent activity)") + print(" - LRU provides better UX than FIFO!") + print("โœ… LRU session cleanup lifecycle test PASSED!") + print("โœ… Time-based cleanup integration working correctly") @pytest.mark.asyncio @@ -266,6 +386,7 @@ async def run_tests(): await test_instance.test_save_retrieve_fifo_cleanup_lifecycle() await test_instance.test_multiple_sessions_isolation() await test_instance.test_time_based_cleanup_integration() + await test_instance.test_lru_session_cleanup_lifecycle() await test_instance.test_edge_cases_and_error_handling() print("\n๐ŸŽ‰ ALL CONVERSATION HISTORY LIFECYCLE TESTS PASSED!") From 24fe6a3c977e9bbe77dc402d1d09f990d3a4d616 Mon Sep 17 00:00:00 2001 From: dori Date: Sun, 7 Sep 2025 16:51:12 +0300 Subject: [PATCH 02/28] fix: reformat --- src/mcp_as_a_judge/db/cleanup_service.py | 6 ++- tests/test_conversation_history_lifecycle.py | 41 +++++++++++++++----- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/src/mcp_as_a_judge/db/cleanup_service.py b/src/mcp_as_a_judge/db/cleanup_service.py index 9a2d6ad..6f7062f 100644 --- a/src/mcp_as_a_judge/db/cleanup_service.py +++ b/src/mcp_as_a_judge/db/cleanup_service.py @@ -173,7 +173,8 @@ def cleanup_excess_sessions(self) -> int: This implements LRU (Least Recently Used) cleanup strategy: - Keeps sessions that users are actively using (better UX than FIFO) - Only runs once per day to avoid excessive cleanup operations - - During the day, session count can exceed limit (e.g., 5000 sessions is not a memory issue) + - During the day, session count can exceed limit + (e.g., 5000 sessions is not a memory issue) - Daily cleanup brings it back to the target limit (2000 sessions) - Removes entire sessions (all records for those session_ids) @@ -199,7 +200,8 @@ def cleanup_excess_sessions(self) -> int: logger.info( f"๐Ÿงน Daily session LRU cleanup: {current_session_count} sessions exceeds limit " - f"({self.max_total_sessions}), removing {sessions_to_remove} least recently used sessions" + f"({self.max_total_sessions}), removing {sessions_to_remove} " + f"least recently used sessions" ) # Get least recently used sessions diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index 914f953..f4fb24a 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -234,7 +234,9 @@ async def test_lru_session_cleanup_lifecycle(self): print(" Session E: Created") # Add recent activity to Session A (making it most recently used) - await db.save_conversation("session_A", "tool2", "recent_input", "recent_output") + await db.save_conversation( + "session_A", "tool2", "recent_input", "recent_output" + ) print(" Session A: Updated with recent activity (most recently used)") # Verify all sessions exist @@ -252,7 +254,9 @@ async def test_lru_session_cleanup_lifecycle(self): # Should be sessions B and C (oldest last activity) assert "session_B" in lru_sessions, "Session B should be LRU" assert "session_C" in lru_sessions, "Session C should be LRU" - assert "session_A" not in lru_sessions, "Session A should NOT be LRU (recent activity)" + assert ( + "session_A" not in lru_sessions + ), "Session A should NOT be LRU (recent activity)" print("โœ… Phase 2: LRU detection working correctly") # PHASE 3: Trigger LRU cleanup @@ -268,13 +272,21 @@ async def test_lru_session_cleanup_lifecycle(self): # Verify session count is now at limit final_count = db._cleanup_service.get_session_count() - assert final_count == 3, f"Expected 3 sessions after cleanup, got {final_count}" + assert ( + final_count == 3 + ), f"Expected 3 sessions after cleanup, got {final_count}" print(f"โœ… Phase 3: Session count reduced to {final_count}") # PHASE 4: Verify which sessions remain print("\n๐Ÿ” PHASE 4: Verifying remaining sessions...") - sessions_to_check = ["session_A", "session_B", "session_C", "session_D", "session_E"] + sessions_to_check = [ + "session_A", + "session_B", + "session_C", + "session_D", + "session_E", + ] remaining_sessions = [] deleted_sessions = [] @@ -283,15 +295,24 @@ async def test_lru_session_cleanup_lifecycle(self): if records: remaining_sessions.append(session_id) last_activity = max(r.timestamp for r in records) - print(f" โœ… {session_id}: {len(records)} records, last activity: {last_activity}") + print( + f" โœ… {session_id}: {len(records)} records, " + f"last activity: {last_activity}" + ) else: deleted_sessions.append(session_id) print(f" โŒ {session_id}: DELETED (was least recently used)") # Verify correct sessions were kept/deleted - assert "session_A" in remaining_sessions, "Session A should remain (most recent activity)" - assert "session_D" in remaining_sessions, "Session D should remain (recent creation)" - assert "session_E" in remaining_sessions, "Session E should remain (most recent creation)" + assert ( + "session_A" in remaining_sessions + ), "Session A should remain (most recent activity)" + assert ( + "session_D" in remaining_sessions + ), "Session D should remain (recent creation)" + assert ( + "session_E" in remaining_sessions + ), "Session E should remain (most recent creation)" assert "session_B" in deleted_sessions, "Session B should be deleted (LRU)" assert "session_C" in deleted_sessions, "Session C should be deleted (LRU)" @@ -301,7 +322,9 @@ async def test_lru_session_cleanup_lifecycle(self): print("\n๐Ÿ“Š PHASE 5: Verifying session record preservation...") session_a_records = await db.get_session_conversations("session_A") - assert len(session_a_records) == 2, f"Session A should have 2 records, got {len(session_a_records)}" + assert ( + len(session_a_records) == 2 + ), f"Session A should have 2 records, got {len(session_a_records)}" # Check that both records exist sources = [r.source for r in session_a_records] From 8de1261ee06b89742a4774ba4ebd91a5af913b19 Mon Sep 17 00:00:00 2001 From: dori Date: Sun, 7 Sep 2025 16:54:11 +0300 Subject: [PATCH 03/28] fix: reformat --- src/mcp_as_a_judge/db/cleanup_service.py | 19 +++++++++++++------ tests/test_conversation_history_lifecycle.py | 3 ++- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/mcp_as_a_judge/db/cleanup_service.py b/src/mcp_as_a_judge/db/cleanup_service.py index 6f7062f..150309a 100644 --- a/src/mcp_as_a_judge/db/cleanup_service.py +++ b/src/mcp_as_a_judge/db/cleanup_service.py @@ -27,8 +27,10 @@ class ConversationCleanupService: - Runs once per day to avoid performance overhead LRU vs FIFO for Better User Experience: - - LRU (Least Recently Used): Keeps sessions that users are actively using, even if they're old - - FIFO (First In, First Out): Would remove oldest sessions regardless of recent activity + - LRU (Least Recently Used): Keeps sessions that users are actively using, + even if they're old + - FIFO (First In, First Out): Would remove oldest sessions regardless of + recent activity - LRU provides better UX because active conversations are preserved longer Note: Per-session FIFO cleanup (max 20 records) is handled by the SQLite provider. @@ -71,7 +73,8 @@ def cleanup_old_records(self) -> int: if old_count == 0: logger.info( - f"๐Ÿงน Daily cleanup: No records older than {self.retention_days} days" + f"๐Ÿงน Daily cleanup: No records older than " + f"{self.retention_days} days" ) self.last_cleanup_time = datetime.utcnow() return 0 @@ -86,7 +89,8 @@ def cleanup_old_records(self) -> int: self.last_cleanup_time = datetime.utcnow() logger.info( - f"๐Ÿงน Daily cleanup: Deleted {old_count} records older than {self.retention_days} days" + f"๐Ÿงน Daily cleanup: Deleted {old_count} records older than " + f"{self.retention_days} days" ) return old_count @@ -99,7 +103,9 @@ def get_session_count(self) -> int: """ with Session(self.engine) as session: # Count distinct session_ids - count_stmt = select(func.count(func.distinct(ConversationRecord.session_id))) + count_stmt = select( + func.count(func.distinct(ConversationRecord.session_id)) + ) result = session.exec(count_stmt).first() return result or 0 @@ -168,7 +174,8 @@ def delete_sessions(self, session_ids: list[str]) -> int: def cleanup_excess_sessions(self) -> int: """ - Remove least recently used sessions when total sessions exceed MAX_TOTAL_SESSIONS. + Remove least recently used sessions when total sessions exceed + MAX_TOTAL_SESSIONS. This implements LRU (Least Recently Used) cleanup strategy: - Keeps sessions that users are actively using (better UX than FIFO) diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index f4fb24a..e181bdd 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -195,7 +195,8 @@ async def test_time_based_cleanup_integration(self): records_after = await db.get_session_conversations("time_test_session") assert len(records_after) == 4 print( - f"โœ… After time-based cleanup: {len(records_after)} records (within retention)" + f"โœ… After time-based cleanup: {len(records_after)} records " + f"(within retention)" ) @pytest.mark.asyncio From 9a1fde335a7e10ce680c30d6ec5613ab9e6ae12c Mon Sep 17 00:00:00 2001 From: dori Date: Sun, 7 Sep 2025 16:56:06 +0300 Subject: [PATCH 04/28] fix: reformat --- src/mcp_as_a_judge/db/cleanup_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mcp_as_a_judge/db/cleanup_service.py b/src/mcp_as_a_judge/db/cleanup_service.py index 150309a..be6d09c 100644 --- a/src/mcp_as_a_judge/db/cleanup_service.py +++ b/src/mcp_as_a_judge/db/cleanup_service.py @@ -128,7 +128,7 @@ def get_least_recently_used_sessions(self, limit: int) -> list[str]: lru_stmt = ( select( ConversationRecord.session_id, - func.max(ConversationRecord.timestamp).label("last_activity") + func.max(ConversationRecord.timestamp).label("last_activity"), ) .group_by(ConversationRecord.session_id) .order_by(func.max(ConversationRecord.timestamp).asc()) From 53995371a7781994c7548c6c909170a93b212885 Mon Sep 17 00:00:00 2001 From: dori Date: Sun, 7 Sep 2025 16:58:40 +0300 Subject: [PATCH 05/28] fix: reformat --- src/mcp_as_a_judge/db/cleanup_service.py | 3 +-- tests/test_conversation_history_lifecycle.py | 5 ++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mcp_as_a_judge/db/cleanup_service.py b/src/mcp_as_a_judge/db/cleanup_service.py index be6d09c..504ef51 100644 --- a/src/mcp_as_a_judge/db/cleanup_service.py +++ b/src/mcp_as_a_judge/db/cleanup_service.py @@ -73,8 +73,7 @@ def cleanup_old_records(self) -> int: if old_count == 0: logger.info( - f"๐Ÿงน Daily cleanup: No records older than " - f"{self.retention_days} days" + f"๐Ÿงน Daily cleanup: No records older than {self.retention_days} days" ) self.last_cleanup_time = datetime.utcnow() return 0 diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index e181bdd..7580e12 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -236,7 +236,10 @@ async def test_lru_session_cleanup_lifecycle(self): # Add recent activity to Session A (making it most recently used) await db.save_conversation( - "session_A", "tool2", "recent_input", "recent_output" + "session_A", + "tool2", + "recent_input", + "recent_output", ) print(" Session A: Updated with recent activity (most recently used)") From 1ca13cb0bb6f9439cfa206badf860d57f8db638f Mon Sep 17 00:00:00 2001 From: dori Date: Sun, 7 Sep 2025 17:02:21 +0300 Subject: [PATCH 06/28] fix: reformat --- tests/test_conversation_history_lifecycle.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index 7580e12..33fbded 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -236,10 +236,10 @@ async def test_lru_session_cleanup_lifecycle(self): # Add recent activity to Session A (making it most recently used) await db.save_conversation( - "session_A", - "tool2", - "recent_input", - "recent_output", + session_id="session_A", + source="tool2", + input_data="recent_input", + output="recent_output", ) print(" Session A: Updated with recent activity (most recently used)") From 435cd0f94634e07ba68d36dbb39e512a1d4c9dd1 Mon Sep 17 00:00:00 2001 From: dori Date: Sun, 7 Sep 2025 17:09:01 +0300 Subject: [PATCH 07/28] fix: reformat --- tests/test_conversation_history_lifecycle.py | 24 +++++--------------- 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index 33fbded..e16fcc3 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -258,9 +258,7 @@ async def test_lru_session_cleanup_lifecycle(self): # Should be sessions B and C (oldest last activity) assert "session_B" in lru_sessions, "Session B should be LRU" assert "session_C" in lru_sessions, "Session C should be LRU" - assert ( - "session_A" not in lru_sessions - ), "Session A should NOT be LRU (recent activity)" + assert "session_A" not in lru_sessions, "Session A should NOT be LRU (recent activity)" print("โœ… Phase 2: LRU detection working correctly") # PHASE 3: Trigger LRU cleanup @@ -276,9 +274,7 @@ async def test_lru_session_cleanup_lifecycle(self): # Verify session count is now at limit final_count = db._cleanup_service.get_session_count() - assert ( - final_count == 3 - ), f"Expected 3 sessions after cleanup, got {final_count}" + assert final_count == 3, f"Expected 3 sessions after cleanup, got {final_count}" print(f"โœ… Phase 3: Session count reduced to {final_count}") # PHASE 4: Verify which sessions remain @@ -308,15 +304,9 @@ async def test_lru_session_cleanup_lifecycle(self): print(f" โŒ {session_id}: DELETED (was least recently used)") # Verify correct sessions were kept/deleted - assert ( - "session_A" in remaining_sessions - ), "Session A should remain (most recent activity)" - assert ( - "session_D" in remaining_sessions - ), "Session D should remain (recent creation)" - assert ( - "session_E" in remaining_sessions - ), "Session E should remain (most recent creation)" + assert "session_A" in remaining_sessions, "Session A should remain (most recent activity)" + assert "session_D" in remaining_sessions, "Session D should remain (recent creation)" + assert "session_E" in remaining_sessions, "Session E should remain (most recent creation)" assert "session_B" in deleted_sessions, "Session B should be deleted (LRU)" assert "session_C" in deleted_sessions, "Session C should be deleted (LRU)" @@ -326,9 +316,7 @@ async def test_lru_session_cleanup_lifecycle(self): print("\n๐Ÿ“Š PHASE 5: Verifying session record preservation...") session_a_records = await db.get_session_conversations("session_A") - assert ( - len(session_a_records) == 2 - ), f"Session A should have 2 records, got {len(session_a_records)}" + assert len(session_a_records) == 2, f"Session A should have 2 records, got {len(session_a_records)}" # Check that both records exist sources = [r.source for r in session_a_records] From 65e42a61991d45532fa126a5a0f3577a5a6d2196 Mon Sep 17 00:00:00 2001 From: dori Date: Sun, 7 Sep 2025 17:21:38 +0300 Subject: [PATCH 08/28] fix: reformat --- src/mcp_as_a_judge/db/cleanup_service.py | 22 ++++----- .../db/providers/sqlite_provider.py | 4 +- tests/test_conversation_history_lifecycle.py | 45 +++++-------------- tests/test_sqlite_comprehensive.py | 4 +- 4 files changed, 26 insertions(+), 49 deletions(-) diff --git a/src/mcp_as_a_judge/db/cleanup_service.py b/src/mcp_as_a_judge/db/cleanup_service.py index 504ef51..2d3d5f5 100644 --- a/src/mcp_as_a_judge/db/cleanup_service.py +++ b/src/mcp_as_a_judge/db/cleanup_service.py @@ -5,7 +5,7 @@ removing records older than the retention period (default: 1 day). """ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from sqlalchemy import Engine, func from sqlmodel import Session, select @@ -46,8 +46,8 @@ def __init__(self, engine: Engine) -> None: self.engine = engine self.max_total_sessions = MAX_TOTAL_SESSIONS self.retention_days = RECORD_RETENTION_DAYS - self.last_cleanup_time = datetime.utcnow() - self.last_session_cleanup_time = datetime.utcnow() + self.last_cleanup_time = datetime.now(timezone.utc) + self.last_session_cleanup_time = datetime.now(timezone.utc) def cleanup_old_records(self) -> int: """ @@ -58,10 +58,10 @@ def cleanup_old_records(self) -> int: Number of records deleted """ # Only run cleanup once per day - if (datetime.utcnow() - self.last_cleanup_time).days < 1: + if (datetime.now(timezone.utc) - self.last_cleanup_time).days < 1: return 0 - cutoff_date = datetime.utcnow() - timedelta(days=self.retention_days) + cutoff_date = datetime.now(timezone.utc) - timedelta(days=self.retention_days) with Session(self.engine) as session: # Count old records @@ -75,7 +75,7 @@ def cleanup_old_records(self) -> int: logger.info( f"๐Ÿงน Daily cleanup: No records older than {self.retention_days} days" ) - self.last_cleanup_time = datetime.utcnow() + self.last_cleanup_time = datetime.now(timezone.utc) return 0 # Delete old records @@ -85,7 +85,7 @@ def cleanup_old_records(self) -> int: session.commit() # Reset cleanup tracking - self.last_cleanup_time = datetime.utcnow() + self.last_cleanup_time = datetime.now(timezone.utc) logger.info( f"๐Ÿงน Daily cleanup: Deleted {old_count} records older than " @@ -188,7 +188,7 @@ def cleanup_excess_sessions(self) -> int: Number of records deleted """ # Only run session cleanup once per day - if (datetime.utcnow() - self.last_session_cleanup_time).days < 1: + if (datetime.now(timezone.utc) - self.last_session_cleanup_time).days < 1: return 0 current_session_count = self.get_session_count() @@ -198,7 +198,7 @@ def cleanup_excess_sessions(self) -> int: f"๐Ÿงน Daily session LRU cleanup: {current_session_count} sessions " f"(max: {self.max_total_sessions}) - no cleanup needed" ) - self.last_session_cleanup_time = datetime.utcnow() + self.last_session_cleanup_time = datetime.now(timezone.utc) return 0 # Calculate how many sessions to remove @@ -215,14 +215,14 @@ def cleanup_excess_sessions(self) -> int: if not lru_session_ids: logger.warning("๐Ÿงน No sessions found for LRU cleanup") - self.last_session_cleanup_time = datetime.utcnow() + self.last_session_cleanup_time = datetime.now(timezone.utc) return 0 # Delete all records for these sessions deleted_count = self.delete_sessions(lru_session_ids) # Reset cleanup tracking - self.last_session_cleanup_time = datetime.utcnow() + self.last_session_cleanup_time = datetime.now(timezone.utc) logger.info( f"โœ… Daily session LRU cleanup completed: removed {sessions_to_remove} sessions, " diff --git a/src/mcp_as_a_judge/db/providers/sqlite_provider.py b/src/mcp_as_a_judge/db/providers/sqlite_provider.py index ba5cc4b..ce702e2 100644 --- a/src/mcp_as_a_judge/db/providers/sqlite_provider.py +++ b/src/mcp_as_a_judge/db/providers/sqlite_provider.py @@ -6,7 +6,7 @@ """ import uuid -from datetime import datetime +from datetime import datetime, timezone from sqlalchemy import create_engine from sqlmodel import Session, SQLModel, asc, desc, select @@ -152,7 +152,7 @@ async def save_conversation( ) -> str: """Save a conversation record to SQLite database with LRU cleanup.""" record_id = str(uuid.uuid4()) - timestamp = datetime.utcnow() + timestamp = datetime.now(timezone.utc) logger.info( f"๐Ÿ’พ Saving conversation to SQLModel SQLite DB: record {record_id} " diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index e16fcc3..1741809 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -5,13 +5,10 @@ """ import asyncio -from datetime import datetime, timedelta - +from datetime import datetime, timedelta, timezone import pytest - from mcp_as_a_judge.db.providers.sqlite_provider import SQLiteProvider - class TestConversationHistoryLifecycle: """Test the complete lifecycle of conversation history records.""" @@ -50,17 +47,13 @@ async def test_save_retrieve_fifo_cleanup_lifecycle(self): # Records should be in reverse chronological order (newest first) sources = [r.source for r in records] expected_sources = ["tool_2", "tool_1", "tool_0"] # Newest first - assert sources == expected_sources, ( - f"Expected {expected_sources}, got {sources}" - ) + assert sources == expected_sources, f"Expected {expected_sources}, got {sources}" # Verify timestamps are in descending order timestamps = [r.timestamp for r in records] for i in range(len(timestamps) - 1): - assert timestamps[i] >= timestamps[i + 1], ( - "Records should be ordered newest first" - ) - + assert timestamps[i] >= timestamps[i + 1], "Records should be ordered newest first" + print(f"โœ… Phase 2: Records retrieved in correct order: {sources}") # PHASE 3: Trigger FIFO cleanup by adding more records @@ -78,16 +71,12 @@ async def test_save_retrieve_fifo_cleanup_lifecycle(self): # Verify FIFO cleanup worked records = await db.get_session_conversations(session_id) - assert len(records) == 3, ( - f"Expected 3 records after cleanup, got {len(records)}" - ) + assert len(records) == 3, f"Expected 3 records after cleanup, got {len(records)}" # Should have the 3 most recent records sources = [r.source for r in records] expected_sources = ["tool_4", "tool_3", "tool_2"] # Most recent 3 - assert sources == expected_sources, ( - f"Expected {expected_sources}, got {sources}" - ) + assert sources == expected_sources, f"Expected {expected_sources}, got {sources}" print(f"โœ… Phase 3: FIFO cleanup worked correctly: {sources}") @@ -180,7 +169,7 @@ async def test_time_based_cleanup_integration(self): print(f"โœ… Before cleanup: {len(records_before)} records") # Force time-based cleanup by mocking old cleanup time - old_time = datetime.utcnow() - timedelta(days=2) + old_time = datetime.now(timezone.utc) - timedelta(days=2) db._last_cleanup_time = old_time # Trigger cleanup by adding another record @@ -194,10 +183,7 @@ async def test_time_based_cleanup_integration(self): # Records should still exist (within retention period) records_after = await db.get_session_conversations("time_test_session") assert len(records_after) == 4 - print( - f"โœ… After time-based cleanup: {len(records_after)} records " - f"(within retention)" - ) + print(f"โœ… After time-based cleanup: {len(records_after)} records (within retention)") @pytest.mark.asyncio async def test_lru_session_cleanup_lifecycle(self): @@ -265,7 +251,7 @@ async def test_lru_session_cleanup_lifecycle(self): print("\n๐Ÿงน PHASE 3: Triggering LRU session cleanup...") # Force cleanup by mocking old cleanup time - old_time = datetime.utcnow() - timedelta(days=2) + old_time = datetime.now(timezone.utc) - timedelta(days=2) db._cleanup_service.last_session_cleanup_time = old_time # Trigger cleanup @@ -280,13 +266,7 @@ async def test_lru_session_cleanup_lifecycle(self): # PHASE 4: Verify which sessions remain print("\n๐Ÿ” PHASE 4: Verifying remaining sessions...") - sessions_to_check = [ - "session_A", - "session_B", - "session_C", - "session_D", - "session_E", - ] + sessions_to_check = ["session_A", "session_B", "session_C", "session_D", "session_E"] remaining_sessions = [] deleted_sessions = [] @@ -295,10 +275,7 @@ async def test_lru_session_cleanup_lifecycle(self): if records: remaining_sessions.append(session_id) last_activity = max(r.timestamp for r in records) - print( - f" โœ… {session_id}: {len(records)} records, " - f"last activity: {last_activity}" - ) + print(f" โœ… {session_id}: {len(records)} records, last activity: {last_activity}") else: deleted_sessions.append(session_id) print(f" โŒ {session_id}: DELETED (was least recently used)") diff --git a/tests/test_sqlite_comprehensive.py b/tests/test_sqlite_comprehensive.py index 7ed086a..f149b29 100644 --- a/tests/test_sqlite_comprehensive.py +++ b/tests/test_sqlite_comprehensive.py @@ -4,7 +4,7 @@ """ import asyncio -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone import pytest from test_utils import DatabaseTestUtils @@ -44,7 +44,7 @@ async def test_daily_cleanup_sql(self): db = SQLiteProvider() # Mock old cleanup time to force daily cleanup - old_time = datetime.utcnow() - timedelta(days=2) + old_time = datetime.now(timezone.utc) - timedelta(days=2) db._last_cleanup_time = old_time # Add a record From 1dd12f4a4f5e9c925239375ce24dc459551fad26 Mon Sep 17 00:00:00 2001 From: dori Date: Sun, 7 Sep 2025 17:40:27 +0300 Subject: [PATCH 09/28] fix: UTC --- src/mcp_as_a_judge/db/cleanup_service.py | 22 +++++++++---------- .../db/providers/sqlite_provider.py | 4 ++-- tests/test_conversation_history_lifecycle.py | 10 +++++---- tests/test_sqlite_comprehensive.py | 4 ++-- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/src/mcp_as_a_judge/db/cleanup_service.py b/src/mcp_as_a_judge/db/cleanup_service.py index 2d3d5f5..228e351 100644 --- a/src/mcp_as_a_judge/db/cleanup_service.py +++ b/src/mcp_as_a_judge/db/cleanup_service.py @@ -5,7 +5,7 @@ removing records older than the retention period (default: 1 day). """ -from datetime import datetime, timedelta, timezone +from datetime import UTC, datetime, timedelta from sqlalchemy import Engine, func from sqlmodel import Session, select @@ -46,8 +46,8 @@ def __init__(self, engine: Engine) -> None: self.engine = engine self.max_total_sessions = MAX_TOTAL_SESSIONS self.retention_days = RECORD_RETENTION_DAYS - self.last_cleanup_time = datetime.now(timezone.utc) - self.last_session_cleanup_time = datetime.now(timezone.utc) + self.last_cleanup_time = datetime.now(UTC) + self.last_session_cleanup_time = datetime.now(UTC) def cleanup_old_records(self) -> int: """ @@ -58,10 +58,10 @@ def cleanup_old_records(self) -> int: Number of records deleted """ # Only run cleanup once per day - if (datetime.now(timezone.utc) - self.last_cleanup_time).days < 1: + if (datetime.now(UTC) - self.last_cleanup_time).days < 1: return 0 - cutoff_date = datetime.now(timezone.utc) - timedelta(days=self.retention_days) + cutoff_date = datetime.now(UTC) - timedelta(days=self.retention_days) with Session(self.engine) as session: # Count old records @@ -75,7 +75,7 @@ def cleanup_old_records(self) -> int: logger.info( f"๐Ÿงน Daily cleanup: No records older than {self.retention_days} days" ) - self.last_cleanup_time = datetime.now(timezone.utc) + self.last_cleanup_time = datetime.now(UTC) return 0 # Delete old records @@ -85,7 +85,7 @@ def cleanup_old_records(self) -> int: session.commit() # Reset cleanup tracking - self.last_cleanup_time = datetime.now(timezone.utc) + self.last_cleanup_time = datetime.now(UTC) logger.info( f"๐Ÿงน Daily cleanup: Deleted {old_count} records older than " @@ -188,7 +188,7 @@ def cleanup_excess_sessions(self) -> int: Number of records deleted """ # Only run session cleanup once per day - if (datetime.now(timezone.utc) - self.last_session_cleanup_time).days < 1: + if (datetime.now(UTC) - self.last_session_cleanup_time).days < 1: return 0 current_session_count = self.get_session_count() @@ -198,7 +198,7 @@ def cleanup_excess_sessions(self) -> int: f"๐Ÿงน Daily session LRU cleanup: {current_session_count} sessions " f"(max: {self.max_total_sessions}) - no cleanup needed" ) - self.last_session_cleanup_time = datetime.now(timezone.utc) + self.last_session_cleanup_time = datetime.now(UTC) return 0 # Calculate how many sessions to remove @@ -215,14 +215,14 @@ def cleanup_excess_sessions(self) -> int: if not lru_session_ids: logger.warning("๐Ÿงน No sessions found for LRU cleanup") - self.last_session_cleanup_time = datetime.now(timezone.utc) + self.last_session_cleanup_time = datetime.now(UTC) return 0 # Delete all records for these sessions deleted_count = self.delete_sessions(lru_session_ids) # Reset cleanup tracking - self.last_session_cleanup_time = datetime.now(timezone.utc) + self.last_session_cleanup_time = datetime.now(UTC) logger.info( f"โœ… Daily session LRU cleanup completed: removed {sessions_to_remove} sessions, " diff --git a/src/mcp_as_a_judge/db/providers/sqlite_provider.py b/src/mcp_as_a_judge/db/providers/sqlite_provider.py index ce702e2..a6c7877 100644 --- a/src/mcp_as_a_judge/db/providers/sqlite_provider.py +++ b/src/mcp_as_a_judge/db/providers/sqlite_provider.py @@ -6,7 +6,7 @@ """ import uuid -from datetime import datetime, timezone +from datetime import UTC, datetime from sqlalchemy import create_engine from sqlmodel import Session, SQLModel, asc, desc, select @@ -152,7 +152,7 @@ async def save_conversation( ) -> str: """Save a conversation record to SQLite database with LRU cleanup.""" record_id = str(uuid.uuid4()) - timestamp = datetime.now(timezone.utc) + timestamp = datetime.now(UTC) logger.info( f"๐Ÿ’พ Saving conversation to SQLModel SQLite DB: record {record_id} " diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index 1741809..2bd440c 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -5,8 +5,10 @@ """ import asyncio -from datetime import datetime, timedelta, timezone +from datetime import UTC, datetime, timedelta + import pytest + from mcp_as_a_judge.db.providers.sqlite_provider import SQLiteProvider class TestConversationHistoryLifecycle: @@ -53,7 +55,7 @@ async def test_save_retrieve_fifo_cleanup_lifecycle(self): timestamps = [r.timestamp for r in records] for i in range(len(timestamps) - 1): assert timestamps[i] >= timestamps[i + 1], "Records should be ordered newest first" - + print(f"โœ… Phase 2: Records retrieved in correct order: {sources}") # PHASE 3: Trigger FIFO cleanup by adding more records @@ -169,7 +171,7 @@ async def test_time_based_cleanup_integration(self): print(f"โœ… Before cleanup: {len(records_before)} records") # Force time-based cleanup by mocking old cleanup time - old_time = datetime.now(timezone.utc) - timedelta(days=2) + old_time = datetime.now(UTC) - timedelta(days=2) db._last_cleanup_time = old_time # Trigger cleanup by adding another record @@ -251,7 +253,7 @@ async def test_lru_session_cleanup_lifecycle(self): print("\n๐Ÿงน PHASE 3: Triggering LRU session cleanup...") # Force cleanup by mocking old cleanup time - old_time = datetime.now(timezone.utc) - timedelta(days=2) + old_time = datetime.now(UTC) - timedelta(days=2) db._cleanup_service.last_session_cleanup_time = old_time # Trigger cleanup diff --git a/tests/test_sqlite_comprehensive.py b/tests/test_sqlite_comprehensive.py index f149b29..5eec95c 100644 --- a/tests/test_sqlite_comprehensive.py +++ b/tests/test_sqlite_comprehensive.py @@ -4,7 +4,7 @@ """ import asyncio -from datetime import datetime, timedelta, timezone +from datetime import UTC, datetime, timedelta import pytest from test_utils import DatabaseTestUtils @@ -44,7 +44,7 @@ async def test_daily_cleanup_sql(self): db = SQLiteProvider() # Mock old cleanup time to force daily cleanup - old_time = datetime.now(timezone.utc) - timedelta(days=2) + old_time = datetime.now(UTC) - timedelta(days=2) db._last_cleanup_time = old_time # Add a record From 29f4aeff6d2d737cda39d50e8f4d6c3f6bb33983 Mon Sep 17 00:00:00 2001 From: dori Date: Sun, 7 Sep 2025 17:53:40 +0300 Subject: [PATCH 10/28] fix: import order --- tests/test_conversation_history_lifecycle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index 2bd440c..a279191 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -5,9 +5,9 @@ """ import asyncio -from datetime import UTC, datetime, timedelta import pytest +from datetime import UTC, datetime, timedelta from mcp_as_a_judge.db.providers.sqlite_provider import SQLiteProvider From 9a6614b4d88e4233aba4e316927d9d0c2db187e7 Mon Sep 17 00:00:00 2001 From: dori Date: Sun, 7 Sep 2025 17:55:57 +0300 Subject: [PATCH 11/28] fix: import order in tests --- tests/test_conversation_history_lifecycle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index a279191..2bd440c 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -5,9 +5,9 @@ """ import asyncio +from datetime import UTC, datetime, timedelta import pytest -from datetime import UTC, datetime, timedelta from mcp_as_a_judge.db.providers.sqlite_provider import SQLiteProvider From e965aa9eefa2a9c23d0a910cb81c0df33bd618af Mon Sep 17 00:00:00 2001 From: dori Date: Sun, 7 Sep 2025 18:04:56 +0300 Subject: [PATCH 12/28] fix: import utc --- src/mcp_as_a_judge/db/cleanup_service.py | 2 +- src/mcp_as_a_judge/db/providers/sqlite_provider.py | 2 +- tests/test_conversation_history_lifecycle.py | 2 +- tests/test_sqlite_comprehensive.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mcp_as_a_judge/db/cleanup_service.py b/src/mcp_as_a_judge/db/cleanup_service.py index 228e351..d6a4c23 100644 --- a/src/mcp_as_a_judge/db/cleanup_service.py +++ b/src/mcp_as_a_judge/db/cleanup_service.py @@ -5,7 +5,7 @@ removing records older than the retention period (default: 1 day). """ -from datetime import UTC, datetime, timedelta +from datetime import datetime, timedelta, UTC from sqlalchemy import Engine, func from sqlmodel import Session, select diff --git a/src/mcp_as_a_judge/db/providers/sqlite_provider.py b/src/mcp_as_a_judge/db/providers/sqlite_provider.py index a6c7877..76baa7d 100644 --- a/src/mcp_as_a_judge/db/providers/sqlite_provider.py +++ b/src/mcp_as_a_judge/db/providers/sqlite_provider.py @@ -6,7 +6,7 @@ """ import uuid -from datetime import UTC, datetime +from datetime import datetime, UTC from sqlalchemy import create_engine from sqlmodel import Session, SQLModel, asc, desc, select diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index 2bd440c..9a9a0ee 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -5,7 +5,7 @@ """ import asyncio -from datetime import UTC, datetime, timedelta +from datetime import datetime, timedelta, UTC import pytest diff --git a/tests/test_sqlite_comprehensive.py b/tests/test_sqlite_comprehensive.py index 5eec95c..b1370c6 100644 --- a/tests/test_sqlite_comprehensive.py +++ b/tests/test_sqlite_comprehensive.py @@ -4,7 +4,7 @@ """ import asyncio -from datetime import UTC, datetime, timedelta +from datetime import datetime, timedelta, UTC import pytest from test_utils import DatabaseTestUtils From 8ff6154d999fa0c0666862eafad49a4cdd32dd17 Mon Sep 17 00:00:00 2001 From: dori Date: Sun, 7 Sep 2025 18:07:04 +0300 Subject: [PATCH 13/28] fix: import utc --- src/mcp_as_a_judge/db/providers/sqlite_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mcp_as_a_judge/db/providers/sqlite_provider.py b/src/mcp_as_a_judge/db/providers/sqlite_provider.py index 76baa7d..d0ce25a 100644 --- a/src/mcp_as_a_judge/db/providers/sqlite_provider.py +++ b/src/mcp_as_a_judge/db/providers/sqlite_provider.py @@ -5,8 +5,8 @@ It supports both in-memory (:memory:) and file-based SQLite storage. """ -import uuid from datetime import datetime, UTC +import uuid from sqlalchemy import create_engine from sqlmodel import Session, SQLModel, asc, desc, select From cf07803b31030e973b621f29b16c9705e678598c Mon Sep 17 00:00:00 2001 From: dori Date: Sun, 7 Sep 2025 18:13:20 +0300 Subject: [PATCH 14/28] fix: ruff fix --- src/mcp_as_a_judge/db/cleanup_service.py | 2 +- src/mcp_as_a_judge/db/providers/sqlite_provider.py | 2 +- tests/test_conversation_history_lifecycle.py | 3 ++- tests/test_sqlite_comprehensive.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/mcp_as_a_judge/db/cleanup_service.py b/src/mcp_as_a_judge/db/cleanup_service.py index d6a4c23..228e351 100644 --- a/src/mcp_as_a_judge/db/cleanup_service.py +++ b/src/mcp_as_a_judge/db/cleanup_service.py @@ -5,7 +5,7 @@ removing records older than the retention period (default: 1 day). """ -from datetime import datetime, timedelta, UTC +from datetime import UTC, datetime, timedelta from sqlalchemy import Engine, func from sqlmodel import Session, select diff --git a/src/mcp_as_a_judge/db/providers/sqlite_provider.py b/src/mcp_as_a_judge/db/providers/sqlite_provider.py index d0ce25a..a6c7877 100644 --- a/src/mcp_as_a_judge/db/providers/sqlite_provider.py +++ b/src/mcp_as_a_judge/db/providers/sqlite_provider.py @@ -5,8 +5,8 @@ It supports both in-memory (:memory:) and file-based SQLite storage. """ -from datetime import datetime, UTC import uuid +from datetime import UTC, datetime from sqlalchemy import create_engine from sqlmodel import Session, SQLModel, asc, desc, select diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index 9a9a0ee..0ad301b 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -5,12 +5,13 @@ """ import asyncio -from datetime import datetime, timedelta, UTC +from datetime import UTC, datetime, timedelta import pytest from mcp_as_a_judge.db.providers.sqlite_provider import SQLiteProvider + class TestConversationHistoryLifecycle: """Test the complete lifecycle of conversation history records.""" diff --git a/tests/test_sqlite_comprehensive.py b/tests/test_sqlite_comprehensive.py index b1370c6..5eec95c 100644 --- a/tests/test_sqlite_comprehensive.py +++ b/tests/test_sqlite_comprehensive.py @@ -4,7 +4,7 @@ """ import asyncio -from datetime import datetime, timedelta, UTC +from datetime import UTC, datetime, timedelta import pytest from test_utils import DatabaseTestUtils From 715aedad027352004ba89abef8fe25582628c30a Mon Sep 17 00:00:00 2001 From: dori Date: Sun, 7 Sep 2025 18:14:40 +0300 Subject: [PATCH 15/28] fix: ruff fix --- tests/test_conversation_history_lifecycle.py | 52 +++++++++++++++----- 1 file changed, 40 insertions(+), 12 deletions(-) diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index 0ad301b..7a3165f 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -50,12 +50,16 @@ async def test_save_retrieve_fifo_cleanup_lifecycle(self): # Records should be in reverse chronological order (newest first) sources = [r.source for r in records] expected_sources = ["tool_2", "tool_1", "tool_0"] # Newest first - assert sources == expected_sources, f"Expected {expected_sources}, got {sources}" + assert sources == expected_sources, ( + f"Expected {expected_sources}, got {sources}" + ) # Verify timestamps are in descending order timestamps = [r.timestamp for r in records] for i in range(len(timestamps) - 1): - assert timestamps[i] >= timestamps[i + 1], "Records should be ordered newest first" + assert timestamps[i] >= timestamps[i + 1], ( + "Records should be ordered newest first" + ) print(f"โœ… Phase 2: Records retrieved in correct order: {sources}") @@ -74,12 +78,16 @@ async def test_save_retrieve_fifo_cleanup_lifecycle(self): # Verify FIFO cleanup worked records = await db.get_session_conversations(session_id) - assert len(records) == 3, f"Expected 3 records after cleanup, got {len(records)}" + assert len(records) == 3, ( + f"Expected 3 records after cleanup, got {len(records)}" + ) # Should have the 3 most recent records sources = [r.source for r in records] expected_sources = ["tool_4", "tool_3", "tool_2"] # Most recent 3 - assert sources == expected_sources, f"Expected {expected_sources}, got {sources}" + assert sources == expected_sources, ( + f"Expected {expected_sources}, got {sources}" + ) print(f"โœ… Phase 3: FIFO cleanup worked correctly: {sources}") @@ -186,7 +194,9 @@ async def test_time_based_cleanup_integration(self): # Records should still exist (within retention period) records_after = await db.get_session_conversations("time_test_session") assert len(records_after) == 4 - print(f"โœ… After time-based cleanup: {len(records_after)} records (within retention)") + print( + f"โœ… After time-based cleanup: {len(records_after)} records (within retention)" + ) @pytest.mark.asyncio async def test_lru_session_cleanup_lifecycle(self): @@ -247,7 +257,9 @@ async def test_lru_session_cleanup_lifecycle(self): # Should be sessions B and C (oldest last activity) assert "session_B" in lru_sessions, "Session B should be LRU" assert "session_C" in lru_sessions, "Session C should be LRU" - assert "session_A" not in lru_sessions, "Session A should NOT be LRU (recent activity)" + assert "session_A" not in lru_sessions, ( + "Session A should NOT be LRU (recent activity)" + ) print("โœ… Phase 2: LRU detection working correctly") # PHASE 3: Trigger LRU cleanup @@ -269,7 +281,13 @@ async def test_lru_session_cleanup_lifecycle(self): # PHASE 4: Verify which sessions remain print("\n๐Ÿ” PHASE 4: Verifying remaining sessions...") - sessions_to_check = ["session_A", "session_B", "session_C", "session_D", "session_E"] + sessions_to_check = [ + "session_A", + "session_B", + "session_C", + "session_D", + "session_E", + ] remaining_sessions = [] deleted_sessions = [] @@ -278,15 +296,23 @@ async def test_lru_session_cleanup_lifecycle(self): if records: remaining_sessions.append(session_id) last_activity = max(r.timestamp for r in records) - print(f" โœ… {session_id}: {len(records)} records, last activity: {last_activity}") + print( + f" โœ… {session_id}: {len(records)} records, last activity: {last_activity}" + ) else: deleted_sessions.append(session_id) print(f" โŒ {session_id}: DELETED (was least recently used)") # Verify correct sessions were kept/deleted - assert "session_A" in remaining_sessions, "Session A should remain (most recent activity)" - assert "session_D" in remaining_sessions, "Session D should remain (recent creation)" - assert "session_E" in remaining_sessions, "Session E should remain (most recent creation)" + assert "session_A" in remaining_sessions, ( + "Session A should remain (most recent activity)" + ) + assert "session_D" in remaining_sessions, ( + "Session D should remain (recent creation)" + ) + assert "session_E" in remaining_sessions, ( + "Session E should remain (most recent creation)" + ) assert "session_B" in deleted_sessions, "Session B should be deleted (LRU)" assert "session_C" in deleted_sessions, "Session C should be deleted (LRU)" @@ -296,7 +322,9 @@ async def test_lru_session_cleanup_lifecycle(self): print("\n๐Ÿ“Š PHASE 5: Verifying session record preservation...") session_a_records = await db.get_session_conversations("session_A") - assert len(session_a_records) == 2, f"Session A should have 2 records, got {len(session_a_records)}" + assert len(session_a_records) == 2, ( + f"Session A should have 2 records, got {len(session_a_records)}" + ) # Check that both records exist sources = [r.source for r in session_a_records] From 7333816c1a84d9b28771c3cc49114ff39422579d Mon Sep 17 00:00:00 2001 From: dori Date: Sun, 7 Sep 2025 18:27:25 +0300 Subject: [PATCH 16/28] fix: s "str" has no attribute "in_" [attr-defined] --- src/mcp_as_a_judge/db/cleanup_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mcp_as_a_judge/db/cleanup_service.py b/src/mcp_as_a_judge/db/cleanup_service.py index 228e351..f523601 100644 --- a/src/mcp_as_a_judge/db/cleanup_service.py +++ b/src/mcp_as_a_judge/db/cleanup_service.py @@ -135,7 +135,7 @@ def get_least_recently_used_sessions(self, limit: int) -> list[str]: ) results = session.exec(lru_stmt).all() - return [result.session_id for result in results] + return [result[0] for result in results] def delete_sessions(self, session_ids: list[str]) -> int: """ @@ -153,7 +153,7 @@ def delete_sessions(self, session_ids: list[str]) -> int: with Session(self.engine) as session: # Count records before deletion for logging count_stmt = select(ConversationRecord).where( - ConversationRecord.session_id.in_(session_ids) + ConversationRecord.session_id.in_(session_ids) # type: ignore[attr-defined] ) records_to_delete = session.exec(count_stmt).all() delete_count = len(records_to_delete) From ae7ca1211b76d4dc8a3dde85122603048f143b69 Mon Sep 17 00:00:00 2001 From: dori Date: Thu, 11 Sep 2025 09:09:29 +0300 Subject: [PATCH 17/28] fix: reduce max session and increase retention day --- src/mcp_as_a_judge/constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mcp_as_a_judge/constants.py b/src/mcp_as_a_judge/constants.py index 6e8770a..e0a5e5e 100644 --- a/src/mcp_as_a_judge/constants.py +++ b/src/mcp_as_a_judge/constants.py @@ -14,5 +14,5 @@ # Database Configuration DATABASE_URL = "sqlite://:memory:" MAX_SESSION_RECORDS = 20 # Maximum records to keep per session (FIFO) -MAX_TOTAL_SESSIONS = 2000 # Maximum total sessions to keep (LRU cleanup) -RECORD_RETENTION_DAYS = 1 # Optional time-based cleanup (fallback) +MAX_TOTAL_SESSIONS = 50 # Maximum total sessions to keep (LRU cleanup) +RECORD_RETENTION_DAYS = 10 # Optional time-based cleanup (fallback) From 126a3d0547303a87e0561f399f3dcfaa3ea23888 Mon Sep 17 00:00:00 2001 From: dori Date: Thu, 11 Sep 2025 09:57:40 +0300 Subject: [PATCH 18/28] fix: Clean old session if needed - check is done on create new session --- src/mcp_as_a_judge/constants.py | 1 - src/mcp_as_a_judge/db/cleanup_service.py | 80 ++------ src/mcp_as_a_judge/db/db_config.py | 2 - .../db/providers/sqlite_provider.py | 37 ++-- tests/test_conversation_history_lifecycle.py | 190 +++++++++--------- 5 files changed, 130 insertions(+), 180 deletions(-) diff --git a/src/mcp_as_a_judge/constants.py b/src/mcp_as_a_judge/constants.py index e0a5e5e..46d90ac 100644 --- a/src/mcp_as_a_judge/constants.py +++ b/src/mcp_as_a_judge/constants.py @@ -15,4 +15,3 @@ DATABASE_URL = "sqlite://:memory:" MAX_SESSION_RECORDS = 20 # Maximum records to keep per session (FIFO) MAX_TOTAL_SESSIONS = 50 # Maximum total sessions to keep (LRU cleanup) -RECORD_RETENTION_DAYS = 10 # Optional time-based cleanup (fallback) diff --git a/src/mcp_as_a_judge/db/cleanup_service.py b/src/mcp_as_a_judge/db/cleanup_service.py index f523601..0da3c75 100644 --- a/src/mcp_as_a_judge/db/cleanup_service.py +++ b/src/mcp_as_a_judge/db/cleanup_service.py @@ -1,16 +1,16 @@ """ Database cleanup service for conversation history records. -This service handles time-based cleanup operations for conversation history records, -removing records older than the retention period (default: 1 day). +This service handles LRU-based cleanup operations for conversation history records, +removing least recently used sessions when session limits are exceeded. """ -from datetime import UTC, datetime, timedelta +from datetime import UTC, datetime from sqlalchemy import Engine, func from sqlmodel import Session, select -from mcp_as_a_judge.constants import MAX_TOTAL_SESSIONS, RECORD_RETENTION_DAYS +from mcp_as_a_judge.constants import MAX_TOTAL_SESSIONS from mcp_as_a_judge.db.interface import ConversationRecord from mcp_as_a_judge.logging_config import get_logger @@ -20,11 +20,11 @@ class ConversationCleanupService: """ - Service for cleaning up old conversation history records. + Service for cleaning up conversation history records. Implements session-based LRU cleanup strategy: - - Maintains max 2000 sessions by removing least recently used sessions - - Runs once per day to avoid performance overhead + - Maintains session limit by removing least recently used sessions + - Runs immediately when new sessions are created and limit is exceeded LRU vs FIFO for Better User Experience: - LRU (Least Recently Used): Keeps sessions that users are actively using, @@ -45,53 +45,8 @@ def __init__(self, engine: Engine) -> None: """ self.engine = engine self.max_total_sessions = MAX_TOTAL_SESSIONS - self.retention_days = RECORD_RETENTION_DAYS - self.last_cleanup_time = datetime.now(UTC) - self.last_session_cleanup_time = datetime.now(UTC) - def cleanup_old_records(self) -> int: - """ - Remove records older than retention_days. - This runs once per day to avoid excessive cleanup operations. - - Returns: - Number of records deleted - """ - # Only run cleanup once per day - if (datetime.now(UTC) - self.last_cleanup_time).days < 1: - return 0 - - cutoff_date = datetime.now(UTC) - timedelta(days=self.retention_days) - - with Session(self.engine) as session: - # Count old records - old_count_stmt = select(ConversationRecord).where( - ConversationRecord.timestamp < cutoff_date - ) - old_records = session.exec(old_count_stmt).all() - old_count = len(old_records) - - if old_count == 0: - logger.info( - f"๐Ÿงน Daily cleanup: No records older than {self.retention_days} days" - ) - self.last_cleanup_time = datetime.now(UTC) - return 0 - - # Delete old records - for record in old_records: - session.delete(record) - session.commit() - - # Reset cleanup tracking - self.last_cleanup_time = datetime.now(UTC) - - logger.info( - f"๐Ÿงน Daily cleanup: Deleted {old_count} records older than " - f"{self.retention_days} days" - ) - return old_count def get_session_count(self) -> int: """ @@ -178,34 +133,27 @@ def cleanup_excess_sessions(self) -> int: This implements LRU (Least Recently Used) cleanup strategy: - Keeps sessions that users are actively using (better UX than FIFO) - - Only runs once per day to avoid excessive cleanup operations - - During the day, session count can exceed limit - (e.g., 5000 sessions is not a memory issue) - - Daily cleanup brings it back to the target limit (2000 sessions) + - Runs immediately when session limit is exceeded (no daily restriction) - Removes entire sessions (all records for those session_ids) + - Called every time a new session is created to maintain session limit Returns: Number of records deleted """ - # Only run session cleanup once per day - if (datetime.now(UTC) - self.last_session_cleanup_time).days < 1: - return 0 - current_session_count = self.get_session_count() if current_session_count <= self.max_total_sessions: logger.info( - f"๐Ÿงน Daily session LRU cleanup: {current_session_count} sessions " + f"๐Ÿงน Session LRU cleanup: {current_session_count} sessions " f"(max: {self.max_total_sessions}) - no cleanup needed" ) - self.last_session_cleanup_time = datetime.now(UTC) return 0 # Calculate how many sessions to remove sessions_to_remove = current_session_count - self.max_total_sessions logger.info( - f"๐Ÿงน Daily session LRU cleanup: {current_session_count} sessions exceeds limit " + f"๐Ÿงน Session LRU cleanup: {current_session_count} sessions exceeds limit " f"({self.max_total_sessions}), removing {sessions_to_remove} " f"least recently used sessions" ) @@ -215,17 +163,13 @@ def cleanup_excess_sessions(self) -> int: if not lru_session_ids: logger.warning("๐Ÿงน No sessions found for LRU cleanup") - self.last_session_cleanup_time = datetime.now(UTC) return 0 # Delete all records for these sessions deleted_count = self.delete_sessions(lru_session_ids) - # Reset cleanup tracking - self.last_session_cleanup_time = datetime.now(UTC) - logger.info( - f"โœ… Daily session LRU cleanup completed: removed {sessions_to_remove} sessions, " + f"โœ… Session LRU cleanup completed: removed {sessions_to_remove} sessions, " f"deleted {deleted_count} records" ) diff --git a/src/mcp_as_a_judge/db/db_config.py b/src/mcp_as_a_judge/db/db_config.py index 52b74f8..a667b53 100644 --- a/src/mcp_as_a_judge/db/db_config.py +++ b/src/mcp_as_a_judge/db/db_config.py @@ -9,7 +9,6 @@ DATABASE_URL, MAX_SESSION_RECORDS, MAX_TOTAL_SESSIONS, - RECORD_RETENTION_DAYS, ) @@ -63,7 +62,6 @@ def __init__(self) -> None: self.url = DATABASE_URL self.max_session_records = MAX_SESSION_RECORDS self.max_total_sessions = MAX_TOTAL_SESSIONS - self.record_retention_days = RECORD_RETENTION_DAYS class Config: diff --git a/src/mcp_as_a_judge/db/providers/sqlite_provider.py b/src/mcp_as_a_judge/db/providers/sqlite_provider.py index a6c7877..fc39b8d 100644 --- a/src/mcp_as_a_judge/db/providers/sqlite_provider.py +++ b/src/mcp_as_a_judge/db/providers/sqlite_provider.py @@ -30,7 +30,7 @@ class SQLiteProvider(ConversationHistoryDB): - SQLModel with SQLAlchemy for type safety - In-memory or file-based SQLite storage - Two-level cleanup strategy: - 1. Daily session-based LRU cleanup (max 2000 sessions, removes least recently used) + 1. Session-based LRU cleanup (runs when new sessions are created, removes least recently used) 2. Per-session FIFO cleanup (max 20 records per session, runs on every save) - Session-based conversation retrieval """ @@ -51,7 +51,7 @@ def __init__(self, max_session_records: int = 20, url: str = "") -> None: self._max_session_records = max_session_records - # Initialize cleanup service for time-based cleanup + # Initialize cleanup service for LRU session cleanup self._cleanup_service = ConversationCleanupService(engine=self.engine) # Create tables @@ -60,8 +60,7 @@ def __init__(self, max_session_records: int = 20, url: str = "") -> None: logger.info( f"๐Ÿ—„๏ธ SQLModel SQLite provider initialized: {connection_string}, " f"max_records_per_session={max_session_records}, " - f"max_total_sessions={self._cleanup_service.max_total_sessions}, " - f"retention_days={self._cleanup_service.retention_days}" + f"max_total_sessions={self._cleanup_service.max_total_sessions}" ) def _parse_sqlite_url(self, url: str) -> str: @@ -82,18 +81,13 @@ def _create_tables(self) -> None: SQLModel.metadata.create_all(self.engine) logger.info("๐Ÿ“‹ Created conversation_history table with SQLModel") - def _cleanup_old_records(self) -> int: - """ - Remove records older than retention_days using the cleanup service. - This runs once per day to avoid excessive cleanup operations. - """ - return self._cleanup_service.cleanup_old_records() + def _cleanup_excess_sessions(self) -> int: """ Remove least recently used sessions when total sessions exceed limit. - This implements daily LRU cleanup to maintain max 2000 sessions for better memory management. - Runs once per day - during the day session count can exceed limit without issues. + This implements LRU cleanup to maintain session limit for better memory management. + Runs immediately when new sessions are created and limit is exceeded. """ return self._cleanup_service.cleanup_excess_sessions() @@ -147,6 +141,16 @@ def _cleanup_old_messages(self, session_id: str) -> int: ) return len(old_records) + def _is_new_session(self, session_id: str) -> bool: + """Check if this is a new session (no existing records).""" + with Session(self.engine) as session: + existing_record = session.exec( + select(ConversationRecord).where( + ConversationRecord.session_id == session_id + ).limit(1) + ).first() + return existing_record is None + async def save_conversation( self, session_id: str, source: str, input_data: str, output: str ) -> str: @@ -159,6 +163,9 @@ async def save_conversation( f"for session {session_id}, source {source} at {timestamp}" ) + # Check if this is a new session before saving + is_new_session = self._is_new_session(session_id) + # Create new record record = ConversationRecord( id=record_id, @@ -175,8 +182,10 @@ async def save_conversation( logger.info("โœ… Successfully inserted record into conversation_history table") - # Daily session LRU cleanup: maintain max 2000 sessions (runs once per day) - self._cleanup_excess_sessions() + # Session LRU cleanup: only run when a new session is created + if is_new_session: + logger.info(f"๐Ÿ†• New session detected: {session_id}, running LRU cleanup") + self._cleanup_excess_sessions() # Per-session FIFO cleanup: maintain max 20 records per session (runs on every save) self._cleanup_old_messages(session_id) diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index 7a3165f..007cdb9 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -158,46 +158,54 @@ async def test_multiple_sessions_isolation(self): print(f" Session B: {sources_b}") @pytest.mark.asyncio - async def test_time_based_cleanup_integration(self): - """Test integration of FIFO cleanup with time-based cleanup.""" + async def test_immediate_cleanup_integration(self): + """Test integration of FIFO cleanup with immediate LRU session cleanup.""" db = SQLiteProvider(max_session_records=5) + # Set a small session limit for testing + db._cleanup_service.max_total_sessions = 2 - print("\n๐Ÿ”„ TESTING TIME-BASED CLEANUP INTEGRATION") + print("\n๐Ÿ”„ TESTING IMMEDIATE CLEANUP INTEGRATION") print("=" * 60) - # Add some records + # Add records to first session for i in range(3): await db.save_conversation( - session_id="time_test_session", + session_id="session_1", source=f"tool_{i}", input_data=f"Input {i}", output=f"Output {i}", ) # Verify records exist - records_before = await db.get_session_conversations("time_test_session") + records_before = await db.get_session_conversations("session_1") assert len(records_before) == 3 - print(f"โœ… Before cleanup: {len(records_before)} records") + print(f"โœ… Session 1 records: {len(records_before)}") - # Force time-based cleanup by mocking old cleanup time - old_time = datetime.now(UTC) - timedelta(days=2) - db._last_cleanup_time = old_time - - # Trigger cleanup by adding another record + # Create second session (should not trigger session cleanup yet) await db.save_conversation( - session_id="time_test_session", - source="tool_trigger", - input_data="Trigger cleanup", - output="Cleanup triggered", + session_id="session_2", + source="tool_1", + input_data="Input 1", + output="Output 1", ) - # Records should still exist (within retention period) - records_after = await db.get_session_conversations("time_test_session") - assert len(records_after) == 4 - print( - f"โœ… After time-based cleanup: {len(records_after)} records (within retention)" + # Both sessions should exist + assert db._cleanup_service.get_session_count() == 2 + print("โœ… Both sessions exist (at limit)") + + # Create third session (should trigger LRU session cleanup) + await db.save_conversation( + session_id="session_3", + source="tool_1", + input_data="Input 1", + output="Output 1", ) + # Should still have only 2 sessions (LRU cleanup triggered) + final_count = db._cleanup_service.get_session_count() + assert final_count == 2 + print(f"โœ… After immediate LRU cleanup: {final_count} sessions (limit maintained)") + @pytest.mark.asyncio async def test_lru_session_cleanup_lifecycle(self): """Test LRU session cleanup: keeps most recently used sessions.""" @@ -225,61 +233,59 @@ async def test_lru_session_cleanup_lifecycle(self): await db.save_conversation("session_C", "tool1", "input1", "output1") print(" Session C: Created") - # Session D: Created fourth - await db.save_conversation("session_D", "tool1", "input1", "output1") - print(" Session D: Created") - - # Session E: Created fifth - await db.save_conversation("session_E", "tool1", "input1", "output1") - print(" Session E: Created") - - # Add recent activity to Session A (making it most recently used) + # Add recent activity to Session A BEFORE creating more sessions + # This ensures Session A becomes most recently used before cleanup await db.save_conversation( session_id="session_A", source="tool2", input_data="recent_input", output="recent_output", ) - print(" Session A: Updated with recent activity (most recently used)") + print(" Session A: Updated with recent activity (now most recently used)") + + # Session D: Created fourth (should trigger cleanup, but Session A should be preserved) + await db.save_conversation("session_D", "tool1", "input1", "output1") + print(" Session D: Created (cleanup triggered)") + + # Session E: Created fifth (should trigger cleanup again) + await db.save_conversation("session_E", "tool1", "input1", "output1") + print(" Session E: Created (cleanup triggered again)") - # Verify all sessions exist + # Verify automatic LRU cleanup happened (should be at limit of 3) current_count = db._cleanup_service.get_session_count() - assert current_count == 5, f"Expected 5 sessions, got {current_count}" - print(f"โœ… Phase 1: {current_count} sessions created successfully") + assert current_count == 3, f"Expected 3 sessions after automatic cleanup, got {current_count}" + print(f"โœ… Phase 1: Automatic LRU cleanup maintained limit - {current_count} sessions remain") - # PHASE 2: Test LRU detection - print("\n๐Ÿ” PHASE 2: Testing LRU detection...") + # PHASE 2: Verify which sessions remain after automatic cleanup + print("\n๐Ÿ” PHASE 2: Verifying remaining sessions after automatic cleanup...") - # Get least recently used sessions (should be B, C - oldest last activity) - lru_sessions = db._cleanup_service.get_least_recently_used_sessions(2) - print(f" LRU sessions to remove: {lru_sessions}") + # Check which sessions still exist + remaining_sessions = [] + for session_id in ["session_A", "session_B", "session_C", "session_D", "session_E"]: + records = await db.get_session_conversations(session_id) + if records: + remaining_sessions.append(session_id) - # Should be sessions B and C (oldest last activity) - assert "session_B" in lru_sessions, "Session B should be LRU" - assert "session_C" in lru_sessions, "Session C should be LRU" - assert "session_A" not in lru_sessions, ( - "Session A should NOT be LRU (recent activity)" - ) - print("โœ… Phase 2: LRU detection working correctly") + print(f" Remaining sessions: {remaining_sessions}") + assert len(remaining_sessions) == 3, f"Expected 3 remaining sessions, got {len(remaining_sessions)}" - # PHASE 3: Trigger LRU cleanup - print("\n๐Ÿงน PHASE 3: Triggering LRU session cleanup...") + # Session A should remain initially (preserved due to recent activity) + assert "session_A" in remaining_sessions, "Session A should remain initially (most recently used)" + print("โœ… Phase 2: Session A initially preserved due to recent activity") - # Force cleanup by mocking old cleanup time - old_time = datetime.now(UTC) - timedelta(days=2) - db._cleanup_service.last_session_cleanup_time = old_time + # PHASE 3: Test that cleanup maintains the limit + print("\n๐Ÿงน PHASE 3: Testing that limit is maintained...") - # Trigger cleanup - deleted_count = db._cleanup_service.cleanup_excess_sessions() - print(f" Deleted {deleted_count} records") + # Try to create another session - should trigger cleanup again + await db.save_conversation("session_F", "tool1", "input1", "output1") - # Verify session count is now at limit + # Should still be at limit final_count = db._cleanup_service.get_session_count() - assert final_count == 3, f"Expected 3 sessions after cleanup, got {final_count}" - print(f"โœ… Phase 3: Session count reduced to {final_count}") + assert final_count == 3, f"Expected 3 sessions after adding new session, got {final_count}" + print(f"โœ… Phase 3: Session limit maintained at {final_count} after adding new session") - # PHASE 4: Verify which sessions remain - print("\n๐Ÿ” PHASE 4: Verifying remaining sessions...") + # PHASE 4: Verify which sessions remain after adding session_F + print("\n๐Ÿ” PHASE 4: Verifying final remaining sessions...") sessions_to_check = [ "session_A", @@ -287,59 +293,53 @@ async def test_lru_session_cleanup_lifecycle(self): "session_C", "session_D", "session_E", + "session_F", ] - remaining_sessions = [] - deleted_sessions = [] + final_remaining_sessions = [] + final_deleted_sessions = [] for session_id in sessions_to_check: records = await db.get_session_conversations(session_id) if records: - remaining_sessions.append(session_id) + final_remaining_sessions.append(session_id) last_activity = max(r.timestamp for r in records) print( f" โœ… {session_id}: {len(records)} records, last activity: {last_activity}" ) else: - deleted_sessions.append(session_id) + final_deleted_sessions.append(session_id) print(f" โŒ {session_id}: DELETED (was least recently used)") - # Verify correct sessions were kept/deleted - assert "session_A" in remaining_sessions, ( - "Session A should remain (most recent activity)" - ) - assert "session_D" in remaining_sessions, ( - "Session D should remain (recent creation)" - ) - assert "session_E" in remaining_sessions, ( - "Session E should remain (most recent creation)" - ) - assert "session_B" in deleted_sessions, "Session B should be deleted (LRU)" - assert "session_C" in deleted_sessions, "Session C should be deleted (LRU)" + # Verify we have exactly 3 sessions + assert len(final_remaining_sessions) == 3, f"Expected 3 remaining sessions, got {len(final_remaining_sessions)}" - print("โœ… Phase 4: LRU cleanup preserved active sessions correctly") - - # PHASE 5: Verify Session A has both records (original + recent activity) - print("\n๐Ÿ“Š PHASE 5: Verifying session record preservation...") - - session_a_records = await db.get_session_conversations("session_A") - assert len(session_a_records) == 2, ( - f"Session A should have 2 records, got {len(session_a_records)}" + # The 3 most recently used sessions should remain (D, E, F) + # Session A gets removed because even though it was updated, + # sessions D and E were created after A's update, making them more recent + expected_remaining = {"session_D", "session_E", "session_F"} + actual_remaining = set(final_remaining_sessions) + assert actual_remaining == expected_remaining, ( + f"Expected {expected_remaining}, got {actual_remaining}" ) - # Check that both records exist - sources = [r.source for r in session_a_records] - assert "tool1" in sources, "Session A should have original tool1 record" - assert "tool2" in sources, "Session A should have recent tool2 record" + print("โœ… Phase 4: LRU cleanup working correctly - most recent sessions preserved") + + # PHASE 5: Verify that LRU cleanup works correctly over time + print("\n๐Ÿ“Š PHASE 5: Verifying LRU behavior over time...") - print(f" Session A has {len(session_a_records)} records: {sources}") - print("โœ… Phase 5: Session record preservation working correctly") + # Session A was initially preserved but then removed when Session F was created + # This demonstrates that LRU is based on actual timestamps, not update sequence + print(" - Session A was initially preserved due to recent activity") + print(" - Session A was later removed when newer sessions (D, E, F) became more recent") + print(" - This shows LRU is working correctly based on actual timestamps") - print("\n๐ŸŽฏ LRU CLEANUP SUMMARY:") - print(" - Sessions B, C deleted (least recently used)") - print(" - Sessions A, D, E preserved (most recently used)") - print(" - Session A preserved despite being oldest (recent activity)") - print(" - LRU provides better UX than FIFO!") - print("โœ… LRU session cleanup lifecycle test PASSED!") + print("\n๐ŸŽฏ IMMEDIATE LRU CLEANUP SUMMARY:") + print(" - Cleanup happens immediately when new sessions are created") + print(" - Session limit is maintained at all times (no daily delay)") + print(" - Most recently used sessions are preserved based on actual timestamps") + print(" - LRU correctly removes sessions with older last activity times") + print(" - LRU provides better UX than FIFO by preserving active sessions!") + print("โœ… Immediate LRU session cleanup lifecycle test PASSED!") print("โœ… Time-based cleanup integration working correctly") From b33cad6164a567ef15d6077d7716db11a53b22c8 Mon Sep 17 00:00:00 2001 From: dori Date: Thu, 11 Sep 2025 10:13:31 +0300 Subject: [PATCH 19/28] fix: remove unused imports --- src/mcp_as_a_judge/db/cleanup_service.py | 2 +- tests/test_conversation_history_lifecycle.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mcp_as_a_judge/db/cleanup_service.py b/src/mcp_as_a_judge/db/cleanup_service.py index 0da3c75..2ae536b 100644 --- a/src/mcp_as_a_judge/db/cleanup_service.py +++ b/src/mcp_as_a_judge/db/cleanup_service.py @@ -5,7 +5,7 @@ removing least recently used sessions when session limits are exceeded. """ -from datetime import UTC, datetime + from sqlalchemy import Engine, func from sqlmodel import Session, select diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index 007cdb9..c2cf4b0 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -5,7 +5,7 @@ """ import asyncio -from datetime import UTC, datetime, timedelta + import pytest From af6a4c620843bd236eef2e6080693f7676ab48f9 Mon Sep 17 00:00:00 2001 From: dori Date: Thu, 11 Sep 2025 10:21:25 +0300 Subject: [PATCH 20/28] fix: remove unused imports --- tests/test_conversation_history_lifecycle.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index c2cf4b0..e05c668 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -6,7 +6,6 @@ import asyncio - import pytest from mcp_as_a_judge.db.providers.sqlite_provider import SQLiteProvider From 282e49873a97338fde8eb3a956b7a2ccb7863904 Mon Sep 17 00:00:00 2001 From: dori Date: Thu, 11 Sep 2025 10:31:58 +0300 Subject: [PATCH 21/28] fix: remove reformat --- src/mcp_as_a_judge/db/cleanup_service.py | 4 -- .../db/providers/sqlite_provider.py | 18 +++--- tests/test_conversation_history_lifecycle.py | 60 ++++++++++++++----- 3 files changed, 56 insertions(+), 26 deletions(-) diff --git a/src/mcp_as_a_judge/db/cleanup_service.py b/src/mcp_as_a_judge/db/cleanup_service.py index 2ae536b..59bbabd 100644 --- a/src/mcp_as_a_judge/db/cleanup_service.py +++ b/src/mcp_as_a_judge/db/cleanup_service.py @@ -5,8 +5,6 @@ removing least recently used sessions when session limits are exceeded. """ - - from sqlalchemy import Engine, func from sqlmodel import Session, select @@ -46,8 +44,6 @@ def __init__(self, engine: Engine) -> None: self.engine = engine self.max_total_sessions = MAX_TOTAL_SESSIONS - - def get_session_count(self) -> int: """ Get the total number of unique sessions in the database. diff --git a/src/mcp_as_a_judge/db/providers/sqlite_provider.py b/src/mcp_as_a_judge/db/providers/sqlite_provider.py index fc39b8d..ad591df 100644 --- a/src/mcp_as_a_judge/db/providers/sqlite_provider.py +++ b/src/mcp_as_a_judge/db/providers/sqlite_provider.py @@ -30,7 +30,8 @@ class SQLiteProvider(ConversationHistoryDB): - SQLModel with SQLAlchemy for type safety - In-memory or file-based SQLite storage - Two-level cleanup strategy: - 1. Session-based LRU cleanup (runs when new sessions are created, removes least recently used) + 1. Session-based LRU cleanup (runs when new sessions are created, + removes least recently used) 2. Per-session FIFO cleanup (max 20 records per session, runs on every save) - Session-based conversation retrieval """ @@ -81,12 +82,11 @@ def _create_tables(self) -> None: SQLModel.metadata.create_all(self.engine) logger.info("๐Ÿ“‹ Created conversation_history table with SQLModel") - - def _cleanup_excess_sessions(self) -> int: """ Remove least recently used sessions when total sessions exceed limit. - This implements LRU cleanup to maintain session limit for better memory management. + This implements LRU cleanup to maintain session limit for better memory + management. Runs immediately when new sessions are created and limit is exceeded. """ return self._cleanup_service.cleanup_excess_sessions() @@ -105,8 +105,8 @@ def _cleanup_old_messages(self, session_id: str) -> int: current_count = len(current_records) logger.info( - f"๐Ÿงน FIFO cleanup check for session {session_id}: {current_count} records " - f"(max: {self._max_session_records})" + f"๐Ÿงน FIFO cleanup check for session {session_id}: " + f"{current_count} records (max: {self._max_session_records})" ) if current_count <= self._max_session_records: @@ -137,7 +137,8 @@ def _cleanup_old_messages(self, session_id: str) -> int: session.commit() logger.info( - f"โœ… LRU cleanup completed: removed {len(old_records)} records from session {session_id}" + f"โœ… LRU cleanup completed: removed {len(old_records)} records " + f"from session {session_id}" ) return len(old_records) @@ -187,7 +188,8 @@ async def save_conversation( logger.info(f"๐Ÿ†• New session detected: {session_id}, running LRU cleanup") self._cleanup_excess_sessions() - # Per-session FIFO cleanup: maintain max 20 records per session (runs on every save) + # Per-session FIFO cleanup: maintain max 20 records per session + # (runs on every save) self._cleanup_old_messages(session_id) return record_id diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index e05c668..75a95d9 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -203,7 +203,9 @@ async def test_immediate_cleanup_integration(self): # Should still have only 2 sessions (LRU cleanup triggered) final_count = db._cleanup_service.get_session_count() assert final_count == 2 - print(f"โœ… After immediate LRU cleanup: {final_count} sessions (limit maintained)") + print( + f"โœ… After immediate LRU cleanup: {final_count} sessions (limit maintained)" + ) @pytest.mark.asyncio async def test_lru_session_cleanup_lifecycle(self): @@ -242,7 +244,8 @@ async def test_lru_session_cleanup_lifecycle(self): ) print(" Session A: Updated with recent activity (now most recently used)") - # Session D: Created fourth (should trigger cleanup, but Session A should be preserved) + # Session D: Created fourth (should trigger cleanup, but Session A + # should be preserved) await db.save_conversation("session_D", "tool1", "input1", "output1") print(" Session D: Created (cleanup triggered)") @@ -252,24 +255,39 @@ async def test_lru_session_cleanup_lifecycle(self): # Verify automatic LRU cleanup happened (should be at limit of 3) current_count = db._cleanup_service.get_session_count() - assert current_count == 3, f"Expected 3 sessions after automatic cleanup, got {current_count}" - print(f"โœ… Phase 1: Automatic LRU cleanup maintained limit - {current_count} sessions remain") + assert current_count == 3, ( + f"Expected 3 sessions after automatic cleanup, got {current_count}" + ) + print( + f"โœ… Phase 1: Automatic LRU cleanup maintained limit - " + f"{current_count} sessions remain" + ) # PHASE 2: Verify which sessions remain after automatic cleanup print("\n๐Ÿ” PHASE 2: Verifying remaining sessions after automatic cleanup...") # Check which sessions still exist remaining_sessions = [] - for session_id in ["session_A", "session_B", "session_C", "session_D", "session_E"]: + for session_id in [ + "session_A", + "session_B", + "session_C", + "session_D", + "session_E" + ]: records = await db.get_session_conversations(session_id) if records: remaining_sessions.append(session_id) print(f" Remaining sessions: {remaining_sessions}") - assert len(remaining_sessions) == 3, f"Expected 3 remaining sessions, got {len(remaining_sessions)}" + assert len(remaining_sessions) == 3, ( + f"Expected 3 remaining sessions, got {len(remaining_sessions)}" + ) # Session A should remain initially (preserved due to recent activity) - assert "session_A" in remaining_sessions, "Session A should remain initially (most recently used)" + assert "session_A" in remaining_sessions, ( + "Session A should remain initially (most recently used)" + ) print("โœ… Phase 2: Session A initially preserved due to recent activity") # PHASE 3: Test that cleanup maintains the limit @@ -280,8 +298,12 @@ async def test_lru_session_cleanup_lifecycle(self): # Should still be at limit final_count = db._cleanup_service.get_session_count() - assert final_count == 3, f"Expected 3 sessions after adding new session, got {final_count}" - print(f"โœ… Phase 3: Session limit maintained at {final_count} after adding new session") + assert final_count == 3, ( + f"Expected 3 sessions after adding new session, got {final_count}" + ) + print( + f"โœ… Phase 3: Session limit maintained at {final_count} after adding new session" + ) # PHASE 4: Verify which sessions remain after adding session_F print("\n๐Ÿ” PHASE 4: Verifying final remaining sessions...") @@ -303,14 +325,17 @@ async def test_lru_session_cleanup_lifecycle(self): final_remaining_sessions.append(session_id) last_activity = max(r.timestamp for r in records) print( - f" โœ… {session_id}: {len(records)} records, last activity: {last_activity}" + f" โœ… {session_id}: {len(records)} records, " + f"last activity: {last_activity}" ) else: final_deleted_sessions.append(session_id) print(f" โŒ {session_id}: DELETED (was least recently used)") # Verify we have exactly 3 sessions - assert len(final_remaining_sessions) == 3, f"Expected 3 remaining sessions, got {len(final_remaining_sessions)}" + assert len(final_remaining_sessions) == 3, ( + f"Expected 3 remaining sessions, got {len(final_remaining_sessions)}" + ) # The 3 most recently used sessions should remain (D, E, F) # Session A gets removed because even though it was updated, @@ -321,7 +346,9 @@ async def test_lru_session_cleanup_lifecycle(self): f"Expected {expected_remaining}, got {actual_remaining}" ) - print("โœ… Phase 4: LRU cleanup working correctly - most recent sessions preserved") + print( + "โœ… Phase 4: LRU cleanup working correctly - most recent sessions preserved" + ) # PHASE 5: Verify that LRU cleanup works correctly over time print("\n๐Ÿ“Š PHASE 5: Verifying LRU behavior over time...") @@ -329,13 +356,18 @@ async def test_lru_session_cleanup_lifecycle(self): # Session A was initially preserved but then removed when Session F was created # This demonstrates that LRU is based on actual timestamps, not update sequence print(" - Session A was initially preserved due to recent activity") - print(" - Session A was later removed when newer sessions (D, E, F) became more recent") + print( + " - Session A was later removed when newer sessions (D, E, F) " + "became more recent" + ) print(" - This shows LRU is working correctly based on actual timestamps") print("\n๐ŸŽฏ IMMEDIATE LRU CLEANUP SUMMARY:") print(" - Cleanup happens immediately when new sessions are created") print(" - Session limit is maintained at all times (no daily delay)") - print(" - Most recently used sessions are preserved based on actual timestamps") + print( + " - Most recently used sessions are preserved based on actual timestamps" + ) print(" - LRU correctly removes sessions with older last activity times") print(" - LRU provides better UX than FIFO by preserving active sessions!") print("โœ… Immediate LRU session cleanup lifecycle test PASSED!") From b13337b29d066ccb352bd1e67bfda3cbc5e53027 Mon Sep 17 00:00:00 2001 From: dori Date: Thu, 11 Sep 2025 10:39:33 +0300 Subject: [PATCH 22/28] fix: remove reformat --- src/mcp_as_a_judge/db/cleanup_service.py | 8 +++++--- src/mcp_as_a_judge/db/providers/sqlite_provider.py | 1 - tests/test_conversation_history_lifecycle.py | 5 ++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/mcp_as_a_judge/db/cleanup_service.py b/src/mcp_as_a_judge/db/cleanup_service.py index 59bbabd..5fccdee 100644 --- a/src/mcp_as_a_judge/db/cleanup_service.py +++ b/src/mcp_as_a_judge/db/cleanup_service.py @@ -15,7 +15,6 @@ # Set up logger logger = get_logger(__name__) - class ConversationCleanupService: """ Service for cleaning up conversation history records. @@ -74,7 +73,8 @@ def get_least_recently_used_sessions(self, limit: int) -> list[str]: """ with Session(self.engine) as session: # Find sessions with oldest last activity (LRU) - # GROUP BY session_id, ORDER BY MAX(timestamp) ASC to get least recently used + # GROUP BY session_id, ORDER BY MAX(timestamp) ASC to get least + # recently used lru_stmt = ( select( ConversationRecord.session_id, @@ -104,7 +104,9 @@ def delete_sessions(self, session_ids: list[str]) -> int: with Session(self.engine) as session: # Count records before deletion for logging count_stmt = select(ConversationRecord).where( - ConversationRecord.session_id.in_(session_ids) # type: ignore[attr-defined] + ConversationRecord.session_id.in_( # type: ignore[attr-defined] + session_ids + ) ) records_to_delete = session.exec(count_stmt).all() delete_count = len(records_to_delete) diff --git a/src/mcp_as_a_judge/db/providers/sqlite_provider.py b/src/mcp_as_a_judge/db/providers/sqlite_provider.py index ad591df..bdf8749 100644 --- a/src/mcp_as_a_judge/db/providers/sqlite_provider.py +++ b/src/mcp_as_a_judge/db/providers/sqlite_provider.py @@ -18,7 +18,6 @@ # Set up logger logger = get_logger(__name__) - class SQLiteProvider(ConversationHistoryDB): """ SQLModel-based SQLite database provider for conversation history. diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index 75a95d9..0777525 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -10,7 +10,6 @@ from mcp_as_a_judge.db.providers.sqlite_provider import SQLiteProvider - class TestConversationHistoryLifecycle: """Test the complete lifecycle of conversation history records.""" @@ -302,7 +301,8 @@ async def test_lru_session_cleanup_lifecycle(self): f"Expected 3 sessions after adding new session, got {final_count}" ) print( - f"โœ… Phase 3: Session limit maintained at {final_count} after adding new session" + f"โœ… Phase 3: Session limit maintained at {final_count} " + f"after adding new session" ) # PHASE 4: Verify which sessions remain after adding session_F @@ -432,7 +432,6 @@ async def test_edge_cases_and_error_handling(self): print("โœ… All edge cases handled correctly") - if __name__ == "__main__": # Run tests directly for development async def run_tests(): From 4f8a550c5e442eaf02bbe6c46b6f9881602b3b84 Mon Sep 17 00:00:00 2001 From: dori Date: Thu, 11 Sep 2025 10:41:20 +0300 Subject: [PATCH 23/28] fix: remove reformat --- tests/test_conversation_history_lifecycle.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index 0777525..7af5709 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -5,7 +5,6 @@ """ import asyncio - import pytest from mcp_as_a_judge.db.providers.sqlite_provider import SQLiteProvider From cfbd094d3c607d73a2d7e99c0ca62439429a866c Mon Sep 17 00:00:00 2001 From: dori Date: Thu, 11 Sep 2025 10:43:16 +0300 Subject: [PATCH 24/28] fix: remove reformat --- tests/test_conversation_history_lifecycle.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index 7af5709..0777525 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -5,6 +5,7 @@ """ import asyncio + import pytest from mcp_as_a_judge.db.providers.sqlite_provider import SQLiteProvider From 7add7f626842107733e3f3596fc17b8bacffd62b Mon Sep 17 00:00:00 2001 From: dori Date: Thu, 11 Sep 2025 10:45:05 +0300 Subject: [PATCH 25/28] fix: remove reformat --- tests/test_conversation_history_lifecycle.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index 0777525..78de3f4 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -5,9 +5,7 @@ """ import asyncio - import pytest - from mcp_as_a_judge.db.providers.sqlite_provider import SQLiteProvider class TestConversationHistoryLifecycle: From 8f4bf2fe53bfcd387beec0898340eeef6c8169dc Mon Sep 17 00:00:00 2001 From: dori Date: Thu, 11 Sep 2025 10:49:08 +0300 Subject: [PATCH 26/28] fix: maybe fix reformat --- tests/test_conversation_history_lifecycle.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index 78de3f4..0777525 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -5,7 +5,9 @@ """ import asyncio + import pytest + from mcp_as_a_judge.db.providers.sqlite_provider import SQLiteProvider class TestConversationHistoryLifecycle: From ca895ec0ba3f76a3d15893421dc2276f5eae2454 Mon Sep 17 00:00:00 2001 From: dori Date: Thu, 11 Sep 2025 10:55:05 +0300 Subject: [PATCH 27/28] fix: maybe fix reformat --- tests/test_conversation_history_lifecycle.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index 0777525..7b80370 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -10,6 +10,7 @@ from mcp_as_a_judge.db.providers.sqlite_provider import SQLiteProvider + class TestConversationHistoryLifecycle: """Test the complete lifecycle of conversation history records.""" From 4cd0ca4186bdba5e1a73cb9609a0f89f84fba600 Mon Sep 17 00:00:00 2001 From: dori Date: Thu, 11 Sep 2025 10:59:26 +0300 Subject: [PATCH 28/28] fix: should fix reformat --- src/mcp_as_a_judge/db/cleanup_service.py | 1 + src/mcp_as_a_judge/db/providers/sqlite_provider.py | 7 ++++--- tests/test_conversation_history_lifecycle.py | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/mcp_as_a_judge/db/cleanup_service.py b/src/mcp_as_a_judge/db/cleanup_service.py index 5fccdee..a06fcaf 100644 --- a/src/mcp_as_a_judge/db/cleanup_service.py +++ b/src/mcp_as_a_judge/db/cleanup_service.py @@ -15,6 +15,7 @@ # Set up logger logger = get_logger(__name__) + class ConversationCleanupService: """ Service for cleaning up conversation history records. diff --git a/src/mcp_as_a_judge/db/providers/sqlite_provider.py b/src/mcp_as_a_judge/db/providers/sqlite_provider.py index bdf8749..bf168a7 100644 --- a/src/mcp_as_a_judge/db/providers/sqlite_provider.py +++ b/src/mcp_as_a_judge/db/providers/sqlite_provider.py @@ -18,6 +18,7 @@ # Set up logger logger = get_logger(__name__) + class SQLiteProvider(ConversationHistoryDB): """ SQLModel-based SQLite database provider for conversation history. @@ -145,9 +146,9 @@ def _is_new_session(self, session_id: str) -> bool: """Check if this is a new session (no existing records).""" with Session(self.engine) as session: existing_record = session.exec( - select(ConversationRecord).where( - ConversationRecord.session_id == session_id - ).limit(1) + select(ConversationRecord) + .where(ConversationRecord.session_id == session_id) + .limit(1) ).first() return existing_record is None diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py index 7b80370..66cd88e 100644 --- a/tests/test_conversation_history_lifecycle.py +++ b/tests/test_conversation_history_lifecycle.py @@ -273,7 +273,7 @@ async def test_lru_session_cleanup_lifecycle(self): "session_B", "session_C", "session_D", - "session_E" + "session_E", ]: records = await db.get_session_conversations(session_id) if records: @@ -433,6 +433,7 @@ async def test_edge_cases_and_error_handling(self): print("โœ… All edge cases handled correctly") + if __name__ == "__main__": # Run tests directly for development async def run_tests():