11"""
22Database cleanup service for conversation history records.
33
4- This service handles time -based cleanup operations for conversation history records,
5- removing records older than the retention period (default: 1 day) .
4+ This service handles LRU -based cleanup operations for conversation history records,
5+ removing least recently used sessions when session limits are exceeded .
66"""
77
8- from datetime import datetime , timedelta
9-
10- from sqlalchemy import Engine
8+ from sqlalchemy import Engine , func
119from sqlmodel import Session , select
1210
13- from mcp_as_a_judge .constants import RECORD_RETENTION_DAYS
11+ from mcp_as_a_judge .constants import MAX_TOTAL_SESSIONS
1412from mcp_as_a_judge .db .interface import ConversationRecord
1513from mcp_as_a_judge .logging_config import get_logger
1614
2018
2119class ConversationCleanupService :
2220 """
23- Service for cleaning up old conversation history records.
21+ Service for cleaning up conversation history records.
22+
23+ Implements session-based LRU cleanup strategy:
24+ - Maintains session limit by removing least recently used sessions
25+ - Runs immediately when new sessions are created and limit is exceeded
26+
27+ LRU vs FIFO for Better User Experience:
28+ - LRU (Least Recently Used): Keeps sessions that users are actively using,
29+ even if they're old
30+ - FIFO (First In, First Out): Would remove oldest sessions regardless of
31+ recent activity
32+ - LRU provides better UX because active conversations are preserved longer
2433
25- Handles time-based cleanup: Removes records older than retention period.
26- Note: LRU cleanup is handled by the SQLite provider during save operations.
34+ Note: Per-session FIFO cleanup (max 20 records) is handled by the SQLite provider.
2735 """
2836
2937 def __init__ (self , engine : Engine ) -> None :
@@ -34,48 +42,134 @@ def __init__(self, engine: Engine) -> None:
3442 engine: SQLAlchemy engine for database operations
3543 """
3644 self .engine = engine
37- self .retention_days = RECORD_RETENTION_DAYS
38- self .last_cleanup_time = datetime .utcnow ()
45+ self .max_total_sessions = MAX_TOTAL_SESSIONS
3946
40- def cleanup_old_records (self ) -> int :
def get_session_count(self) -> int:
    """
    Count the distinct session IDs currently stored.

    Returns:
        Number of unique ``session_id`` values among the conversation
        records, or 0 when the query yields no (or a falsy) result.
    """
    # COUNT(DISTINCT session_id) is evaluated by the database, so no
    # records are loaded into Python just to be counted.
    distinct_sessions = func.count(func.distinct(ConversationRecord.session_id))

    with Session(self.engine) as db:
        total = db.exec(select(distinct_sessions)).first()

    return total if total else 0
61+
def get_least_recently_used_sessions(self, limit: int) -> list[str]:
    """
    Get session IDs of the least recently used sessions.

    Uses LRU strategy: a session's "last activity" is MAX(timestamp) over
    its records, and sessions are ordered by that value ascending so the
    ones idle the longest come first.

    Args:
        limit: Maximum number of session IDs to return. A value of zero or
            less yields an empty list without touching the database.

    Returns:
        List of session IDs ordered by last activity (oldest first)
    """
    # Guard against non-positive limits: on SQLite a negative LIMIT means
    # "no upper bound", which would hand every session to the caller for
    # deletion instead of none.
    if limit <= 0:
        return []

    with Session(self.engine) as session:
        # GROUP BY session_id, ORDER BY MAX(timestamp) ASC to get the least
        # recently used sessions first.
        lru_stmt = (
            select(
                ConversationRecord.session_id,
                func.max(ConversationRecord.timestamp).label("last_activity"),
            )
            .group_by(ConversationRecord.session_id)
            .order_by(func.max(ConversationRecord.timestamp).asc())
            .limit(limit)
        )

        rows = session.exec(lru_stmt).all()
        # Each row is (session_id, last_activity); only the ID is returned.
        return [row[0] for row in rows]
91+
def delete_sessions(self, session_ids: list[str]) -> int:
    """
    Delete every conversation record belonging to the given sessions.

    Args:
        session_ids: Session IDs whose records should be removed

    Returns:
        Number of records deleted (0 when ``session_ids`` is empty)
    """
    # Nothing to do for an empty selection; skip opening a DB session.
    if not session_ids:
        return 0

    with Session(self.engine) as db:
        # Load the matching records first so their count can be reported.
        matching_stmt = select(ConversationRecord).where(
            ConversationRecord.session_id.in_(  # type: ignore[attr-defined]
                session_ids
            )
        )
        doomed = db.exec(matching_stmt).all()
        delete_count = len(doomed)

        # Mark each record for deletion, then flush them in one commit.
        for row in doomed:
            db.delete(row)
        db.commit()

        # Only the first three IDs are spelled out to keep the log short.
        logger.info(
            f"🗑️ Deleted {delete_count} records from {len(session_ids)} sessions: "
            f"{', '.join(session_ids[:3])}{'...' if len(session_ids) > 3 else ''}"
        )

        return delete_count
127+
def cleanup_excess_sessions(self) -> int:
    """
    Remove least recently used sessions when the total number of sessions
    exceeds ``self.max_total_sessions``.

    Steps:
    - Count the distinct sessions; return early if within the limit.
    - Ask for the (count - limit) least recently used session IDs.
    - Delete all records belonging to those sessions.

    There is no time-based throttling: the check runs on every call.

    Returns:
        Number of records deleted (0 when no cleanup was needed or no
        candidate sessions were found)
    """
    current_session_count = self.get_session_count()

    if current_session_count <= self.max_total_sessions:
        logger.info(
            f"🧹 Session LRU cleanup: {current_session_count} sessions "
            f"(max: {self.max_total_sessions}) - no cleanup needed"
        )
        return 0

    # Calculate how many sessions must go to get back to the limit
    sessions_to_remove = current_session_count - self.max_total_sessions

    logger.info(
        f"🧹 Session LRU cleanup: {current_session_count} sessions exceeds limit "
        f"({self.max_total_sessions}), removing {sessions_to_remove} "
        f"least recently used sessions"
    )

    # Get least recently used sessions
    lru_session_ids = self.get_least_recently_used_sessions(sessions_to_remove)

    if not lru_session_ids:
        logger.warning("🧹 No sessions found for LRU cleanup")
        return 0

    # Delete all records for these sessions
    deleted_count = self.delete_sessions(lru_session_ids)

    # Report the number of sessions actually selected for deletion: the
    # count and LRU queries run in separate DB sessions, so it can differ
    # from the number that was requested above.
    logger.info(
        f"✅ Session LRU cleanup completed: removed {len(lru_session_ids)} sessions, "
        f"deleted {deleted_count} records"
    )

    return deleted_count
0 commit comments