3 changes: 2 additions & 1 deletion src/mcp_as_a_judge/constants.py
@@ -14,4 +14,5 @@
# Database Configuration
DATABASE_URL = "sqlite://:memory:"
MAX_SESSION_RECORDS = 20 # Maximum records to keep per session (FIFO)
RECORD_RETENTION_DAYS = 1
MAX_TOTAL_SESSIONS = 2000 # Maximum total sessions to keep (LRU cleanup)
RECORD_RETENTION_DAYS = 1 # Optional time-based cleanup (fallback)
173 changes: 162 additions & 11 deletions src/mcp_as_a_judge/db/cleanup_service.py
@@ -5,12 +5,12 @@
removing records older than the retention period (default: 1 day).
"""

from datetime import datetime, timedelta
from datetime import UTC, datetime, timedelta

from sqlalchemy import Engine
from sqlalchemy import Engine, func
from sqlmodel import Session, select

from mcp_as_a_judge.constants import RECORD_RETENTION_DAYS
from mcp_as_a_judge.constants import MAX_TOTAL_SESSIONS, RECORD_RETENTION_DAYS
from mcp_as_a_judge.db.interface import ConversationRecord
from mcp_as_a_judge.logging_config import get_logger

@@ -22,8 +22,18 @@ class ConversationCleanupService:
"""
Service for cleaning up old conversation history records.

Handles time-based cleanup: Removes records older than retention period.
Note: LRU cleanup is handled by the SQLite provider during save operations.
Implements session-based LRU cleanup strategy:
- Maintains max 2000 sessions by removing least recently used sessions
- Runs once per day to avoid performance overhead

LRU vs FIFO for Better User Experience:
- LRU (Least Recently Used): Keeps sessions that users are actively using,
even if they're old
- FIFO (First In, First Out): Would remove oldest sessions regardless of
recent activity
- LRU provides better UX because active conversations are preserved longer

Note: Per-session FIFO cleanup (max 20 records) is handled by the SQLite provider.
"""

def __init__(self, engine: Engine) -> None:
@@ -34,8 +44,10 @@ def __init__(self, engine: Engine) -> None:
engine: SQLAlchemy engine for database operations
"""
self.engine = engine
self.max_total_sessions = MAX_TOTAL_SESSIONS
self.retention_days = RECORD_RETENTION_DAYS
self.last_cleanup_time = datetime.utcnow()
self.last_cleanup_time = datetime.now(UTC)
self.last_session_cleanup_time = datetime.now(UTC)

def cleanup_old_records(self) -> int:
"""
@@ -46,10 +58,10 @@ def cleanup_old_records(self) -> int:
Number of records deleted
"""
# Only run cleanup once per day
if (datetime.utcnow() - self.last_cleanup_time).days < 1:
if (datetime.now(UTC) - self.last_cleanup_time).days < 1:
return 0

cutoff_date = datetime.utcnow() - timedelta(days=self.retention_days)
cutoff_date = datetime.now(UTC) - timedelta(days=self.retention_days)

with Session(self.engine) as session:
# Count old records
@@ -63,7 +75,7 @@ def cleanup_old_records(self) -> int:
logger.info(
f"🧹 Daily cleanup: No records older than {self.retention_days} days"
)
self.last_cleanup_time = datetime.utcnow()
self.last_cleanup_time = datetime.now(UTC)
return 0

# Delete old records
@@ -73,9 +85,148 @@ def cleanup_old_records(self) -> int:
session.commit()

# Reset cleanup tracking
self.last_cleanup_time = datetime.utcnow()
self.last_cleanup_time = datetime.now(UTC)

logger.info(
f"🧹 Daily cleanup: Deleted {old_count} records older than {self.retention_days} days"
f"🧹 Daily cleanup: Deleted {old_count} records older than "
f"{self.retention_days} days"
)
return old_count

def get_session_count(self) -> int:
"""
Get the total number of unique sessions in the database.

Returns:
Number of unique sessions
"""
with Session(self.engine) as session:
# Count distinct session_ids
count_stmt = select(
func.count(func.distinct(ConversationRecord.session_id))
)
result = session.exec(count_stmt).first()
return result or 0

def get_least_recently_used_sessions(self, limit: int) -> list[str]:
"""
Get session IDs of the least recently used sessions.

Uses LRU strategy: finds sessions with the oldest "last activity" timestamp.
Last activity = MAX(timestamp) for each session (most recent record in session).

Args:
limit: Number of session IDs to return

Returns:
List of session IDs ordered by last activity (oldest first)
"""
with Session(self.engine) as session:
# Find sessions with oldest last activity (LRU)
# GROUP BY session_id, ORDER BY MAX(timestamp) ASC to get least recently used
lru_stmt = (
select(
ConversationRecord.session_id,
func.max(ConversationRecord.timestamp).label("last_activity"),
)
.group_by(ConversationRecord.session_id)
.order_by(func.max(ConversationRecord.timestamp).asc())
.limit(limit)
)

results = session.exec(lru_stmt).all()
return [result[0] for result in results]

def delete_sessions(self, session_ids: list[str]) -> int:
"""
Bulk delete all records for the given session IDs.

Args:
session_ids: List of session IDs to delete

Returns:
Number of records deleted
"""
if not session_ids:
return 0

with Session(self.engine) as session:
# Count records before deletion for logging
count_stmt = select(ConversationRecord).where(
ConversationRecord.session_id.in_(session_ids) # type: ignore[attr-defined]
)
records_to_delete = session.exec(count_stmt).all()
delete_count = len(records_to_delete)

# Bulk delete all records for these sessions
for record in records_to_delete:
session.delete(record)

session.commit()

logger.info(
f"🗑️ Deleted {delete_count} records from {len(session_ids)} sessions: "
f"{', '.join(session_ids[:3])}{'...' if len(session_ids) > 3 else ''}"
)

return delete_count

def cleanup_excess_sessions(self) -> int:
"""
Remove least recently used sessions when total sessions exceed
MAX_TOTAL_SESSIONS.

This implements LRU (Least Recently Used) cleanup strategy:
- Keeps sessions that users are actively using (better UX than FIFO)
- Only runs once per day to avoid excessive cleanup operations
- During the day, session count can exceed limit
(e.g., 5000 sessions is not a memory issue)
- Daily cleanup brings it back to the target limit (2000 sessions)
- Removes entire sessions (all records for those session_ids)

Returns:
Number of records deleted
"""
# Only run session cleanup once per day
if (datetime.now(UTC) - self.last_session_cleanup_time).days < 1:
return 0

current_session_count = self.get_session_count()

if current_session_count <= self.max_total_sessions:
logger.info(
f"🧹 Daily session LRU cleanup: {current_session_count} sessions "
f"(max: {self.max_total_sessions}) - no cleanup needed"
)
self.last_session_cleanup_time = datetime.now(UTC)
return 0

# Calculate how many sessions to remove
sessions_to_remove = current_session_count - self.max_total_sessions

logger.info(
f"🧹 Daily session LRU cleanup: {current_session_count} sessions exceeds limit "
f"({self.max_total_sessions}), removing {sessions_to_remove} "
f"least recently used sessions"
)

# Get least recently used sessions
lru_session_ids = self.get_least_recently_used_sessions(sessions_to_remove)

if not lru_session_ids:
logger.warning("🧹 No sessions found for LRU cleanup")
self.last_session_cleanup_time = datetime.now(UTC)
return 0

# Delete all records for these sessions
deleted_count = self.delete_sessions(lru_session_ids)

# Reset cleanup tracking
self.last_session_cleanup_time = datetime.now(UTC)

logger.info(
f"✅ Daily session LRU cleanup completed: removed {sessions_to_remove} sessions, "
f"deleted {deleted_count} records"
)

return deleted_count
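To make the LRU-versus-FIFO rationale from the new ConversationCleanupService docstring concrete, here is a minimal, self-contained Python sketch (no database involved; the session names and timestamps below are made up for illustration). FIFO orders sessions by their first activity, LRU by their last activity, which is what the MAX(timestamp) query in get_least_recently_used_sessions computes:

```python
from datetime import datetime, timedelta

now = datetime.now()
# Hypothetical sessions: each maps to the timestamps of its records.
sessions = {
    "old-but-active": [now - timedelta(days=30), now - timedelta(minutes=5)],
    "new-but-idle": [now - timedelta(days=2)],
    "recent": [now - timedelta(hours=1)],
}

# FIFO eviction order: oldest first activity goes first.
# "old-but-active" would be evicted even though it was used 5 minutes ago.
fifo_eviction = sorted(sessions, key=lambda s: min(sessions[s]))

# LRU eviction order: oldest last activity goes first, matching
# GROUP BY session_id ORDER BY MAX(timestamp) ASC in the service.
# "new-but-idle" is evicted first and the active conversation survives.
lru_eviction = sorted(sessions, key=lambda s: max(sessions[s]))

print(fifo_eviction)  # ['old-but-active', 'new-but-idle', 'recent']
print(lru_eviction)   # ['new-but-idle', 'recent', 'old-but-active']
```

In the service itself the same ordering is pushed into SQL, so the eviction decision is made by SQLite rather than by loading every record into Python.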
2 changes: 2 additions & 0 deletions src/mcp_as_a_judge/db/db_config.py
@@ -8,6 +8,7 @@
from mcp_as_a_judge.constants import (
DATABASE_URL,
MAX_SESSION_RECORDS,
MAX_TOTAL_SESSIONS,
RECORD_RETENTION_DAYS,
)

@@ -61,6 +62,7 @@ class DatabaseConfig:
def __init__(self) -> None:
self.url = DATABASE_URL
self.max_session_records = MAX_SESSION_RECORDS
self.max_total_sessions = MAX_TOTAL_SESSIONS
self.record_retention_days = RECORD_RETENTION_DAYS


27 changes: 19 additions & 8 deletions src/mcp_as_a_judge/db/providers/sqlite_provider.py
@@ -6,7 +6,7 @@
"""

import uuid
from datetime import datetime
from datetime import UTC, datetime

from sqlalchemy import create_engine
from sqlmodel import Session, SQLModel, asc, desc, select
@@ -29,8 +29,9 @@ class SQLiteProvider(ConversationHistoryDB):
Features:
- SQLModel with SQLAlchemy for type safety
- In-memory or file-based SQLite storage
- LRU cleanup per session
- Time-based cleanup (configurable retention)
- Two-level cleanup strategy:
1. Daily session-based LRU cleanup (max 2000 sessions, removes least recently used)
2. Per-session FIFO cleanup (max 20 records per session, runs on every save)
- Session-based conversation retrieval
"""

@@ -58,7 +59,9 @@ def __init__(self, max_session_records: int = 20, url: str = "") -> None:

logger.info(
f"🗄️ SQLModel SQLite provider initialized: {connection_string}, "
f"max_records={max_session_records}, retention_days={self._cleanup_service.retention_days}"
f"max_records_per_session={max_session_records}, "
f"max_total_sessions={self._cleanup_service.max_total_sessions}, "
f"retention_days={self._cleanup_service.retention_days}"
)

def _parse_sqlite_url(self, url: str) -> str:
@@ -86,6 +89,14 @@ def _cleanup_old_records(self) -> int:
"""
return self._cleanup_service.cleanup_old_records()

def _cleanup_excess_sessions(self) -> int:
"""
Remove least recently used sessions when total sessions exceed limit.
This implements daily LRU cleanup to maintain max 2000 sessions for better memory management.
Runs once per day - during the day session count can exceed limit without issues.
"""
return self._cleanup_service.cleanup_excess_sessions()

def _cleanup_old_messages(self, session_id: str) -> int:
"""
Remove old messages from a session using FIFO strategy.
@@ -141,7 +152,7 @@ async def save_conversation(
) -> str:
"""Save a conversation record to SQLite database with LRU cleanup."""
record_id = str(uuid.uuid4())
timestamp = datetime.utcnow()
timestamp = datetime.now(UTC)

logger.info(
f"💾 Saving conversation to SQLModel SQLite DB: record {record_id} "
@@ -164,10 +175,10 @@

logger.info("✅ Successfully inserted record into conversation_history table")

# Daily cleanup: run once per day to remove old records
self._cleanup_old_records()
# Daily session LRU cleanup: maintain max 2000 sessions (runs once per day)
self._cleanup_excess_sessions()

# Always perform LRU cleanup for this session (lightweight)
# Per-session FIFO cleanup: maintain max 20 records per session (runs on every save)
self._cleanup_old_messages(session_id)

return record_id
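As a quick way to exercise the two cleanup levels described in the updated SQLiteProvider docstring, a rough usage sketch follows. It is illustrative only: the in-memory URL is the one from constants.py, the session id is hypothetical, save_conversation's full signature is elided from this diff so it is not shown, and both daily cleanups are throttled internally (the last-cleanup timestamps are set at construction), so the first calls are expected to return 0.

```python
from mcp_as_a_judge.db.providers.sqlite_provider import SQLiteProvider

# Hypothetical setup using the defaults from constants.py.
provider = SQLiteProvider(max_session_records=20, url="sqlite://:memory:")

# Level 1: daily session LRU cleanup (keeps at most MAX_TOTAL_SESSIONS sessions).
# Throttled to once per day, so this first call should return 0.
removed_by_lru = provider._cleanup_excess_sessions()

# Level 2: per-session FIFO cleanup (keeps at most 20 records per session);
# normally invoked on every save_conversation call.
removed_by_fifo = provider._cleanup_old_messages("example-session")  # hypothetical id

# Time-based fallback: drops records older than RECORD_RETENTION_DAYS (also daily).
removed_by_age = provider._cleanup_old_records()

print(removed_by_lru, removed_by_fifo, removed_by_age)
```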