diff --git a/src/basic_memory/cli/app.py b/src/basic_memory/cli/app.py
index ea749735..d6e74f9a 100644
--- a/src/basic_memory/cli/app.py
+++ b/src/basic_memory/cli/app.py
@@ -63,11 +63,12 @@ def app_callback(
     # Run initialization for commands that don't use the API
     # Skip for 'mcp' command - it has its own lifespan that handles initialization
     # Skip for API-using commands (status, sync, etc.) - they handle initialization via deps.py
-    api_commands = {"mcp", "status", "sync", "project", "tool"}
+    # Skip for 'reset' command - it manages its own database lifecycle
+    skip_init_commands = {"mcp", "status", "sync", "project", "tool", "reset"}
     if (
         not version
         and ctx.invoked_subcommand is not None
-        and ctx.invoked_subcommand not in api_commands
+        and ctx.invoked_subcommand not in skip_init_commands
     ):
         from basic_memory.services.initialization import ensure_initialization
 
diff --git a/src/basic_memory/cli/commands/command_utils.py b/src/basic_memory/cli/commands/command_utils.py
index 5f34cf8f..286c0430 100644
--- a/src/basic_memory/cli/commands/command_utils.py
+++ b/src/basic_memory/cli/commands/command_utils.py
@@ -42,23 +42,45 @@ async def _with_cleanup() -> T:
     return asyncio.run(_with_cleanup())
 
 
-async def run_sync(project: Optional[str] = None, force_full: bool = False):
+async def run_sync(
+    project: Optional[str] = None,
+    force_full: bool = False,
+    run_in_background: bool = True,
+):
     """Run sync operation via API endpoint.
 
     Args:
         project: Optional project name
         force_full: If True, force a full scan bypassing watermark optimization
+        run_in_background: If True, return immediately; if False, wait for completion
     """
     try:
         async with get_client() as client:
             project_item = await get_active_project(client, project, None)
             url = f"{project_item.project_url}/project/sync"
+            params = []
             if force_full:
-                url += "?force_full=true"
+                params.append("force_full=true")
+            if not run_in_background:
+                params.append("run_in_background=false")
+            if params:
+                url += "?" + "&".join(params)
 
             response = await call_post(client, url)
             data = response.json()
-            console.print(f"[green]{data['message']}[/green]")
+            # Background mode returns {"message": "..."}, foreground returns SyncReportResponse
+            if "message" in data:
+                console.print(f"[green]{data['message']}[/green]")
+            else:
+                # Foreground mode - show summary of sync results
+                total = data.get("total", 0)
+                new_count = len(data.get("new", []))
+                modified_count = len(data.get("modified", []))
+                deleted_count = len(data.get("deleted", []))
+                console.print(
+                    f"[green]Synced {total} files[/green] "
+                    f"(new: {new_count}, modified: {modified_count}, deleted: {deleted_count})"
+                )
     except (ToolError, ValueError) as e:
         console.print(f"[red]Sync failed: {e}[/red]")
         raise typer.Exit(1)
diff --git a/src/basic_memory/cli/commands/db.py b/src/basic_memory/cli/commands/db.py
index 93a421cd..a878252c 100644
--- a/src/basic_memory/cli/commands/db.py
+++ b/src/basic_memory/cli/commands/db.py
@@ -1,13 +1,50 @@
 """Database management commands."""
 
 import asyncio
+from pathlib import Path
 
 import typer
 from loguru import logger
+from rich.console import Console
+from sqlalchemy.exc import OperationalError
 
 from basic_memory import db
 from basic_memory.cli.app import app
-from basic_memory.config import ConfigManager, BasicMemoryConfig, save_basic_memory_config
+from basic_memory.config import ConfigManager
+from basic_memory.repository import ProjectRepository
+from basic_memory.services.initialization import reconcile_projects_with_config
+from basic_memory.sync.sync_service import get_sync_service
+
+console = Console()
+
+
+async def _reindex_projects(app_config):
+    """Reindex all projects in a single async context.
+
+    This ensures all database operations use the same event loop,
+    and proper cleanup happens when the function completes.
+    """
+    try:
+        await reconcile_projects_with_config(app_config)
+
+        # Get database session (migrations already run if needed)
+        _, session_maker = await db.get_or_create_db(
+            db_path=app_config.database_path,
+            db_type=db.DatabaseType.FILESYSTEM,
+        )
+        project_repository = ProjectRepository(session_maker)
+        projects = await project_repository.get_active_projects()
+
+        for project in projects:
+            console.print(f"  Indexing [cyan]{project.name}[/cyan]...")
+            logger.info(f"Starting sync for project: {project.name}")
+            sync_service = await get_sync_service(project)
+            sync_dir = Path(project.path)
+            await sync_service.sync(sync_dir, project_name=project.name)
+            logger.info(f"Sync completed for project: {project.name}")
+    finally:
+        # Clean up database connections before event loop closes
+        await db.shutdown_db()
 
 
 @app.command()
@@ -15,30 +52,52 @@ def reset(
     reindex: bool = typer.Option(False, "--reindex", help="Rebuild db index from filesystem"),
 ):  # pragma: no cover
     """Reset database (drop all tables and recreate)."""
-    if typer.confirm("This will delete all data in your db. Are you sure?"):
+    console.print(
+        "[yellow]Note:[/yellow] This only deletes the index database. "
+        "Your markdown note files will not be affected.\n"
+        "Use [green]bm reset --reindex[/green] to automatically rebuild the index afterward."
+    )
+    if typer.confirm("Reset the database index?"):
         logger.info("Resetting database...")
         config_manager = ConfigManager()
         app_config = config_manager.config
 
         # Get database path
         db_path = app_config.app_database_path
 
-        # Delete the database file if it exists
-        if db_path.exists():
-            db_path.unlink()
-            logger.info(f"Database file deleted: {db_path}")
+        # Delete the database file and WAL files if they exist
+        for suffix in ["", "-shm", "-wal"]:
+            path = db_path.parent / f"{db_path.name}{suffix}"
+            if path.exists():
+                try:
+                    path.unlink()
+                    logger.info(f"Deleted: {path}")
+                except OSError as e:
+                    console.print(
+                        f"[red]Error:[/red] Cannot delete {path.name}: {e}\n"
+                        "The database may be in use by another process (e.g., MCP server).\n"
+                        "Please close Claude Desktop or any other Basic Memory clients and try again."
+                    )
+                    raise typer.Exit(1)
 
-        # Reset project configuration
-        config = BasicMemoryConfig()
-        save_basic_memory_config(config_manager.config_file, config)
-        logger.info("Project configuration reset to default")
-
-        # Create a new empty database
-        asyncio.run(db.run_migrations(app_config))
-        logger.info("Database reset complete")
+        # Create a new empty database (preserves project configuration)
+        try:
+            asyncio.run(db.run_migrations(app_config))
+        except OperationalError as e:
+            if "disk I/O error" in str(e) or "database is locked" in str(e):
+                console.print(
+                    "[red]Error:[/red] Cannot access database. "
+                    "It may be in use by another process (e.g., MCP server).\n"
+                    "Please close Claude Desktop or any other Basic Memory clients and try again."
+                )
+                raise typer.Exit(1)
+            raise
+        console.print("[green]Database reset complete[/green]")
 
         if reindex:
-            # Run database sync directly
-            from basic_memory.cli.commands.command_utils import run_sync
-
-            logger.info("Rebuilding search index from filesystem...")
-            asyncio.run(run_sync(project=None))
+            projects = list(app_config.projects)
+            if not projects:
+                console.print("[yellow]No projects configured. Skipping reindex.[/yellow]")
+            else:
+                console.print(f"Rebuilding search index for {len(projects)} project(s)...")
+                asyncio.run(_reindex_projects(app_config))
+                console.print("[green]Reindex complete[/green]")
diff --git a/src/basic_memory/repository/sqlite_search_repository.py b/src/basic_memory/repository/sqlite_search_repository.py
index 3fd12235..66a39ccf 100644
--- a/src/basic_memory/repository/sqlite_search_repository.py
+++ b/src/basic_memory/repository/sqlite_search_repository.py
@@ -27,17 +27,15 @@ class SQLiteSearchRepository(SearchRepositoryBase):
     """
 
     async def init_search_index(self):
-        """Create FTS5 virtual table for search.
+        """Create FTS5 virtual table for search if it doesn't exist.
 
-        Note: Drops any existing search_index table first to ensure FTS5 virtual table creation.
-        This is necessary because Base.metadata.create_all() might create a regular table.
+        Uses CREATE VIRTUAL TABLE IF NOT EXISTS to preserve existing indexed data
+        across server restarts.
         """
         logger.info("Initializing SQLite FTS5 search index")
         try:
             async with db.scoped_session(self.session_maker) as session:
-                # Drop any existing regular or virtual table first
-                await session.execute(text("DROP TABLE IF EXISTS search_index"))
-                # Create FTS5 virtual table
+                # Create FTS5 virtual table if it doesn't exist
                 await session.execute(CREATE_SEARCH_INDEX)
                 await session.commit()
         except Exception as e:  # pragma: no cover
diff --git a/tests/mcp/clients/test_clients.py b/tests/mcp/clients/test_clients.py
index 333f9fa6..35300ba9 100644
--- a/tests/mcp/clients/test_clients.py
+++ b/tests/mcp/clients/test_clients.py
@@ -1,7 +1,7 @@
 """Tests for typed API clients."""
 
 import pytest
-from unittest.mock import AsyncMock, MagicMock
+from unittest.mock import MagicMock
 
 from basic_memory.mcp.clients import (
     KnowledgeClient,
diff --git a/tests/repository/test_search_repository.py b/tests/repository/test_search_repository.py
index 9b167a77..d19c994f 100644
--- a/tests/repository/test_search_repository.py
+++ b/tests/repository/test_search_repository.py
@@ -107,6 +107,48 @@ async def test_init_search_index(search_repository, app_config):
     assert table_name == "search_index"
 
 
+@pytest.mark.asyncio
+async def test_init_search_index_preserves_data(search_repository, search_entity):
+    """Regression test: calling init_search_index() twice should preserve indexed data.
+
+    This test prevents regression of the bug fixed in PR #503 where
+    init_search_index() was dropping existing data on every call due to
+    an unconditional DROP TABLE statement.
+
+    The bug caused search to work immediately after creating notes, but
+    return empty results after MCP server restarts (~30 minutes in Claude Desktop).
+    """
+    # Create and index a search item
+    search_row = SearchIndexRow(
+        id=search_entity.id,
+        type=SearchItemType.ENTITY.value,
+        title=search_entity.title,
+        content_stems="regression test content for server restart",
+        content_snippet="This content should persist across init_search_index calls",
+        permalink=search_entity.permalink,
+        file_path=search_entity.file_path,
+        entity_id=search_entity.id,
+        metadata={"entity_type": search_entity.entity_type},
+        created_at=search_entity.created_at,
+        updated_at=search_entity.updated_at,
+        project_id=search_repository.project_id,
+    )
+    await search_repository.index_item(search_row)
+
+    # Verify it's searchable
+    results = await search_repository.search(search_text="regression test")
+    assert len(results) == 1
+    assert results[0].title == search_entity.title
+
+    # Re-initialize the search index (simulates MCP server restart)
+    await search_repository.init_search_index()
+
+    # Verify data is still there after re-initialization
+    results_after = await search_repository.search(search_text="regression test")
+    assert len(results_after) == 1, "Search index data was lost after init_search_index()"
+    assert results_after[0].id == search_entity.id
+
+
 @pytest.mark.asyncio
 async def test_index_item(search_repository, search_entity):
     """Test indexing an item with project_id."""
diff --git a/tests/test_project_resolver.py b/tests/test_project_resolver.py
index 1d4b4e66..2383a213 100644
--- a/tests/test_project_resolver.py
+++ b/tests/test_project_resolver.py
@@ -1,6 +1,5 @@
 """Tests for ProjectResolver - unified project resolution logic."""
 
-import os
 import pytest
 from basic_memory.project_resolver import (
     ProjectResolver,