Skip to content

Commit 26f7e98

Browse files
phernandez and Claude authored
fix: preserve search index across server restarts (#503)
Signed-off-by: phernandez <[email protected]> Co-authored-by: Claude <[email protected]>
1 parent 5947f04 commit 26f7e98

File tree

7 files changed

+153
-32
lines changed

7 files changed

+153
-32
lines changed

src/basic_memory/cli/app.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,12 @@ def app_callback(
6363
# Run initialization for commands that don't use the API
6464
# Skip for 'mcp' command - it has its own lifespan that handles initialization
6565
# Skip for API-using commands (status, sync, etc.) - they handle initialization via deps.py
66-
api_commands = {"mcp", "status", "sync", "project", "tool"}
66+
# Skip for 'reset' command - it manages its own database lifecycle
67+
skip_init_commands = {"mcp", "status", "sync", "project", "tool", "reset"}
6768
if (
6869
not version
6970
and ctx.invoked_subcommand is not None
70-
and ctx.invoked_subcommand not in api_commands
71+
and ctx.invoked_subcommand not in skip_init_commands
7172
):
7273
from basic_memory.services.initialization import ensure_initialization
7374

src/basic_memory/cli/commands/command_utils.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,23 +42,45 @@ async def _with_cleanup() -> T:
4242
return asyncio.run(_with_cleanup())
4343

4444

45-
async def run_sync(project: Optional[str] = None, force_full: bool = False):
45+
async def run_sync(
46+
project: Optional[str] = None,
47+
force_full: bool = False,
48+
run_in_background: bool = True,
49+
):
4650
"""Run sync operation via API endpoint.
4751
4852
Args:
4953
project: Optional project name
5054
force_full: If True, force a full scan bypassing watermark optimization
55+
run_in_background: If True, return immediately; if False, wait for completion
5156
"""
5257

5358
try:
5459
async with get_client() as client:
5560
project_item = await get_active_project(client, project, None)
5661
url = f"{project_item.project_url}/project/sync"
62+
params = []
5763
if force_full:
58-
url += "?force_full=true"
64+
params.append("force_full=true")
65+
if not run_in_background:
66+
params.append("run_in_background=false")
67+
if params:
68+
url += "?" + "&".join(params)
5969
response = await call_post(client, url)
6070
data = response.json()
61-
console.print(f"[green]{data['message']}[/green]")
71+
# Background mode returns {"message": "..."}, foreground returns SyncReportResponse
72+
if "message" in data:
73+
console.print(f"[green]{data['message']}[/green]")
74+
else:
75+
# Foreground mode - show summary of sync results
76+
total = data.get("total", 0)
77+
new_count = len(data.get("new", []))
78+
modified_count = len(data.get("modified", []))
79+
deleted_count = len(data.get("deleted", []))
80+
console.print(
81+
f"[green]Synced {total} files[/green] "
82+
f"(new: {new_count}, modified: {modified_count}, deleted: {deleted_count})"
83+
)
6284
except (ToolError, ValueError) as e:
6385
console.print(f"[red]Sync failed: {e}[/red]")
6486
raise typer.Exit(1)
Lines changed: 78 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,103 @@
11
"""Database management commands."""
22

33
import asyncio
4+
from pathlib import Path
45

56
import typer
67
from loguru import logger
8+
from rich.console import Console
9+
from sqlalchemy.exc import OperationalError
710

811
from basic_memory import db
912
from basic_memory.cli.app import app
10-
from basic_memory.config import ConfigManager, BasicMemoryConfig, save_basic_memory_config
13+
from basic_memory.config import ConfigManager
14+
from basic_memory.repository import ProjectRepository
15+
from basic_memory.services.initialization import reconcile_projects_with_config
16+
from basic_memory.sync.sync_service import get_sync_service
17+
18+
console = Console()
19+
20+
21+
async def _reindex_projects(app_config):
22+
"""Reindex all projects in a single async context.
23+
24+
This ensures all database operations use the same event loop,
25+
and proper cleanup happens when the function completes.
26+
"""
27+
try:
28+
await reconcile_projects_with_config(app_config)
29+
30+
# Get database session (migrations already run if needed)
31+
_, session_maker = await db.get_or_create_db(
32+
db_path=app_config.database_path,
33+
db_type=db.DatabaseType.FILESYSTEM,
34+
)
35+
project_repository = ProjectRepository(session_maker)
36+
projects = await project_repository.get_active_projects()
37+
38+
for project in projects:
39+
console.print(f" Indexing [cyan]{project.name}[/cyan]...")
40+
logger.info(f"Starting sync for project: {project.name}")
41+
sync_service = await get_sync_service(project)
42+
sync_dir = Path(project.path)
43+
await sync_service.sync(sync_dir, project_name=project.name)
44+
logger.info(f"Sync completed for project: {project.name}")
45+
finally:
46+
# Clean up database connections before event loop closes
47+
await db.shutdown_db()
1148

1249

1350
@app.command()
1451
def reset(
1552
reindex: bool = typer.Option(False, "--reindex", help="Rebuild db index from filesystem"),
1653
): # pragma: no cover
1754
"""Reset database (drop all tables and recreate)."""
18-
if typer.confirm("This will delete all data in your db. Are you sure?"):
55+
console.print(
56+
"[yellow]Note:[/yellow] This only deletes the index database. "
57+
"Your markdown note files will not be affected.\n"
58+
"Use [green]bm reset --reindex[/green] to automatically rebuild the index afterward."
59+
)
60+
if typer.confirm("Reset the database index?"):
1961
logger.info("Resetting database...")
2062
config_manager = ConfigManager()
2163
app_config = config_manager.config
2264
# Get database path
2365
db_path = app_config.app_database_path
2466

25-
# Delete the database file if it exists
26-
if db_path.exists():
27-
db_path.unlink()
28-
logger.info(f"Database file deleted: {db_path}")
67+
# Delete the database file and WAL files if they exist
68+
for suffix in ["", "-shm", "-wal"]:
69+
path = db_path.parent / f"{db_path.name}{suffix}"
70+
if path.exists():
71+
try:
72+
path.unlink()
73+
logger.info(f"Deleted: {path}")
74+
except OSError as e:
75+
console.print(
76+
f"[red]Error:[/red] Cannot delete {path.name}: {e}\n"
77+
"The database may be in use by another process (e.g., MCP server).\n"
78+
"Please close Claude Desktop or any other Basic Memory clients and try again."
79+
)
80+
raise typer.Exit(1)
2981

30-
# Reset project configuration
31-
config = BasicMemoryConfig()
32-
save_basic_memory_config(config_manager.config_file, config)
33-
logger.info("Project configuration reset to default")
34-
35-
# Create a new empty database
36-
asyncio.run(db.run_migrations(app_config))
37-
logger.info("Database reset complete")
82+
# Create a new empty database (preserves project configuration)
83+
try:
84+
asyncio.run(db.run_migrations(app_config))
85+
except OperationalError as e:
86+
if "disk I/O error" in str(e) or "database is locked" in str(e):
87+
console.print(
88+
"[red]Error:[/red] Cannot access database. "
89+
"It may be in use by another process (e.g., MCP server).\n"
90+
"Please close Claude Desktop or any other Basic Memory clients and try again."
91+
)
92+
raise typer.Exit(1)
93+
raise
94+
console.print("[green]Database reset complete[/green]")
3895

3996
if reindex:
40-
# Run database sync directly
41-
from basic_memory.cli.commands.command_utils import run_sync
42-
43-
logger.info("Rebuilding search index from filesystem...")
44-
asyncio.run(run_sync(project=None))
97+
projects = list(app_config.projects)
98+
if not projects:
99+
console.print("[yellow]No projects configured. Skipping reindex.[/yellow]")
100+
else:
101+
console.print(f"Rebuilding search index for {len(projects)} project(s)...")
102+
asyncio.run(_reindex_projects(app_config))
103+
console.print("[green]Reindex complete[/green]")

src/basic_memory/repository/sqlite_search_repository.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,15 @@ class SQLiteSearchRepository(SearchRepositoryBase):
2727
"""
2828

2929
async def init_search_index(self):
30-
"""Create FTS5 virtual table for search.
30+
"""Create FTS5 virtual table for search if it doesn't exist.
3131
32-
Note: Drops any existing search_index table first to ensure FTS5 virtual table creation.
33-
This is necessary because Base.metadata.create_all() might create a regular table.
32+
Uses CREATE VIRTUAL TABLE IF NOT EXISTS to preserve existing indexed data
33+
across server restarts.
3434
"""
3535
logger.info("Initializing SQLite FTS5 search index")
3636
try:
3737
async with db.scoped_session(self.session_maker) as session:
38-
# Drop any existing regular or virtual table first
39-
await session.execute(text("DROP TABLE IF EXISTS search_index"))
40-
# Create FTS5 virtual table
38+
# Create FTS5 virtual table if it doesn't exist
4139
await session.execute(CREATE_SEARCH_INDEX)
4240
await session.commit()
4341
except Exception as e: # pragma: no cover

tests/mcp/clients/test_clients.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Tests for typed API clients."""
22

33
import pytest
4-
from unittest.mock import AsyncMock, MagicMock
4+
from unittest.mock import MagicMock
55

66
from basic_memory.mcp.clients import (
77
KnowledgeClient,

tests/repository/test_search_repository.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,48 @@ async def test_init_search_index(search_repository, app_config):
107107
assert table_name == "search_index"
108108

109109

110+
@pytest.mark.asyncio
111+
async def test_init_search_index_preserves_data(search_repository, search_entity):
112+
"""Regression test: calling init_search_index() twice should preserve indexed data.
113+
114+
This test prevents regression of the bug fixed in PR #503 where
115+
init_search_index() was dropping existing data on every call due to
116+
an unconditional DROP TABLE statement.
117+
118+
The bug caused search to work immediately after creating notes, but
119+
return empty results after MCP server restarts (~30 minutes in Claude Desktop).
120+
"""
121+
# Create and index a search item
122+
search_row = SearchIndexRow(
123+
id=search_entity.id,
124+
type=SearchItemType.ENTITY.value,
125+
title=search_entity.title,
126+
content_stems="regression test content for server restart",
127+
content_snippet="This content should persist across init_search_index calls",
128+
permalink=search_entity.permalink,
129+
file_path=search_entity.file_path,
130+
entity_id=search_entity.id,
131+
metadata={"entity_type": search_entity.entity_type},
132+
created_at=search_entity.created_at,
133+
updated_at=search_entity.updated_at,
134+
project_id=search_repository.project_id,
135+
)
136+
await search_repository.index_item(search_row)
137+
138+
# Verify it's searchable
139+
results = await search_repository.search(search_text="regression test")
140+
assert len(results) == 1
141+
assert results[0].title == search_entity.title
142+
143+
# Re-initialize the search index (simulates MCP server restart)
144+
await search_repository.init_search_index()
145+
146+
# Verify data is still there after re-initialization
147+
results_after = await search_repository.search(search_text="regression test")
148+
assert len(results_after) == 1, "Search index data was lost after init_search_index()"
149+
assert results_after[0].id == search_entity.id
150+
151+
110152
@pytest.mark.asyncio
111153
async def test_index_item(search_repository, search_entity):
112154
"""Test indexing an item with project_id."""

tests/test_project_resolver.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
"""Tests for ProjectResolver - unified project resolution logic."""
22

3-
import os
43
import pytest
54
from basic_memory.project_resolver import (
65
ProjectResolver,

0 commit comments

Comments
 (0)