Skip to content

Commit 39d1f55

Browse files
authored
Fix bug in search reindex command (#689)
1 parent 393e5d6 commit 39d1f55

File tree

4 files changed

+83
-27
lines changed

4 files changed

+83
-27
lines changed

gramps_webapi/__main__.py

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,12 @@
3030
import warnings
3131
import webbrowser
3232
from threading import Thread
33-
3433
import click
3534
import waitress # type: ignore
3635

3736
from .api.search import get_search_indexer, get_semantic_search_indexer
3837
from .api.tasks import send_email_confirm_email, send_email_reset_password
38+
from .types import ProgressCallback
3939
from .api.util import close_db, get_db_manager, list_trees
4040
from .app import create_app
4141
from .auth import add_user, delete_user, fill_tree, user_db
@@ -259,17 +259,22 @@ def search(ctx, tree, semantic):
259259
ctx.obj["search_indexer"] = get_search_indexer(tree=tree)
260260

261261

262-
def progress_callback_count(
263-
app, current: int, total: int, prev: int | None = None
264-
) -> None:
265-
if total == 0:
266-
return
267-
pct = int(100 * current / total)
268-
if prev is None:
269-
prev = current - 1
270-
pct_prev = int(100 * prev / total)
271-
if current == 0 or pct != pct_prev:
272-
app.logger.info(f"Progress: {pct}%")
262+
def progress_callback_count_factory(app) -> ProgressCallback:
263+
"""Create a progress callback function that logs progress to the app logger."""
264+
265+
def progress_callback_count(
266+
current: int, total: int, prev: int | None = None
267+
) -> None:
268+
if total == 0:
269+
return
270+
pct = int(100 * current / total)
271+
if prev is None:
272+
prev = current - 1
273+
pct_prev = int(100 * prev / total)
274+
if current == 0 or pct != pct_prev:
275+
app.logger.info(f"Progress: {pct}%")
276+
277+
return progress_callback_count
273278

274279

275280
@search.command("index-full")
@@ -284,9 +289,10 @@ def index_full(ctx):
284289

285290
t0 = time.time()
286291
try:
287-
indexer.reindex_full(db, progress_cb=progress_callback_count)
288-
except:
292+
indexer.reindex_full(db, progress_cb=progress_callback_count_factory(app))
293+
except Exception as e:
289294
app.logger.exception("Error during indexing")
295+
raise click.ClickException(f"Indexing failed: {e}")
290296
finally:
291297
close_db(db)
292298
app.logger.info(f"Done building search index in {time.time() - t0:.0f} seconds.")
@@ -302,9 +308,12 @@ def index_incremental(ctx):
302308
db = db_manager.get_db().db
303309

304310
try:
305-
indexer.reindex_incremental(db, progress_cb=progress_callback_count)
306-
except Exception:
311+
indexer.reindex_incremental(
312+
db, progress_cb=progress_callback_count_factory(app)
313+
)
314+
except Exception as e:
307315
app.logger.exception("Error during indexing")
316+
raise click.ClickException(f"Indexing failed: {e}")
308317
finally:
309318
close_db(db)
310319
app.logger.info("Done updating search index.")

gramps_webapi/api/search/indexer.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,12 @@
2121

2222
from __future__ import annotations
2323

24-
from typing import Any, Callable, Dict, List, Optional, Set
24+
from typing import Any, Callable, Dict, List, Set
2525

2626
import sifts
2727
from gramps.gen.db.base import DbReadBase
2828

29+
from ...types import ProgressCallback
2930
from .text import iter_obj_strings, obj_strings_from_handle
3031
from ..util import get_total_number_of_objects, get_object_timestamps
3132

@@ -39,7 +40,7 @@ class SearchIndexerBase:
3940
def __init__(
4041
self,
4142
tree: str,
42-
db_url: Optional[str] = None,
43+
db_url: str | None = None,
4344
embedding_function: Callable | None = None,
4445
use_fts: bool = True,
4546
use_semantic_text: bool = False,
@@ -126,7 +127,7 @@ def _add_objects(self, obj_dicts: List[Dict[str, Any]]):
126127
self.index_public.add(contents=contents, ids=ids, metadatas=metadatas)
127128

128129
def reindex_full(
129-
self, db_handle: DbReadBase, progress_cb: Optional[Callable] = None
130+
self, db_handle: DbReadBase, progress_cb: ProgressCallback | None = None
130131
):
131132
"""Reindex the whole database."""
132133
total = get_total_number_of_objects(db_handle)
@@ -213,7 +214,7 @@ def add_or_update_object(
213214
self._add_objects([obj_dict])
214215

215216
def reindex_incremental(
216-
self, db_handle: DbReadBase, progress_cb: Optional[Callable] = None
217+
self, db_handle: DbReadBase, progress_cb: ProgressCallback | None = None
217218
):
218219
"""Update the index incrementally."""
219220
update_info = self._get_update_info(db_handle)
@@ -287,10 +288,10 @@ def search(
287288
page: int,
288289
pagesize: int,
289290
include_private: bool = True,
290-
sort: Optional[List[str]] = None,
291-
object_types: Optional[List[str]] = None,
292-
change_op: Optional[str] = None,
293-
change_value: Optional[float] = None,
291+
sort: List[str] | None = None,
292+
object_types: List[str] | None = None,
293+
change_op: str | None = None,
294+
change_value: float | None = None,
294295
include_content: bool = False,
295296
):
296297
"""Search the index.
@@ -338,7 +339,7 @@ class SearchIndexer(SearchIndexerBase):
338339
def __init__(
339340
self,
340341
tree: str,
341-
db_url: Optional[str] = None,
342+
db_url: str | None = None,
342343
):
343344
"""Initialize the indexer."""
344345
super().__init__(
@@ -355,7 +356,7 @@ class SemanticSearchIndexer(SearchIndexerBase):
355356
def __init__(
356357
self,
357358
tree: str,
358-
db_url: Optional[str] = None,
359+
db_url: str | None = None,
359360
embedding_function: Callable | None = None,
360361
):
361362
"""Initialize the indexer."""

gramps_webapi/types.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from __future__ import annotations
2323

2424
from pathlib import Path
25-
from typing import Any, NewType, Union
25+
from typing import Any, NewType, Protocol, Union
2626

2727
import flask.typing
2828

@@ -32,3 +32,10 @@
3232
TransactionJson = list[dict[str, Any]]
3333
ResponseReturnValue = flask.typing.ResponseReturnValue
3434
MatchSegment = dict[str, Union[float, int, str, None]]
35+
36+
37+
class ProgressCallback(Protocol):
38+
"""Protocol for progress callback functions."""
39+
40+
def __call__(self, current: int, total: int, prev: int | None = None) -> None:
41+
"""Call the progress callback with current progress, total items, and optional previous value."""

tests/test_cli.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,10 @@ def test_search_reindex_incremental(self):
154154
],
155155
)
156156
assert result.exit_code == 0
157+
# The log messages go to the logger, not stdout, so we can't check for them in result.output
158+
# But we can ensure there's no error output and the command succeeded
159+
assert "Error" not in result.output
160+
assert "Exception" not in result.output
157161

158162
def test_search_reindex_full(self):
159163
tree = WebDbManager(name=self.name).dirname
@@ -169,6 +173,10 @@ def test_search_reindex_full(self):
169173
],
170174
)
171175
assert result.exit_code == 0
176+
# The log messages go to the logger, not stdout, so we can't check for them in result.output
177+
# But we can ensure there's no error output and the command succeeded
178+
assert "Error" not in result.output
179+
assert "Exception" not in result.output
172180

173181
def test_search_reindex_incremental_notree(self):
174182
tree = WebDbManager(name=self.name).dirname
@@ -182,6 +190,10 @@ def test_search_reindex_incremental_notree(self):
182190
],
183191
)
184192
assert result.exit_code == 0
193+
# The log messages go to the logger, not stdout, so we can't check for them in result.output
194+
# But we can ensure there's no error output and the command succeeded
195+
assert "Error" not in result.output
196+
assert "Exception" not in result.output
185197

186198
def test_search_reindex_full_notree(self):
187199
tree = WebDbManager(name=self.name).dirname
@@ -195,3 +207,30 @@ def test_search_reindex_full_notree(self):
195207
],
196208
)
197209
assert result.exit_code == 0
210+
# The log messages go to the logger, not stdout, so we can't check for them in result.output
211+
# But we can ensure there's no error output and the command succeeded
212+
assert "Error" not in result.output
213+
assert "Exception" not in result.output
214+
215+
def test_search_reindex_with_broken_progress_callback(self):
216+
"""Test that would catch the original bug where progress_callback_count was undefined."""
217+
tree = WebDbManager(name=self.name).dirname
218+
219+
# Temporarily break the progress callback by patching it to raise an error
220+
with patch('gramps_webapi.__main__.progress_callback_count_factory') as mock_factory:
221+
mock_factory.side_effect = NameError("name 'progress_callback_count' is not defined")
222+
223+
result = self.runner.invoke(
224+
cli,
225+
[
226+
"--config",
227+
self.config_file.name,
228+
"search",
229+
"--tree",
230+
tree,
231+
"index-full",
232+
],
233+
)
234+
# With the improved error handling, this should now fail with a non-zero exit code
235+
assert result.exit_code != 0
236+
assert "Indexing failed" in result.output

0 commit comments

Comments
 (0)