Skip to content

Commit c81fb58

Browse files
authored
Merge pull request #3971 from jayaddison/issue-3969/indexer-avoid-addressrank-loop
Indexer: allow 'has_pending' to consider address-rank subsets
2 parents d7249a1 + 8c3c1f0 commit c81fb58

File tree

3 files changed

+50
-3
lines changed

3 files changed

+50
-3
lines changed

src/nominatim_db/clicmd/index.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,4 +64,4 @@ async def _do_index(self, args: NominatimArgs) -> None:
6464
if not args.boundaries_only:
6565
await indexer.index_by_rank(args.minrank, args.maxrank)
6666
await indexer.index_postcodes()
67-
has_pending = indexer.has_pending()
67+
has_pending = indexer.has_pending(args.minrank, args.maxrank)

src/nominatim_db/indexer/indexer.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,19 @@ def __init__(self, dsn: str, tokenizer: AbstractTokenizer, num_threads: int):
3131
self.tokenizer = tokenizer
3232
self.num_threads = num_threads
3333

def has_pending(self, minrank: int = 0, maxrank: int = 30) -> bool:
    """ Report whether placex still contains rows that await indexing.

        Only rows whose rank_address lies between `minrank` and `maxrank`
        (both bounds inclusive) are taken into account. With the default
        arguments the range 0 to 30 is checked.

        This function must only be used after the import has finished.
        Otherwise it will be very expensive.
    """
    query = """ SELECT 'a'
                  FROM placex
                 WHERE rank_address BETWEEN %s AND %s
                   AND indexed_status > 0
                 LIMIT 1"""
    rank_bounds = (minrank, maxrank)

    with connect(self.dsn) as conn, conn.cursor() as cur:
        cur.execute(query, rank_bounds)
        # The query is capped with LIMIT 1, so a positive row count
        # simply means that at least one matching row exists.
        return cur.rowcount > 0
4348

4449
async def index_full(self, analyse: bool = True) -> None:

test/python/cli/test_cmd_index.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# SPDX-License-Identifier: GPL-2.0-or-later
2+
#
3+
# This file is part of Nominatim. (https://nominatim.org)
4+
#
5+
# Copyright (C) 2025 by the Nominatim developer community.
6+
# For a full list of authors see the git log.
7+
"""
8+
Tests for index command of the command-line interface wrapper.
9+
"""
10+
import pytest
11+
12+
import nominatim_db.indexer.indexer
13+
14+
class TestCliIndexWithDb:
    """ Tests for the 'index' command that run against placex test rows.
    """

    @pytest.fixture(autouse=True)
    def setup_cli_call(self, cli_call, cli_tokenizer_mock):
        # Store the CLI entry point and the tokenizer mock on the test
        # instance so that every test method can reach them.
        self.call_nominatim = cli_call
        self.tokenizer_mock = cli_tokenizer_mock

    def test_index_empty_subset(self, monkeypatch, async_mock_func_factory, placex_row):
        """ Run 'index' for ranks 5 to 10 while the only unindexed rows
            have address ranks 1 and 20, i.e. lie outside of that range.

            The command must return 0 and must call Indexer.has_pending
            no more than once.
        """
        indexer_cls = nominatim_db.indexer.indexer.Indexer

        placex_row(rank_address=1, indexed_status=1)
        placex_row(rank_address=20, indexed_status=1)

        # Replace the indexing work itself with mocks; only the
        # has_pending() check is allowed to run for real.
        mocked_methods = ('index_boundaries', 'index_by_rank', 'index_postcodes')
        mocks = [async_mock_func_factory(indexer_cls, method_name)
                 for method_name in mocked_methods]

        def _reject_repeat_call(*args, **kwargs):
            assert False, "Did not expect multiple Indexer.has_pending invocations"

        # The first invocation is served by the real implementation,
        # a second one fails the test.
        has_pending_calls = [indexer_cls.has_pending, _reject_repeat_call]

        def _next_has_pending(*args, **kwargs):
            handler = has_pending_calls.pop(0)
            return handler(*args, **kwargs)

        monkeypatch.setattr(indexer_cls, 'has_pending', _next_has_pending)

        exit_code = self.call_nominatim('index', '--minrank', '5', '--maxrank', '10')
        assert exit_code == 0

        for mock in mocks:
            assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)

0 commit comments

Comments
 (0)