Skip to content

Commit 7eb77f2

Browse files
authored
HTML Search: omit anchor reference from document titles in the search index. (#12047)
1 parent 082f13f commit 7eb77f2

File tree

7 files changed

+54
-26
lines changed

7 files changed

+54
-26
lines changed

CHANGES.rst

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,9 @@ Bugs fixed
8989
* #12494: Fix invalid genindex.html file produced with translated docs
9090
(regression in 7.1.0).
9191
Patch by Nicolas Peugnet.
92+
* #11961: Omit anchor references from document title entries in the search index,
93+
removing duplication of search results.
94+
Patch by James Addison.
9295

9396
Testing
9497
-------

sphinx/environment/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -253,15 +253,15 @@ def __init__(self, app: Sphinx) -> None:
253253
# search index data
254254

255255
# docname -> title
256-
self._search_index_titles: dict[str, str] = {}
256+
self._search_index_titles: dict[str, str | None] = {}
257257
# docname -> filename
258258
self._search_index_filenames: dict[str, str] = {}
259259
# stemmed words -> set(docname)
260260
self._search_index_mapping: dict[str, set[str]] = {}
261261
# stemmed words in titles -> set(docname)
262262
self._search_index_title_mapping: dict[str, set[str]] = {}
263263
# docname -> all titles in document
264-
self._search_index_all_titles: dict[str, list[tuple[str, str]]] = {}
264+
self._search_index_all_titles: dict[str, list[tuple[str, str | None]]] = {}
265265
# docname -> list(index entry)
266266
self._search_index_index_entries: dict[str, list[tuple[str, str, str]]] = {}
267267
# objtype -> index

sphinx/search/__init__.py

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -198,7 +198,7 @@ def _is_meta_keywords(
198198
@dataclasses.dataclass
199199
class WordStore:
200200
words: list[str] = dataclasses.field(default_factory=list)
201-
titles: list[tuple[str, str]] = dataclasses.field(default_factory=list)
201+
titles: list[tuple[str, str | None]] = dataclasses.field(default_factory=list)
202202
title_words: list[str] = dataclasses.field(default_factory=list)
203203

204204

@@ -253,15 +253,15 @@ class IndexBuilder:
253253
def __init__(self, env: BuildEnvironment, lang: str, options: dict[str, str], scoring: str) -> None:
254254
self.env = env
255255
# docname -> title
256-
self._titles: dict[str, str] = env._search_index_titles
256+
self._titles: dict[str, str | None] = env._search_index_titles
257257
# docname -> filename
258258
self._filenames: dict[str, str] = env._search_index_filenames
259259
# stemmed words -> set(docname)
260260
self._mapping: dict[str, set[str]] = env._search_index_mapping
261261
# stemmed words in titles -> set(docname)
262262
self._title_mapping: dict[str, set[str]] = env._search_index_title_mapping
263263
# docname -> all titles in document
264-
self._all_titles: dict[str, list[tuple[str, str]]] = env._search_index_all_titles
264+
self._all_titles: dict[str, list[tuple[str, str | None]]] = env._search_index_all_titles
265265
# docname -> list(index entry)
266266
self._index_entries: dict[str, list[tuple[str, str, str]]] = env._search_index_index_entries
267267
# objtype -> index
@@ -369,6 +369,13 @@ def get_objects(self, fn2index: dict[str, int]
369369
return rv
370370

371371
def get_terms(self, fn2index: dict[str, int]) -> tuple[dict[str, list[int] | int], dict[str, list[int] | int]]:
372+
"""
373+
Return a mapping of document and title terms to their corresponding sorted document IDs.
374+
375+
When a term is only found within a single document, then the value for that term will be
376+
an integer value. When a term is found within multiple documents, the value will be a list
377+
of integers.
378+
"""
372379
rvs: tuple[dict[str, list[int] | int], dict[str, list[int] | int]] = ({}, {})
373380
for rv, mapping in zip(rvs, (self._mapping, self._title_mapping)):
374381
for k, v in mapping.items():
@@ -391,7 +398,7 @@ def freeze(self) -> dict[str, Any]:
391398
objtypes = {v: k[0] + ':' + k[1] for (k, v) in self._objtypes.items()}
392399
objnames = self._objnames
393400

394-
alltitles: dict[str, list[tuple[int, str]]] = {}
401+
alltitles: dict[str, list[tuple[int, str | None]]] = {}
395402
for docname, titlelist in sorted(self._all_titles.items()):
396403
for title, titleid in titlelist:
397404
alltitles.setdefault(title, []).append((fn2index[docname], titleid))
@@ -502,9 +509,10 @@ def _visit_nodes(node):
502509
elif isinstance(node, nodes.Text):
503510
word_store.words.extend(split(node.astext()))
504511
elif isinstance(node, nodes.title):
505-
title = node.astext()
512+
title, is_main_title = node.astext(), len(word_store.titles) == 0
506513
ids = node.parent['ids']
507-
word_store.titles.append((title, ids[0] if ids else None))
514+
title_node_id = None if is_main_title else ids[0] if ids else None
515+
word_store.titles.append((title, title_node_id))
508516
word_store.title_words.extend(split(title))
509517
for child in node.children:
510518
_visit_nodes(child)

tests/js/fixtures/multiterm/searchindex.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/js/fixtures/partial/searchindex.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/js/searchtools.js

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -70,21 +70,12 @@ describe('Basic html theme search', function() {
7070

7171
searchParameters = Search._parseQuery('main page');
7272

73-
// fixme: duplicate result due to https://github.com/sphinx-doc/sphinx/issues/11961
7473
hits = [
7574
[
7675
'index',
7776
'Main Page',
7877
'',
7978
null,
80-
15,
81-
'index.rst'
82-
],
83-
[
84-
'index',
85-
'Main Page',
86-
'#main-page',
87-
null,
8879
100,
8980
'index.rst'
9081
]

tests/test_search.py

Lines changed: 33 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,9 @@ def is_registered_term(index, keyword):
7171
7272
.. test that comments are not indexed: boson
7373
74+
another_title
75+
=============
76+
7477
test that non-comments are indexed: fermion
7578
'''
7679

@@ -168,6 +171,10 @@ def test_IndexBuilder():
168171
'docname2_1': 'title2_1', 'docname2_2': 'title2_2'}
169172
assert index._filenames == {'docname1_1': 'filename1_1', 'docname1_2': 'filename1_2',
170173
'docname2_1': 'filename2_1', 'docname2_2': 'filename2_2'}
174+
# note: element iteration order (sort order) is important when the index
175+
# is frozen (serialized) during build -- however, the _mapping-related
176+
# dictionaries below may be iterated in arbitrary order by Python at
177+
# runtime.
171178
assert index._mapping == {
172179
'ar': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'},
173180
'fermion': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'},
@@ -176,7 +183,10 @@ def test_IndexBuilder():
176183
'index': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'},
177184
'test': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'},
178185
}
179-
assert index._title_mapping == {'section_titl': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'}}
186+
assert index._title_mapping == {
187+
'another_titl': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'},
188+
'section_titl': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'},
189+
}
180190
assert index._objtypes == {}
181191
assert index._objnames == {}
182192

@@ -196,8 +206,14 @@ def test_IndexBuilder():
196206
'non': [0, 1, 2, 3],
197207
'test': [0, 1, 2, 3]},
198208
'titles': ('title1_1', 'title1_2', 'title2_1', 'title2_2'),
199-
'titleterms': {'section_titl': [0, 1, 2, 3]},
200-
'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title'), (2, 'section-title'), (3, 'section-title')]},
209+
'titleterms': {
210+
'another_titl': [0, 1, 2, 3],
211+
'section_titl': [0, 1, 2, 3],
212+
},
213+
'alltitles': {
214+
'another_title': [(0, 'another-title'), (1, 'another-title'), (2, 'another-title'), (3, 'another-title')],
215+
'section_title': [(0, None), (1, None), (2, None), (3, None)],
216+
},
201217
'indexentries': {},
202218
}
203219
assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
@@ -238,7 +254,10 @@ def test_IndexBuilder():
238254
'index': {'docname1_2', 'docname2_2'},
239255
'test': {'docname1_2', 'docname2_2'},
240256
}
241-
assert index._title_mapping == {'section_titl': {'docname1_2', 'docname2_2'}}
257+
assert index._title_mapping == {
258+
'another_titl': {'docname1_2', 'docname2_2'},
259+
'section_titl': {'docname1_2', 'docname2_2'},
260+
}
242261
assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
243262
assert index._objnames == {0: ('dummy1', 'objtype1', 'objtype1'), 1: ('dummy2', 'objtype1', 'objtype1')}
244263

@@ -257,8 +276,14 @@ def test_IndexBuilder():
257276
'non': [0, 1],
258277
'test': [0, 1]},
259278
'titles': ('title1_2', 'title2_2'),
260-
'titleterms': {'section_titl': [0, 1]},
261-
'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title')]},
279+
'titleterms': {
280+
'another_titl': [0, 1],
281+
'section_titl': [0, 1],
282+
},
283+
'alltitles': {
284+
'another_title': [(0, 'another-title'), (1, 'another-title')],
285+
'section_title': [(0, None), (1, None)],
286+
},
262287
'indexentries': {},
263288
}
264289
assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
@@ -347,7 +372,8 @@ def assert_is_sorted(item, path: str):
347372
assert_is_sorted(value, f'{path}.{key}')
348373
elif isinstance(item, list):
349374
if not is_title_tuple_type(item) and path not in lists_not_to_sort:
350-
assert item == sorted(item), f'{err_path} is not sorted'
375+
# sort nulls last; http://stackoverflow.com/questions/19868767/
376+
assert item == sorted(item, key=lambda x: (x is None, x)), f'{err_path} is not sorted'
351377
for i, child in enumerate(item):
352378
assert_is_sorted(child, f'{path}[{i}]')
353379

0 commit comments

Comments
 (0)