Skip to content

Commit 8e768e6

Browse files
authored
Make `searchindex.js` deterministic (#11665)
1 parent f0c25a0 commit 8e768e6

File tree

3 files changed

+40
-1
lines changed

3 files changed

+40
-1
lines changed

CHANGES.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ Bugs fixed
1818

1919
* #11668: Raise a useful error when ``theme.conf`` is missing.
2020
Patch by Vinay Sajip.
21+
* #11622: Ensure that the order of keys in ``searchindex.js`` is deterministic.
22+
Patch by Pietro Albini.
2123

2224
Testing
2325
-------

sphinx/search/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ class _JavaScriptIndex:
162162
SUFFIX = ')'
163163

164164
def dumps(self, data: Any) -> str:
165-
return self.PREFIX + json.dumps(data) + self.SUFFIX
165+
return self.PREFIX + json.dumps(data, sort_keys=True) + self.SUFFIX
166166

167167
def loads(self, s: str) -> Any:
168168
data = s[len(self.PREFIX):-len(self.SUFFIX)]

tests/test_search.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,3 +304,40 @@ def test_parallel(app):
304304
app.build()
305305
index = load_searchindex(app.outdir / 'searchindex.js')
306306
assert index['docnames'] == ['index', 'nosearch', 'tocitem']
307+
308+
309+
@pytest.mark.sphinx(testroot='search')
310+
def test_search_index_is_deterministic(app):
311+
lists_not_to_sort = {
312+
# Each element of .titles is related to the element of .docnames in the same position.
313+
# The ordering is deterministic because .docnames is sorted.
314+
'.titles',
315+
# Each element of .filenames is related to the element of .docnames in the same position.
316+
# The ordering is deterministic because .docnames is sorted.
317+
'.filenames',
318+
}
319+
320+
# In the search index, titles inside .alltitles are stored as a tuple of
321+
# (document_idx, title_anchor). Tuples are represented as lists in JSON,
322+
# but their contents must not be sorted. We cannot sort them anyway, as
323+
# document_idx is an int and title_anchor is a str.
324+
def is_title_tuple_type(item):
325+
return len(item) == 2 and isinstance(item[0], int) and isinstance(item[1], str)
326+
327+
def assert_is_sorted(item, path):
328+
err_path = path if path else '<root>'
329+
if isinstance(item, dict):
330+
assert list(item.keys()) == sorted(item.keys()), f'{err_path} is not sorted'
331+
for key, value in item.items():
332+
assert_is_sorted(value, f'{path}.{key}')
333+
elif isinstance(item, list):
334+
if not is_title_tuple_type(item) and path not in lists_not_to_sort:
335+
assert item == sorted(item), f'{err_path} is not sorted'
336+
for i, child in enumerate(item):
337+
assert_is_sorted(child, f'{path}[{i}]')
338+
339+
app.builder.build_all()
340+
index = load_searchindex(app.outdir / 'searchindex.js')
341+
# Pretty print the index. Only shown by pytest on failure.
342+
print(f'searchindex.js contents:\n\n{json.dumps(index, indent=2)}')
343+
assert_is_sorted(index, '')

0 commit comments

Comments
 (0)