Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ Bugs fixed
Patch by Adam Turner.
* #13328: Fix parsing of PEP 695 functions with return annotations.
Patch by Bénédikt Tran. Initial work by Arash Badie-Modiri.
* #13355: Don't include escaped title content in the search index.
Patch by Will Lachance.

Testing
-------
Expand Down
3 changes: 2 additions & 1 deletion sphinx/builders/html/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,8 +679,9 @@ def write_doc(self, docname: str, doctree: nodes.document) -> None:
def write_doc_serialized(self, docname: str, doctree: nodes.document) -> None:
self.imgpath = relative_uri(self.get_target_uri(docname), self.imagedir)
self.post_process_images(doctree)
# get title as plain text
title_node = self.env.longtitles.get(docname)
title = self.render_partial(title_node)['title'] if title_node else ''
title = title_node.astext() if title_node else ''
Copy link
Contributor Author

@wlach wlach Feb 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Best I can tell the title is only used for indexing purposes here (corroborated by tests not failing, I hope).

self.index_page(docname, doctree, title)

def finish(self) -> None:
Expand Down
15 changes: 13 additions & 2 deletions sphinx/themes/basic/static/searchtools.js
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,15 @@ const _removeChildren = (element) => {
const _escapeRegExp = (string) =>
string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string

const _escapeHTML = (text) => {
return text
.replaceAll("&", "&")
.replaceAll("<", "&lt;")
.replaceAll(">", "&gt;")
.replaceAll('"', "&quot;")
.replaceAll("'", "&apos;");
}

const _displayItem = (item, searchTerms, highlightTerms) => {
const docBuilder = DOCUMENTATION_OPTIONS.BUILDER;
const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX;
Expand Down Expand Up @@ -340,7 +349,9 @@ const Search = {
const boost = titles[file] === title ? 1 : 0; // add a boost for document titles
normalResults.push([
docNames[file],
titles[file] !== title ? `${titles[file]} > ${title}` : title,
_escapeHTML(
titles[file] !== title ? `${titles[file]} > ${title}` : title
),
id !== null ? "#" + id : "",
null,
score + boost,
Expand All @@ -358,7 +369,7 @@ const Search = {
const score = Math.round(100 * queryLower.length / entry.length);
const result = [
docNames[file],
titles[file],
_escapeHTML(titles[file]),
id ? "#" + id : "",
null,
score,
Expand Down
2 changes: 1 addition & 1 deletion tests/js/fixtures/cpp/searchindex.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions tests/js/searchtools.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ describe('Basic html theme search', function() {

hits = [[
"index",
"&lt;no title&gt;",
"<no title>",
"",
null,
5,
Expand Down Expand Up @@ -184,7 +184,7 @@ describe('Basic html theme search', function() {

expectedRanking = [
['index', 'Main Page', '#index-0'], /* index entry */
['index', 'Main Page > Result Scoring', '#result-scoring'], /* title */
['index', 'Main Page &gt; Result Scoring', '#result-scoring'], /* title */
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jayaddison This update to an existing test shows quoting HTML content

];

searchParameters = Search._parseQuery('scoring');
Expand All @@ -198,7 +198,7 @@ describe('Basic html theme search', function() {

expectedRanking = [
['relevance', 'Relevance', ''], /* main title */
['index', 'Main Page > Relevance', '#relevance'], /* subsection heading title */
['index', 'Main Page &gt; Relevance', '#relevance'], /* subsection heading title */
];

searchParameters = Search._parseQuery('relevance');
Expand Down
4 changes: 4 additions & 0 deletions tests/roots/test-search/escapedtitle.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
`escaped` title with < and > in it
==================================

this document has escaped content in the title but also the characters < and > in it
19 changes: 14 additions & 5 deletions tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,17 @@ def test_term_in_heading_and_section(app: SphinxTestApp) -> None:
# if search term is in the title of one doc and in the text of another
# both documents should be a hit in the search index as a title,
# respectively text hit
assert '"textinhead":2' in searchindex
assert '"textinhead":0' in searchindex
assert '"textinhead":3' in searchindex
assert '"textinhead":1' in searchindex


@pytest.mark.sphinx('html', testroot='search')
def test_escaped_title(app: SphinxTestApp) -> None:
app.build(force_all=True)
searchindex = load_searchindex(app.outdir / 'searchindex.js')
print(searchindex)
assert 'escapedtitle' in searchindex['docnames']
assert 'escaped title with < and > in it' in searchindex['titles']


@pytest.mark.sphinx('html', testroot='search')
Expand Down Expand Up @@ -398,15 +407,15 @@ def test_search_index_gen_zh(app: SphinxTestApp) -> None:
def test_nosearch(app: SphinxTestApp) -> None:
app.build()
index = load_searchindex(app.outdir / 'searchindex.js')
assert index['docnames'] == ['index', 'nosearch', 'tocitem']
assert index['docnames'] == ['escapedtitle', 'index', 'nosearch', 'tocitem']
# latex is in 'nosearch.rst', and nowhere else
assert 'latex' not in index['terms']
# cat is in 'index.rst' but is marked with the 'no-search' class
assert 'cat' not in index['terms']
# bat is indexed from 'index.rst' and 'tocitem.rst' (document IDs 0, 2), and
# not from 'nosearch.rst' (document ID 1)
assert 'bat' in index['terms']
assert index['terms']['bat'] == [0, 2]
assert index['terms']['bat'] == [1, 3]


@pytest.mark.sphinx(
Expand All @@ -418,7 +427,7 @@ def test_nosearch(app: SphinxTestApp) -> None:
def test_parallel(app: SphinxTestApp) -> None:
app.build()
index = load_searchindex(app.outdir / 'searchindex.js')
assert index['docnames'] == ['index', 'nosearch', 'tocitem']
assert index['docnames'] == ['escapedtitle', 'index', 'nosearch', 'tocitem']


@pytest.mark.sphinx('html', testroot='search')
Expand Down
Loading