sphinx-doc · wlach · Feb 17, 2025 · Feb 17, 2025 · Feb 18, 2025 · Feb 18, 2025
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -177,6 +177,8 @@ Bugs fixed
   Patch by Adam Turner.
 * #13328: Fix parsing of PEP 695 functions with return annotations.
   Patch by Bénédikt Tran. Initial work by Arash Badie-Modiri.
+* #13355: Don't include escaped title content in the search index.
+  Patch by Will Lachance.
 
 Testing
 -------

diff --git a/sphinx/builders/html/__init__.py b/sphinx/builders/html/__init__.py
@@ -679,8 +679,9 @@ def write_doc(self, docname: str, doctree: nodes.document) -> None:
     def write_doc_serialized(self, docname: str, doctree: nodes.document) -> None:
         self.imgpath = relative_uri(self.get_target_uri(docname), self.imagedir)
         self.post_process_images(doctree)
+        # index the title as plain text; the search UI escapes it for display
         title_node = self.env.longtitles.get(docname)
-        title = self.render_partial(title_node)['title'] if title_node else ''
+        title = title_node.astext() if title_node else ''
         self.index_page(docname, doctree, title)
 
     def finish(self) -> None:

diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js
@@ -58,6 +58,15 @@ const _removeChildren = (element) => {
 const _escapeRegExp = (string) =>
   string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string
 
+// Escape HTML-significant characters so the text is safe to assign to innerHTML.
+const _escapeHTML = (text) =>
+  text
+    .replaceAll("&", "&amp;")
+    .replaceAll("<", "&lt;")
+    .replaceAll(">", "&gt;")
+    .replaceAll('"', "&quot;")
+    .replaceAll("'", "&apos;");
+
 const _displayItem = (item, searchTerms, highlightTerms) => {
   const docBuilder = DOCUMENTATION_OPTIONS.BUILDER;
   const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX;
@@ -90,10 +99,10 @@ const _displayItem = (item, searchTerms, highlightTerms) => {
   let linkEl = listItem.appendChild(document.createElement("a"));
   linkEl.href = linkUrl + anchor;
   linkEl.dataset.score = score;
-  linkEl.innerHTML = title;
+  linkEl.innerHTML = _escapeHTML(title);
   if (descr) {
     listItem.appendChild(document.createElement("span")).innerHTML =
-      " (" + descr + ")";
+      " (" + _escapeHTML(descr) + ")";
     // highlight search terms in the description
     if (SPHINX_HIGHLIGHT_ENABLED)  // set in sphinx_highlight.js
       highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted"));

diff --git a/tests/js/fixtures/cpp/searchindex.js b/tests/js/fixtures/cpp/searchindex.js
diff --git a/tests/js/searchtools.spec.js b/tests/js/searchtools.spec.js
@@ -34,7 +34,7 @@ describe('Basic html theme search', function() {
 
       hits = [[
         "index",
-        "&lt;no title&gt;",
+        "<no title>",
         "",
         null,
         5,

diff --git a/tests/roots/test-search/escapedtitle.rst b/tests/roots/test-search/escapedtitle.rst
@@ -0,0 +1,4 @@
+`escaped` title with < and > in it
+==================================
+
+this document has escaped content in the title but also the characters < and > in it
diff --git a/tests/test_search.py b/tests/test_search.py
@@ -155,8 +155,17 @@ def test_term_in_heading_and_section(app: SphinxTestApp) -> None:
     # if search term is in the title of one doc and in the text of another
     # both documents should be a hit in the search index as a title,
     # respectively text hit
-    assert '"textinhead":2' in searchindex
-    assert '"textinhead":0' in searchindex
+    assert '"textinhead":3' in searchindex
+    assert '"textinhead":1' in searchindex
+
+
+@pytest.mark.sphinx('html', testroot='search')
+def test_escaped_title(app: SphinxTestApp) -> None:
+    # titles must be indexed as plain text: no inline markup, no HTML entities
+    app.build(force_all=True)
+    searchindex = load_searchindex(app.outdir / 'searchindex.js')
+    assert 'escapedtitle' in searchindex['docnames']
+    assert 'escaped title with < and > in it' in searchindex['titles']
 
 
 @pytest.mark.sphinx('html', testroot='search')
@@ -398,15 +407,15 @@ def test_search_index_gen_zh(app: SphinxTestApp) -> None:
 def test_nosearch(app: SphinxTestApp) -> None:
     app.build()
     index = load_searchindex(app.outdir / 'searchindex.js')
-    assert index['docnames'] == ['index', 'nosearch', 'tocitem']
+    assert index['docnames'] == ['escapedtitle', 'index', 'nosearch', 'tocitem']
     # latex is in 'nosearch.rst', and nowhere else
     assert 'latex' not in index['terms']
     # cat is in 'index.rst' but is marked with the 'no-search' class
     assert 'cat' not in index['terms']
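-    # bat is indexed from 'index.rst' and 'tocitem.rst' (document IDs 0, 2), and
-    # not from 'nosearch.rst' (document ID 1)
+    # bat is indexed from 'index.rst' and 'tocitem.rst' (document IDs 1, 3), and
+    # not from 'nosearch.rst' (document ID 2)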
     assert 'bat' in index['terms']
-    assert index['terms']['bat'] == [0, 2]
+    assert index['terms']['bat'] == [1, 3]
 
 
 @pytest.mark.sphinx(
@@ -418,7 +427,7 @@ def test_nosearch(app: SphinxTestApp) -> None:
 def test_parallel(app: SphinxTestApp) -> None:
     app.build()
     index = load_searchindex(app.outdir / 'searchindex.js')
-    assert index['docnames'] == ['index', 'nosearch', 'tocitem']
+    assert index['docnames'] == ['escapedtitle', 'index', 'nosearch', 'tocitem']
 
 
 @pytest.mark.sphinx('html', testroot='search')