sphinx-doc
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 0 additions & 1 deletion
diff --git a/sphinx/testing/util.py b/sphinx/testing/util.py
Lines changed: 6 additions & 6 deletions
diff --git a/tests/test_builders/conftest.py b/tests/test_builders/conftest.py
Lines changed: 12 additions & 12 deletions
diff --git a/tests/test_builders/test_build_html.py b/tests/test_builders/test_build_html.py
Lines changed: 2 additions & 37 deletions
diff --git a/tests/test_builders/test_build_html_5_output.py b/tests/test_builders/test_build_html_5_output.py
Lines changed: 2 additions & 2 deletions
diff --git a/tests/test_builders/test_build_html_numfig.py b/tests/test_builders/test_build_html_numfig.py
Lines changed: 2 additions & 1 deletion
diff --git a/tests/test_builders/test_build_html_tocdepth.py b/tests/test_builders/test_build_html_tocdepth.py
Lines changed: 1 addition & 1 deletion
diff --git a/tests/test_builders/xpath_data.py b/tests/test_builders/xpath_data.py
Lines changed: 8 additions & 0 deletions
diff --git a/tests/test_builders/xpath_util.py b/tests/test_builders/xpath_util.py
Lines changed: 79 additions & 0 deletions
diff --git a/tests/test_domains/test_domain_std.py b/tests/test_domains/test_domain_std.py
Lines changed: 6 additions & 5 deletions
@@ -91,7 +91,6 @@ lint = [
 ]
 test = [
     "pytest>=6.0",
-    "html5lib",
     "defusedxml>=0.7.1", # for secure XML/HTML parsing
     "cython>=3.0",
     "setuptools>=67.0",  # for Cython compilation
 
@@ -7,12 +7,11 @@
 import contextlib
 import os
 import sys
-import warnings
 from io import StringIO
 from types import MappingProxyType
 from typing import TYPE_CHECKING
-from xml.etree import ElementTree
 
+from defusedxml.ElementTree import parse as xml_parse
 from docutils import nodes
 from docutils.parsers.rst import directives, roles
 
@@ -26,6 +25,7 @@
     from collections.abc import Mapping
     from pathlib import Path
     from typing import Any, Final
+    from xml.etree.ElementTree import ElementTree
 
     from docutils.nodes import Node
 
@@ -70,10 +70,10 @@ def assert_node(node: Node, cls: Any = None, xpath: str = "", **kwargs: Any) ->
                 f'The node{xpath}[{key}] is not {value!r}: {node[key]!r}'
 
 
-def etree_parse(path: str) -> Any:
-    with warnings.catch_warnings(record=False):
-        warnings.filterwarnings("ignore", category=DeprecationWarning)
-        return ElementTree.parse(path)  # NoQA: S314  # using known data in tests
+# keep this to restrict the API usage and to have a correct return type
+def etree_parse(path: str | os.PathLike[str]) -> ElementTree:
+    """Parse a file into a (safe) XML element tree."""
+    return xml_parse(path)
 
 
 class SphinxTestApp(sphinx.application.Sphinx):
 
@@ -3,26 +3,26 @@
 from typing import TYPE_CHECKING
 
 import pytest
-from html5lib import HTMLParser
+
+from sphinx.testing.util import etree_parse
 
 if TYPE_CHECKING:
     from collections.abc import Callable, Generator
     from pathlib import Path
-    from xml.etree.ElementTree import Element
+    from xml.etree.ElementTree import ElementTree
+
+_etree_cache: dict[Path, ElementTree] = {}
 
-etree_cache: dict[Path, Element] = {}
 
+def _parse(path: Path) -> ElementTree:
+    if path in _etree_cache:
+        return _etree_cache[path]
 
-def _parse(fname: Path) -> Element:
-    if fname in etree_cache:
-        return etree_cache[fname]
-    with fname.open('rb') as fp:
-        etree = HTMLParser(namespaceHTMLElements=False).parse(fp)
-        etree_cache[fname] = etree
-        return etree
+    _etree_cache[path] = tree = etree_parse(path)
+    return tree
 
 
 @pytest.fixture(scope='package')
-def cached_etree_parse() -> Generator[Callable[[Path], Element], None, None]:
+def cached_etree_parse() -> Generator[Callable[[Path], ElementTree], None, None]:
     yield _parse
-    etree_cache.clear()
+    _etree_cache.clear()
@@ -12,43 +12,8 @@
 from sphinx.util.console import strip_colors
 from sphinx.util.inventory import InventoryFile
 
-FIGURE_CAPTION = ".//figure/figcaption/p"
-
-
-def check_xpath(etree, fname, path, check, be_found=True):
-    nodes = list(etree.findall(path))
-    if check is None:
-        assert nodes == [], ('found any nodes matching xpath '
-                             f'{path!r} in file {fname}')
-        return
-    else:
-        assert nodes != [], ('did not find any node matching xpath '
-                             f'{path!r} in file {fname}')
-    if callable(check):
-        check(nodes)
-    elif not check:
-        # only check for node presence
-        pass
-    else:
-        def get_text(node):
-            if node.text is not None:
-                # the node has only one text
-                return node.text
-            else:
-                # the node has tags and text; gather texts just under the node
-                return ''.join(n.tail or '' for n in node)
-
-        rex = re.compile(check)
-        if be_found:
-            if any(rex.search(get_text(node)) for node in nodes):
-                return
-        else:
-            if all(not rex.search(get_text(node)) for node in nodes):
-                return
-
-        msg = (f'{check!r} not found in any node matching '
-               f'{path!r} in file {fname}: {[node.text for node in nodes]!r}')
-        raise AssertionError(msg)
+from tests.test_builders.xpath_data import FIGURE_CAPTION
+from tests.test_builders.xpath_util import check_xpath
 
 
 def test_html4_error(make_app, tmp_path):
 
@@ -4,7 +4,7 @@
 
 import pytest
 
-from tests.test_builders.test_build_html import check_xpath
+from tests.test_builders.xpath_util import check_xpath
 
 
 def tail_check(check):
@@ -128,7 +128,7 @@ def checker(nodes):
     # ``seealso`` directive
     ('markup.html', ".//div/p[@class='admonition-title']", 'See also'),
     # a ``hlist`` directive
-    ('markup.html', ".//table[@class='hlist']/tbody/tr/td/ul/li/p", '^This$'),
+    ('markup.html', ".//table[@class='hlist']/tr/td/ul/li/p", '^This$'),
     # a ``centered`` directive
     ('markup.html', ".//p[@class='centered']/strong", 'LICENSE'),
     # a glossary
 
@@ -5,7 +5,8 @@
 
 import pytest
 
-from tests.test_builders.test_build_html import FIGURE_CAPTION, check_xpath
+from tests.test_builders.xpath_data import FIGURE_CAPTION
+from tests.test_builders.xpath_util import check_xpath
 
 
 @pytest.mark.sphinx('html', testroot='numfig')
 
@@ -2,7 +2,7 @@
 
 import pytest
 
-from tests.test_builders.test_build_html import check_xpath
+from tests.test_builders.xpath_util import check_xpath
 
 
 @pytest.mark.parametrize(("fname", "path", "check", "be_found"), [
 
@@ -0,0 +1,8 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from typing import Final
+
+FIGURE_CAPTION: Final[str] = ".//figure/figcaption/p"
@@ -0,0 +1,79 @@
+from __future__ import annotations
+
+import re
+import textwrap
+from typing import TYPE_CHECKING
+from xml.etree.ElementTree import tostring
+
+if TYPE_CHECKING:
+    import os
+    from collections.abc import Callable, Iterable, Sequence
+    from xml.etree.ElementTree import Element, ElementTree
+
+
+def _get_text(node: Element) -> str:
+    if node.text is not None:
+        # the node has only one text
+        return node.text
+
+    # the node has tags and text; gather texts just under the node
+    return ''.join(n.tail or '' for n in node)
+
+
+def _prettify(nodes: Iterable[Element]) -> str:
+    def pformat(node: Element) -> str:
+        return tostring(node, encoding='unicode', method='html')
+
+    return ''.join(f'(i={index}) {pformat(node)}\n' for index, node in enumerate(nodes))
+
+
+def check_xpath(
+    etree: ElementTree,
+    filename: str | os.PathLike[str],
+    xpath: str,
+    check: str | re.Pattern[str] | Callable[[Sequence[Element]], None] | None,
+    be_found: bool = True,
+    *,
+    min_count: int = 1,
+) -> None:
+    """Check that one or more nodes satisfy a predicate.
+
+    :param etree: The element tree.
+    :param filename: The element tree source name (for errors only).
+    :param xpath: An XPath expression to use.
+    :param check: Optional regular expression or a predicate the nodes must validate.
+    :param be_found: If false, negate the predicate.
+    :param min_count: Minimum number of nodes expected to satisfy the predicate.
+
+    * If *check* is empty (``''``), only the minimum count is checked.
+    * If *check* is ``None``, no node should satisfy the XPath expression.
+    """
+    nodes = etree.findall(xpath)
+    assert isinstance(nodes, list)
+
+    if check is None:
+        # use == to have a nice pytest diff
+        assert nodes == [], f'found nodes matching xpath {xpath!r} in file {filename}'
+        return
+
+    assert len(nodes) >= min_count, (f'expecting at least {min_count} node(s) '
+                                     f'to satisfy {xpath!r} in file {filename}')
+
+    if check == '':
+        return
+
+    if callable(check):
+        check(nodes)
+        return
+
+    rex = re.compile(check)
+    if be_found:
+        if any(rex.search(_get_text(node)) for node in nodes):
+            return
+    else:
+        if all(not rex.search(_get_text(node)) for node in nodes):
+            return
+
+    ctx = textwrap.indent(_prettify(nodes), ' ' * 2)
+    msg = f'{check!r} not found in any node matching {xpath!r} in file {filename}:\n{ctx}'
+    raise AssertionError(msg)
@@ -5,7 +5,6 @@
 import pytest
 from docutils import nodes
 from docutils.nodes import definition, definition_list, definition_list_item, term
-from html5lib import HTMLParser
 
 from sphinx import addnodes
 from sphinx.addnodes import (
@@ -20,7 +19,7 @@
 )
 from sphinx.domains.std import StandardDomain
 from sphinx.testing import restructuredtext
-from sphinx.testing.util import assert_node
+from sphinx.testing.util import assert_node, etree_parse
 
 
 def test_process_doc_handle_figure_caption():
@@ -375,9 +374,11 @@ def test_productionlist(app, status, warning):
     assert warnings[-1] == ''
     assert "Dup2.rst:4: WARNING: duplicate token description of Dup, other instance in Dup1" in warnings[0]
 
-    with (app.outdir / 'index.html').open('rb') as f:
-        etree = HTMLParser(namespaceHTMLElements=False).parse(f)
-    ul = list(etree.iter('ul'))[1]
+    etree = etree_parse(app.outdir / 'index.html')
+    nodes = list(etree.iter('ul'))
+    assert len(nodes) >= 2
+
+    ul = nodes[1]
     cases = []
     for li in list(ul):
         assert len(list(li)) == 1
Original file line number	Diff line number	Diff line change
`@@ -91,7 +91,6 @@ lint = [`
`91`	`91`	`]`
`92`	`92`	`test = [`
`93`	`93`	`"pytest>=6.0",`
`94`		`- "html5lib",`
`95`	`94`	`"defusedxml>=0.7.1", # for secure XML/HTML parsing`
`96`	`95`	`"cython>=3.0",`
`97`	`96`	`"setuptools>=67.0", # for Cython compilation`