Skip to content

Commit 885818b

Browse files
authored
[tests] move utilities and static data into dedicated modules and remove html5lib (#12173)
Since #12168, the generated HTML files are XML-compliant, hence ``html5lib`` is no longer needed as a testing dependency.
1 parent 9e23972 commit 885818b

28 files changed

+289
-237
lines changed

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,6 @@ lint = [
9191
]
9292
test = [
9393
"pytest>=6.0",
94-
"html5lib",
9594
"defusedxml>=0.7.1", # for secure XML/HTML parsing
9695
"cython>=3.0",
9796
"setuptools>=67.0", # for Cython compilation

sphinx/testing/util.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,11 @@
77
import contextlib
88
import os
99
import sys
10-
import warnings
1110
from io import StringIO
1211
from types import MappingProxyType
1312
from typing import TYPE_CHECKING
14-
from xml.etree import ElementTree
1513

14+
from defusedxml.ElementTree import parse as xml_parse
1615
from docutils import nodes
1716
from docutils.parsers.rst import directives, roles
1817

@@ -26,6 +25,7 @@
2625
from collections.abc import Mapping
2726
from pathlib import Path
2827
from typing import Any, Final
28+
from xml.etree.ElementTree import ElementTree
2929

3030
from docutils.nodes import Node
3131

@@ -70,10 +70,10 @@ def assert_node(node: Node, cls: Any = None, xpath: str = "", **kwargs: Any) ->
7070
f'The node{xpath}[{key}] is not {value!r}: {node[key]!r}'
7171

7272

73-
def etree_parse(path: str) -> Any:
74-
with warnings.catch_warnings(record=False):
75-
warnings.filterwarnings("ignore", category=DeprecationWarning)
76-
return ElementTree.parse(path) # NoQA: S314 # using known data in tests
73+
# keep this to restrict the API usage and to have a correct return type
74+
def etree_parse(path: str | os.PathLike[str]) -> ElementTree:
75+
"""Parse a file into a (safe) XML element tree."""
76+
return xml_parse(path)
7777

7878

7979
class SphinxTestApp(sphinx.application.Sphinx):

tests/test_builders/conftest.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,26 +3,26 @@
33
from typing import TYPE_CHECKING
44

55
import pytest
6-
from html5lib import HTMLParser
6+
7+
from sphinx.testing.util import etree_parse
78

89
if TYPE_CHECKING:
910
from collections.abc import Callable, Generator
1011
from pathlib import Path
11-
from xml.etree.ElementTree import Element
12+
from xml.etree.ElementTree import ElementTree
13+
14+
_etree_cache: dict[Path, ElementTree] = {}
1215

13-
etree_cache: dict[Path, Element] = {}
1416

17+
def _parse(path: Path) -> ElementTree:
18+
if path in _etree_cache:
19+
return _etree_cache[path]
1520

16-
def _parse(fname: Path) -> Element:
17-
if fname in etree_cache:
18-
return etree_cache[fname]
19-
with fname.open('rb') as fp:
20-
etree = HTMLParser(namespaceHTMLElements=False).parse(fp)
21-
etree_cache[fname] = etree
22-
return etree
21+
_etree_cache[path] = tree = etree_parse(path)
22+
return tree
2323

2424

2525
@pytest.fixture(scope='package')
26-
def cached_etree_parse() -> Generator[Callable[[Path], Element], None, None]:
26+
def cached_etree_parse() -> Generator[Callable[[Path], ElementTree], None, None]:
2727
yield _parse
28-
etree_cache.clear()
28+
_etree_cache.clear()

tests/test_builders/test_build_html.py

Lines changed: 2 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -12,43 +12,8 @@
1212
from sphinx.util.console import strip_colors
1313
from sphinx.util.inventory import InventoryFile
1414

15-
FIGURE_CAPTION = ".//figure/figcaption/p"
16-
17-
18-
def check_xpath(etree, fname, path, check, be_found=True):
19-
nodes = list(etree.findall(path))
20-
if check is None:
21-
assert nodes == [], ('found any nodes matching xpath '
22-
f'{path!r} in file {fname}')
23-
return
24-
else:
25-
assert nodes != [], ('did not find any node matching xpath '
26-
f'{path!r} in file {fname}')
27-
if callable(check):
28-
check(nodes)
29-
elif not check:
30-
# only check for node presence
31-
pass
32-
else:
33-
def get_text(node):
34-
if node.text is not None:
35-
# the node has only one text
36-
return node.text
37-
else:
38-
# the node has tags and text; gather texts just under the node
39-
return ''.join(n.tail or '' for n in node)
40-
41-
rex = re.compile(check)
42-
if be_found:
43-
if any(rex.search(get_text(node)) for node in nodes):
44-
return
45-
else:
46-
if all(not rex.search(get_text(node)) for node in nodes):
47-
return
48-
49-
msg = (f'{check!r} not found in any node matching '
50-
f'{path!r} in file {fname}: {[node.text for node in nodes]!r}')
51-
raise AssertionError(msg)
15+
from tests.test_builders.xpath_data import FIGURE_CAPTION
16+
from tests.test_builders.xpath_util import check_xpath
5217

5318

5419
def test_html4_error(make_app, tmp_path):

tests/test_builders/test_build_html_5_output.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import pytest
66

7-
from tests.test_builders.test_build_html import check_xpath
7+
from tests.test_builders.xpath_util import check_xpath
88

99

1010
def tail_check(check):
@@ -128,7 +128,7 @@ def checker(nodes):
128128
# ``seealso`` directive
129129
('markup.html', ".//div/p[@class='admonition-title']", 'See also'),
130130
# a ``hlist`` directive
131-
('markup.html', ".//table[@class='hlist']/tbody/tr/td/ul/li/p", '^This$'),
131+
('markup.html', ".//table[@class='hlist']/tr/td/ul/li/p", '^This$'),
132132
# a ``centered`` directive
133133
('markup.html', ".//p[@class='centered']/strong", 'LICENSE'),
134134
# a glossary

tests/test_builders/test_build_html_numfig.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55

66
import pytest
77

8-
from tests.test_builders.test_build_html import FIGURE_CAPTION, check_xpath
8+
from tests.test_builders.xpath_data import FIGURE_CAPTION
9+
from tests.test_builders.xpath_util import check_xpath
910

1011

1112
@pytest.mark.sphinx('html', testroot='numfig')

tests/test_builders/test_build_html_tocdepth.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import pytest
44

5-
from tests.test_builders.test_build_html import check_xpath
5+
from tests.test_builders.xpath_util import check_xpath
66

77

88
@pytest.mark.parametrize(("fname", "path", "check", "be_found"), [

tests/test_builders/xpath_data.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING
4+
5+
if TYPE_CHECKING:
6+
from typing import Final
7+
8+
FIGURE_CAPTION: Final[str] = ".//figure/figcaption/p"

tests/test_builders/xpath_util.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
from __future__ import annotations
2+
3+
import re
4+
import textwrap
5+
from typing import TYPE_CHECKING
6+
from xml.etree.ElementTree import tostring
7+
8+
if TYPE_CHECKING:
9+
import os
10+
from collections.abc import Callable, Iterable, Sequence
11+
from xml.etree.ElementTree import Element, ElementTree
12+
13+
14+
def _get_text(node: Element) -> str:
15+
if node.text is not None:
16+
# the node has only one text
17+
return node.text
18+
19+
# the node has tags and text; gather texts just under the node
20+
return ''.join(n.tail or '' for n in node)
21+
22+
23+
def _prettify(nodes: Iterable[Element]) -> str:
24+
def pformat(node: Element) -> str:
25+
return tostring(node, encoding='unicode', method='html')
26+
27+
return ''.join(f'(i={index}) {pformat(node)}\n' for index, node in enumerate(nodes))
28+
29+
30+
def check_xpath(
31+
etree: ElementTree,
32+
filename: str | os.PathLike[str],
33+
xpath: str,
34+
check: str | re.Pattern[str] | Callable[[Sequence[Element]], None] | None,
35+
be_found: bool = True,
36+
*,
37+
min_count: int = 1,
38+
) -> None:
39+
"""Check that one or more nodes satisfy a predicate.
40+
41+
:param etree: The element tree.
42+
:param filename: The element tree source name (for errors only).
43+
:param xpath: An XPath expression to use.
44+
:param check: Optional regular expression or a predicate the nodes must validate.
45+
:param be_found: If false, negate the predicate.
46+
:param min_count: Minimum number of nodes expected to satisfy the predicate.
47+
48+
* If *check* is empty (``''``), only the minimum count is checked.
49+
* If *check* is ``None``, no node should satisfy the XPath expression.
50+
"""
51+
nodes = etree.findall(xpath)
52+
assert isinstance(nodes, list)
53+
54+
if check is None:
55+
# use == to have a nice pytest diff
56+
assert nodes == [], f'found nodes matching xpath {xpath!r} in file {filename}'
57+
return
58+
59+
assert len(nodes) >= min_count, (f'expecting at least {min_count} node(s) '
60+
f'to satisfy {xpath!r} in file {filename}')
61+
62+
if check == '':
63+
return
64+
65+
if callable(check):
66+
check(nodes)
67+
return
68+
69+
rex = re.compile(check)
70+
if be_found:
71+
if any(rex.search(_get_text(node)) for node in nodes):
72+
return
73+
else:
74+
if all(not rex.search(_get_text(node)) for node in nodes):
75+
return
76+
77+
ctx = textwrap.indent(_prettify(nodes), ' ' * 2)
78+
msg = f'{check!r} not found in any node matching {xpath!r} in file {filename}:\n{ctx}'
79+
raise AssertionError(msg)

tests/test_domains/test_domain_std.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import pytest
66
from docutils import nodes
77
from docutils.nodes import definition, definition_list, definition_list_item, term
8-
from html5lib import HTMLParser
98

109
from sphinx import addnodes
1110
from sphinx.addnodes import (
@@ -20,7 +19,7 @@
2019
)
2120
from sphinx.domains.std import StandardDomain
2221
from sphinx.testing import restructuredtext
23-
from sphinx.testing.util import assert_node
22+
from sphinx.testing.util import assert_node, etree_parse
2423

2524

2625
def test_process_doc_handle_figure_caption():
@@ -375,9 +374,11 @@ def test_productionlist(app, status, warning):
375374
assert warnings[-1] == ''
376375
assert "Dup2.rst:4: WARNING: duplicate token description of Dup, other instance in Dup1" in warnings[0]
377376

378-
with (app.outdir / 'index.html').open('rb') as f:
379-
etree = HTMLParser(namespaceHTMLElements=False).parse(f)
380-
ul = list(etree.iter('ul'))[1]
377+
etree = etree_parse(app.outdir / 'index.html')
378+
nodes = list(etree.iter('ul'))
379+
assert len(nodes) >= 2
380+
381+
ul = nodes[1]
381382
cases = []
382383
for li in list(ul):
383384
assert len(list(li)) == 1

0 commit comments

Comments
 (0)