|
2 | 2 |
|
3 | 3 | from inspect import cleandoc |
4 | 4 |
|
| 5 | +import pytest |
| 6 | + |
5 | 7 | from selectolax.lexbor import LexborHTMLParser, parse_fragment |
6 | 8 |
|
7 | 9 |
|
@@ -218,7 +220,7 @@ def test_comment_content_property() -> None: |
218 | 220 | assert text_node.comment_content == "hello" |
219 | 221 |
|
220 | 222 |
|
221 | | -def test_parser_without_top_level_tags(): |
| 223 | +def test_fragment_parser_top_level_tags(): |
222 | 224 | parser = LexborHTMLParser( |
223 | 225 | "<div><span>\n \n</span><title>X</title></div>", is_fragment=False |
224 | 226 | ) |
@@ -267,11 +269,38 @@ def test_fragment_parser_multiple_nodes_on_the_same_level(): |
267 | 269 | assert parser.html == expected_html |
268 | 270 |
|
269 | 271 |
|
270 | | -def test_fragmented_parser_whole_doc(): |
| 272 | +def test_fragment_parser_whole_doc(): |
271 | 273 | html = """<html lang="en"> |
272 | 274 | <head><meta charset="utf-8"><title>Title!</title></head> |
273 | 275 | <body><p>Lorem <strong>Ipsum</strong>!</p></body> |
274 | 276 | </html>""" |
275 | 277 | parser = LexborHTMLParser(html, is_fragment=True) |
276 | 278 | expected_html = '<meta charset="utf-8"><title>Title!</title>\n <p>Lorem <strong>Ipsum</strong>!</p>' |
277 | | - assert parser.html.strip() == expected_html |
| 279 | + html = parser.html |
| 280 | + assert html is not None |
| 281 | + assert html.strip() == expected_html |
| 282 | + |
| 283 | + |
| 284 | +def test_fragment_parser_empty_doc(): |
| 285 | + html = "" |
| 286 | + parser = LexborHTMLParser(html, is_fragment=True) |
| 287 | + assert parser.html is None |
| 288 | + |
| 289 | + |
| 290 | +@pytest.mark.parametrize( |
| 291 | + "html, expected_html", |
| 292 | + [ |
| 293 | + ("<body><div>Test</div></body>", "<div>Test</div>"), |
| 294 | + (" <div>Lorep Ipsum</div>", " <div>Lorep Ipsum</div>"), |
| 295 | + ("<div>Lorem</div><div>Ipsum</div>", "<div>Lorem</div><div>Ipsum</div>"), |
| 296 | + (" \n <div>Lorem Ipsum</div> \t ", " \n <div>Lorem Ipsum</div> \t "), |
| 297 | + ("<!-- Comment --><div>Content</div>", "<!-- Comment --><div>Content</div>"), |
| 298 | + ( |
| 299 | + "<template><p>Inside Template</p></template>", |
| 300 | + "<template><p>Inside Template</p></template>", |
| 301 | + ), |
| 302 | + ], |
| 303 | +) |
| 304 | +def test_fragment_parser(html, expected_html): |
| 305 | + parser = LexborHTMLParser(html, is_fragment=True) |
| 306 | + assert parser.html == expected_html |
0 commit comments