Skip to content

Commit e67a408

Browse files
committed
Add create_tag for LexborHTMLParser
1 parent 112d30d commit e67a408

File tree

5 files changed

+184
-6
lines changed

5 files changed

+184
-6
lines changed

selectolax/lexbor.pyi

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1254,11 +1254,42 @@ class LexborHTMLParser:
12541254
None
12551255
"""
12561256
...
1257+
def create_node(self, tag: str) -> LexborNode:
1258+
"""Given an HTML tag name, e.g. `"div"`, create a single empty node for that tag,
1259+
e.g. `"<div></div>"`.
1260+
1261+
1262+
Parameters
1263+
----------
1264+
tag : str
1265+
Name of the tag to create.
1266+
1267+
Returns
1268+
-------
1269+
LexborNode
1270+
Newly created element node.
1271+
Raises
1272+
------
1273+
SelectolaxError
1274+
If the element cannot be created.
1275+
1276+
Examples
1277+
--------
1278+
>>> parser = LexborHTMLParser("<div></div>")
1279+
>>> new_node = parser.create_node("span")
1280+
>>> new_node.tag
1281+
'span'
1282+
>>> parser.css_first("div").insert_child(new_node)
1283+
>>> parser.html
1284+
'<html><head></head><body><div><span></span></div></body></html>'
1285+
"""
12571286

12581287
def create_tag(tag: str) -> LexborNode:
12591288
"""
12601289
Given an HTML tag name, e.g. `"div"`, create a single empty node for that tag,
12611290
e.g. `"<div></div>"`.
1291+
1292+
Use `LexborHTMLParser().create_node(..)` if you need to create a node tied to a specific parser instance.
12621293
"""
12631294
...
12641295

selectolax/lexbor.pyx

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -704,3 +704,51 @@ cdef class LexborHTMLParser:
704704
None
705705
"""
706706
self.root.inner_html = html
707+
708+
def create_node(self, str tag):
    """Create a single empty element node in this parser's document.

    Given an HTML tag name, e.g. `"div"`, create a single empty node for that tag,
    e.g. `"<div></div>"`.

    Parameters
    ----------
    tag : str
        Name of the tag to create.

    Returns
    -------
    LexborNode
        Newly created element node.

    Raises
    ------
    SelectolaxError
        If `tag` is empty or the element cannot be created.

    Examples
    --------
    >>> parser = LexborHTMLParser("<div></div>")
    >>> new_node = parser.create_node("span")
    >>> new_node.tag
    'span'
    >>> parser.css_first("div").insert_child(new_node)
    >>> parser.html
    '<html><head></head><body><div><span></span></div></body></html>'
    """
    cdef lxb_html_element_t* element
    cdef lxb_dom_node_t* dom_node

    if not tag:
        raise SelectolaxError("Tag name cannot be empty")

    # Hold the encoded name in a local so the byte buffer outlives the C call.
    pybyte_name = tag.encode('UTF-8')

    element = lxb_html_document_create_element(
        self.document,
        <const lxb_char_t *> pybyte_name,
        len(pybyte_name),
        NULL
    )

    if element == NULL:
        raise SelectolaxError(f"Can't create element for tag '{tag}'")

    dom_node = <lxb_dom_node_t *> element

    # Wrap the raw node and tie it to this parser instance.
    return LexborNode.new(dom_node, self)

selectolax/lexbor/util.pxi

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ def create_tag(tag: str):
77
"""
88
Given an HTML tag name, e.g. `"div"`, create a single empty node for that tag,
99
e.g. `"<div></div>"`.
10+
11+
Use `LexborHTMLParser().create_node(..)` if you need to create a node tied to a specific parser instance.
1012
"""
1113
return LexborHTMLParser(f"<{tag}></{tag}>", is_fragment=True).root
1214

tests/test_lexbor.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -691,7 +691,6 @@ def test_double_unwrap_prevention():
691691

692692

693693
def test_clone_complex_modifications():
694-
"""Test cloning with complex document modifications."""
695694
html = "<div><p>Original</p><span>Content</span></div>"
696695
parser = LexborHTMLParser(html)
697696

@@ -710,3 +709,53 @@ def test_clone_complex_modifications():
710709

711710
cloned_text = cloned.root.text()
712711
assert "Modified" not in cloned_text
712+
713+
714+
def test_create_node_basic():
    # A freshly created node has no parent until it is inserted into the tree.
    parser = LexborHTMLParser("<div></div>")
    span = parser.create_node("span")
    assert span.tag == "span"
    assert span.parent is None

    container = parser.css_first("div")
    container.insert_child(span)
    assert parser.html == "<html><head></head><body><div><span></span></div></body></html>"
723+
724+
725+
def test_create_node_different_tags():
    # Standard and custom element names are created, inserted and serialised.
    parser = LexborHTMLParser("<div></div>")
    root = parser.root
    assert root is not None

    tag_names = ("p", "span", "div", "h1", "custom-tag")
    for tag in tag_names:
        created = parser.create_node(tag)
        assert created.tag == tag
        root.insert_child(created)

    serialized = parser.html
    assert serialized is not None
    for tag in tag_names:
        assert f"<{tag}></{tag}>" in serialized
740+
741+
742+
def test_create_node_with_attributes():
    # Attributes set on a created node must show up in the serialised document.
    parser = LexborHTMLParser("<div></div>")
    anchor = parser.create_node("a")
    anchor.attrs["href"] = "https://example.com"
    anchor.attrs["class"] = "link"

    parser.root.insert_child(anchor)
    serialized = parser.html
    assert serialized is not None
    for expected_attr in ('href="https://example.com"', 'class="link"'):
        assert expected_attr in serialized
753+
754+
755+
def test_create_node_empty_tag_name():
    # An empty tag name must be rejected with SelectolaxError.
    # pytest.raises fails the test when nothing is raised, without relying on
    # an `assert False` guard that is stripped under `python -O`.
    import pytest

    parser = LexborHTMLParser("<div></div>")
    with pytest.raises(SelectolaxError):
        parser.create_node("")

tests/test_lexbor_fragment.py

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from inspect import cleandoc
22
import pytest
3-
from selectolax.lexbor import LexborHTMLParser
3+
from selectolax.lexbor import LexborHTMLParser, SelectolaxError
44

55

66
def clean_doc(text: str) -> str:
@@ -418,14 +418,12 @@ def test_attributes_access_on_non_element():
418418
],
419419
)
420420
def test_fragment_parsing_malformed_html(malformed_html):
421-
"""Test fragment parsing with malformed HTML."""
422421
parser = LexborHTMLParser(malformed_html, is_fragment=True)
423422
html_result = parser.html
424423
assert html_result is None or isinstance(html_result, str)
425424

426425

427426
def test_fragment_only_text():
428-
"""Test fragment parsing with only text."""
429427
text_only = "Just plain text"
430428
parser = LexborHTMLParser(text_only, is_fragment=True)
431429
html_result = parser.html
@@ -434,7 +432,6 @@ def test_fragment_only_text():
434432

435433

436434
def test_fragment_only_comment():
437-
"""Test fragment parsing with only comment."""
438435
comment_only = "<!-- Just a comment -->"
439436
parser = LexborHTMLParser(comment_only, is_fragment=True)
440437
html_result = parser.html
@@ -443,10 +440,61 @@ def test_fragment_only_comment():
443440

444441

445442
def test_fragment_mixed_content():
446-
"""Test fragment parsing with mixed content."""
447443
mixed = "Text <!-- comment --> <div>element</div> more text"
448444
parser = LexborHTMLParser(mixed, is_fragment=True)
449445
html_result = parser.html
450446
assert html_result is not None
451447
assert "Text" in html_result
452448
assert "element" in html_result
449+
450+
451+
def test_fragment_create_node_basic():
    # In fragment mode the created node is inserted under the fragment root.
    parser = LexborHTMLParser("<div></div>", is_fragment=True)
    assert parser.root is not None
    span = parser.create_node("span")
    assert span.tag == "span"
    assert span.parent is None

    parser.root.insert_child(span)
    assert parser.html == "<div><span></span></div>"
461+
462+
463+
def test_fragment_create_node_different_tags():
    # Standard and custom element names are created and serialised in fragment mode.
    parser = LexborHTMLParser("<div></div>", is_fragment=True)
    root = parser.root
    assert root is not None

    tag_names = ("p", "span", "div", "h1", "custom-tag")
    for tag in tag_names:
        created = parser.create_node(tag)
        assert created.tag == tag
        root.insert_child(created)

    serialized = parser.html
    assert serialized is not None
    for tag in tag_names:
        assert f"<{tag}></{tag}>" in serialized
478+
479+
480+
def test_fragment_create_node_with_attributes():
    # Attributes set on a created node must show up in the serialised fragment.
    parser = LexborHTMLParser("<div></div>", is_fragment=True)
    assert parser.root is not None
    anchor = parser.create_node("a")
    anchor.attrs["href"] = "https://example.com"
    anchor.attrs["class"] = "link"

    parser.root.insert_child(anchor)
    serialized = parser.html
    assert serialized is not None
    for expected_attr in ('href="https://example.com"', 'class="link"'):
        assert expected_attr in serialized
492+
493+
494+
def test_fragment_create_node_empty_tag_name():
    # An empty tag name must be rejected with SelectolaxError in fragment mode too.
    # pytest.raises fails the test when nothing is raised, without relying on
    # an `assert False` guard that is stripped under `python -O`.
    parser = LexborHTMLParser("<div></div>", is_fragment=True)
    with pytest.raises(SelectolaxError):
        parser.create_node("")

0 commit comments

Comments
 (0)