Skip to content

Commit 1ae139b

Browse files
committed
More tests
1 parent 55f7710 commit 1ae139b

File tree

2 files changed

+188
-11
lines changed

2 files changed

+188
-11
lines changed

selectolax/lexbor/node.pxi

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -664,7 +664,7 @@ cdef class LexborNode:
664664
if isinstance(value, (str, bytes, unicode)):
665665
bytes_val = to_bytes(value)
666666
new_node = <lxb_dom_node_t *> lxb_dom_document_create_text_node(
667-
&self.parser.document.dom_document,
667+
&self.parser.main_document().dom_document,
668668
<lxb_char_t *> bytes_val, len(bytes_val)
669669
)
670670
if new_node == NULL:
@@ -673,7 +673,7 @@ cdef class LexborNode:
673673
lxb_dom_node_remove(<lxb_dom_node_t *> self.node)
674674
elif isinstance(value, LexborNode):
675675
new_node = lxb_dom_document_import_node(
676-
&self.parser.document.dom_document,
676+
&self.parser.main_document().dom_document,
677677
<lxb_dom_node_t *> value.node,
678678
<bint> True
679679
)
@@ -717,15 +717,15 @@ cdef class LexborNode:
717717
if isinstance(value, (str, bytes, unicode)):
718718
bytes_val = to_bytes(value)
719719
new_node = <lxb_dom_node_t *> lxb_dom_document_create_text_node(
720-
&self.parser.document.dom_document,
720+
&self.parser.main_document().dom_document,
721721
<lxb_char_t *> bytes_val, len(bytes_val)
722722
)
723723
if new_node == NULL:
724724
raise SelectolaxError("Can't create a new node")
725725
lxb_dom_node_insert_before(self.node, new_node)
726726
elif isinstance(value, LexborNode):
727727
new_node = lxb_dom_document_import_node(
728-
&self.parser.document.dom_document,
728+
&self.parser.main_document().dom_document,
729729
<lxb_dom_node_t *> value.node,
730730
<bint> True
731731
)
@@ -768,15 +768,15 @@ cdef class LexborNode:
768768
if isinstance(value, (str, bytes, unicode)):
769769
bytes_val = to_bytes(value)
770770
new_node = <lxb_dom_node_t *> lxb_dom_document_create_text_node(
771-
&self.parser.document.dom_document,
771+
&self.parser.main_document().dom_document,
772772
<lxb_char_t *> bytes_val, len(bytes_val)
773773
)
774774
if new_node == NULL:
775775
raise SelectolaxError("Can't create a new node")
776776
lxb_dom_node_insert_after(self.node, new_node)
777777
elif isinstance(value, LexborNode):
778778
new_node = lxb_dom_document_import_node(
779-
&self.parser.document.dom_document,
779+
&self.parser.main_document().dom_document,
780780
<lxb_dom_node_t *> value.node,
781781
<bint> True
782782
)
@@ -827,7 +827,7 @@ cdef class LexborNode:
827827
lxb_dom_node_insert_child(self.node, new_node)
828828
elif isinstance(value, LexborNode):
829829
new_node = lxb_dom_document_import_node(
830-
&self.parser.document.dom_document,
830+
&self.parser.main_document().dom_document,
831831
<lxb_dom_node_t *> value.node,
832832
<bint> True
833833
)

tests/test_lexbor_fragment.py

Lines changed: 181 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,6 @@
22
import pytest
33
from selectolax.lexbor import LexborHTMLParser
44

5-
# TODO:
6-
# 1) .clone on document, .clone on node
7-
# 2) Any kind of tree modification
8-
95

106
def clean_doc(text: str) -> str:
    """Return *text* passed through ``cleandoc`` with one newline appended.

    Lets tests write expected HTML as an indented triple-quoted string and
    compare it against serializer output that ends with a newline.

    NOTE(review): ``cleandoc`` is presumably ``inspect.cleandoc``; the import
    line is outside the visible part of this file -- confirm.

    Args:
        text: Possibly indented, multi-line text.

    Returns:
        The cleaned text followed by exactly one ``"\\n"``.
    """
    return f"{cleandoc(text)}\n"
@@ -102,3 +98,184 @@ def test_insert_node_fragment_parser():
10298
p = LexborHTMLParser(html, is_fragment=True)
10399
p.root.insert_child("text")
104100
assert p.html == "<div>text</div>"
101+
102+
103+
def test_insert_before_fragment_parser():
    """Text inserted before a node in a fragment lands just ahead of it."""
    parser = LexborHTMLParser("<div><span></span></div>", is_fragment=True)
    target = parser.root.css_first("span")
    target.insert_before("text")
    assert parser.html == "<div>text<span></span></div>"
109+
110+
111+
def test_insert_after_fragment_parser():
    """Text inserted after a node in a fragment lands just behind it."""
    parser = LexborHTMLParser("<div><span></span></div>", is_fragment=True)
    target = parser.root.css_first("span")
    target.insert_after("text")
    assert parser.html == "<div><span></span>text</div>"
117+
118+
119+
def test_clone_parser_fragment():
    """Cloning a fragment parser yields an equal but independent parser."""
    source = "<div><span>Hello</span><p>World</p></div>"
    original = LexborHTMLParser(source, is_fragment=True)
    duplicate = original.clone()
    assert duplicate.html == original.html
    assert duplicate is not original

    # Mutate only the copy; the source parser must keep its markup.
    duplicate.root.css_first("span").insert_child("!")
    assert duplicate.html == "<div><span>Hello!</span><p>World</p></div>"
    assert original.html == "<div><span>Hello</span><p>World</p></div>"
129+
130+
131+
def test_clone_node_fragment():
    """Cloning a node of a fragment yields an equal but independent node."""
    parser = LexborHTMLParser(
        "<div><span>Hello</span><p>World</p></div>", is_fragment=True
    )
    original_span = parser.root.css_first("span")
    span_copy = original_span.clone()
    assert span_copy.html == original_span.html
    assert span_copy is not original_span

    # Mutate only the copy; the source node must stay untouched.
    span_copy.insert_child("!")
    assert span_copy.html == "<span>Hello!</span>"
    assert original_span.html == "<span>Hello</span>"
142+
143+
144+
def test_fragment_root_html_serialization():
    """Check ``root.html`` and ``parser.html`` for a two-element fragment."""
    parser = LexborHTMLParser(
        "<div>Hello</div><span>World</span>", is_fragment=True
    )
    assert parser.root.html == "<div>Hello</div><span>World</span>"

    # Appending a child to the root must show up in the parser output.
    parser.root.insert_child("!")
    assert parser.html == "<div>Hello!</div><span>World</span>"
150+
151+
152+
def test_fragment_node_properties():
    """Node-kind flags are correct for element and text nodes of a fragment."""
    parser = LexborHTMLParser(
        "<div>Hello</div><span>World</span>", is_fragment=True
    )
    div = parser.root
    span = parser.root.next

    # Both top-level nodes are plain elements.
    for element in (div, span):
        assert element.is_element_node is True
        assert element.is_text_node is False
        assert element.is_comment_node is False

    # The first child of the <div> is its text content.
    text_node = div.first_child
    assert text_node.is_element_node is False
    assert text_node.is_text_node is True
    assert text_node.is_comment_node is False
170+
171+
172+
def test_fragment_text_extraction():
    """``text()`` works on fragment nodes, with and without a separator."""
    parser = LexborHTMLParser(
        "<div>Hello <strong>World</strong>!</div>", is_fragment=True
    )
    div = parser.root.css_first("div")

    plain = div.text()
    assert plain == "Hello World!"

    spaced = div.text(deep=True, separator=" ", strip=True)
    assert spaced == "Hello World !"
178+
179+
180+
def test_fragment_traversal():
    """``traverse(include_text=True)`` yields fragment nodes in document order."""
    parser = LexborHTMLParser(
        "<div><span>Hello</span><p>World</p></div>", is_fragment=True
    )
    visited = list(parser.root.traverse(include_text=True))
    expected_tags = ["div", "span", "-text", "p", "-text"]

    assert len(visited) == 5
    for node, tag in zip(visited, expected_tags):
        assert node.tag == tag
190+
191+
192+
def test_fragment_inner_html():
    """``inner_html`` can be read and reassigned on a fragment node."""
    parser = LexborHTMLParser(
        "<div><span>Hello</span><p>World</p></div>", is_fragment=True
    )
    container = parser.root.css_first("div")
    assert container.inner_html == "<span>Hello</span><p>World</p>"

    # Replace the children wholesale and check the outer serialization.
    container.inner_html = "<em>New</em> content"
    assert container.html == "<div><em>New</em> content</div>"
199+
200+
201+
def test_fragment_node_operations_combined():
    """Replace, insert-before and insert-after all work on fragment nodes."""
    # Scenario 1: swap a node for plain text.
    replace_parser = LexborHTMLParser(
        "<div><span>Hello</span></div>", is_fragment=True
    )
    replace_parser.root.css_first("span").replace_with("Replaced")
    assert replace_parser.html == "<div>Replaced</div>"

    # Scenario 2: surround a node with text on both sides.
    surround_parser = LexborHTMLParser(
        "<div><span></span></div>", is_fragment=True
    )
    anchor = surround_parser.root.css_first("span")
    anchor.insert_before("Before")
    anchor.insert_after("After")
    assert surround_parser.html == "<div>Before<span></span>After</div>"
214+
215+
216+
def test_fragment_replace_with_node():
    """A node can be replaced by a node owned by another fragment parser."""
    target_parser = LexborHTMLParser(
        "<div><span>Hello</span></div>", is_fragment=True
    )
    donor_parser = LexborHTMLParser("<em>Replaced</em>", is_fragment=True)

    victim = target_parser.root.css_first("span")
    victim.replace_with(donor_parser.root)
    assert target_parser.html == "<div><em>Replaced</em></div>"
224+
225+
226+
def test_fragment_insert_before_node():
    """A node from another fragment parser can be inserted before a node."""
    target_parser = LexborHTMLParser(
        "<div><span></span></div>", is_fragment=True
    )
    donor_parser = LexborHTMLParser(
        "<strong>Before</strong>", is_fragment=True
    )

    anchor = target_parser.root.css_first("span")
    anchor.insert_before(donor_parser.root)
    assert target_parser.html == "<div><strong>Before</strong><span></span></div>"
234+
235+
236+
def test_fragment_insert_after_node():
    """A node from another fragment parser can be inserted after a node."""
    target_parser = LexborHTMLParser(
        "<div><span></span></div>", is_fragment=True
    )
    donor_parser = LexborHTMLParser("<em>After</em>", is_fragment=True)

    anchor = target_parser.root.css_first("span")
    anchor.insert_after(donor_parser.root)
    assert target_parser.html == "<div><span></span><em>After</em></div>"
244+
245+
246+
def test_fragment_insert_child_node():
    """A node from another fragment parser can be appended as a child."""
    target_parser = LexborHTMLParser("<div></div>", is_fragment=True)
    donor_parser = LexborHTMLParser("<p>Child</p>", is_fragment=True)

    container = target_parser.root.css_first("div")
    container.insert_child(donor_parser.root)
    assert target_parser.html == "<div><p>Child</p></div>"
254+
255+
256+
def test_fragment_strip_tags():
    """``strip_tags`` removes the listed elements from a fragment."""
    markup = "<div><script>alert('test')</script><p>Hello</p><style>body { color: red; }</style></div>"
    fragment = LexborHTMLParser(markup, is_fragment=True)
    unwanted = ["script", "style"]
    fragment.root.strip_tags(unwanted)
    assert fragment.html == "<div><p>Hello</p></div>"
261+
262+
263+
def test_fragment_decompose():
    """``decompose`` removes a node from the fragment's serialized output."""
    fragment = LexborHTMLParser(
        "<div><script>alert('test')</script><p>Hello</p></div>",
        is_fragment=True,
    )
    doomed = fragment.root.css_first("script")
    doomed.decompose()
    assert fragment.html == "<div><p>Hello</p></div>"
269+
270+
271+
@pytest.mark.parametrize(
    "input_html, expected",
    [
        ("<html><body><div>test</div></body></html>", "<div>test</div>"),
        ("<head><title>test</title></head>", "<title>test</title>"),
        ("<body><p>test</p></body>", "<p>test</p>"),
    ],
)
def test_fragment_strips_top_level_tags(input_html, expected):
    """Fragment parsing drops wrapping <html>, <head> and <body> tags."""
    serialized = LexborHTMLParser(input_html, is_fragment=True).html
    assert serialized == expected

0 commit comments

Comments
 (0)