Skip to content

Commit b376686

Browse files
authored
Merge pull request #124 from akshayphilar/remove-null-bytes
[MRG+1] remove null bytes from html body
2 parents d4fa21c + aa4cc76 commit b376686

File tree

2 files changed: 6 additions (+6) and 2 deletions (-2).

parsel/selector.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ def _st(st):
3838
def create_root_node(text, parser_cls, base_url=None):
3939
"""Create root node for text using given parser class.
4040
"""
41-
body = text.strip().encode('utf8') or b'<html/>'
41+
body = text.strip().replace('\x00', '').encode('utf8') or b'<html/>'
4242
parser = parser_cls(recover=True, encoding='utf8')
4343
root = etree.fromstring(body, parser=parser, base_url=base_url)
4444
if root is None:

tests/test_selector.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -694,7 +694,6 @@ def test_configure_base_url(self):
694694
sel = self.sscls(text=u'nothing', base_url='http://example.com')
695695
self.assertEqual(u'http://example.com', sel.root.base)
696696

697-
698697
def test_extending_selector(self):
699698
class MySelectorList(Selector.selectorlist_cls):
700699
pass
@@ -708,6 +707,11 @@ class MySelector(Selector):
708707
self.assertIsInstance(sel.css('div'), MySelectorList)
709708
self.assertIsInstance(sel.css('div')[0], MySelector)
710709

710+
def test_replacement_null_char_from_body(self):
711+
text = u'<html>\x00<body><p>Grainy</p></body></html>'
712+
self.assertEqual(u'<html><body><p>Grainy</p></body></html>',
713+
self.sscls(text).extract())
714+
711715
class ExsltTestCase(unittest.TestCase):
712716

713717
sscls = Selector

0 commit comments

Comments
 (0)