@@ -38,6 +38,8 @@ def test_declared_encoding(all_options):
3838
3939def test_empty (all_options ):
4040 assert extract_text (u'' , ** all_options ) == ''
41+ assert extract_text (u' ' , ** all_options ) == ''
42+ assert extract_text (None , ** all_options ) == ''
4143
4244
4345def test_extract_text_from_tree (all_options ):
@@ -47,6 +49,14 @@ def test_extract_text_from_tree(all_options):
4749 assert extract_text (tree , ** all_options ) == u'Hello, world!'
4850
4951
52+ def test_extract_text_from_node (all_options ):
53+ html = (u'<html><style>.div {}</style>'
54+ '<body><p>Hello, world!</p></body></html>' )
55+ tree = parse_html (html )
56+ node = tree .xpath ('//p' )[0 ]
57+ assert extract_text (node , ** all_options ) == u'Hello, world!'
58+
59+
5060def test_inline_tags_whitespace (all_options ):
5161 html = u'<span>field</span><span>value of</span><span></span>'
5262 assert extract_text (html , ** all_options ) == u'field value of'
@@ -79,17 +89,22 @@ def test_bad_punct_whitespace():
7989 assert text == u'trees = webstruct.load_trees("train/*.html")'
8090
8191
82- def test_selector (all_options ):
92+ def test_selectors (all_options ):
8393 html = (u'<span><span id="extract-me">text<a>more</a>'
8494 '</span>and more text <a> and some more</a> <a></a> </span>' )
95+ # Selector
8596 sel = cleaned_selector (html )
8697 assert selector_to_text (sel , ** all_options ) == 'text more and more text and some more'
98+
99+ # SelectorList
87100 subsel = sel .xpath ('//span[@id="extract-me"]' )
88101 assert selector_to_text (subsel , ** all_options ) == 'text more'
89102 subsel = sel .xpath ('//a' )
90103 assert selector_to_text (subsel , ** all_options ) == 'more and some more'
91104 subsel = sel .xpath ('//a[@id="extract-me"]' )
92105 assert selector_to_text (subsel , ** all_options ) == ''
106+ subsel = sel .xpath ('//foo' )
107+ assert selector_to_text (subsel , ** all_options ) == ''
93108
94109
95110def test_guess_layout ():
0 commit comments