@@ -109,12 +109,13 @@ def get_events(self):
109109
110110class TestCaseBase (unittest .TestCase ):
111111
112- def get_collector (self ):
113- return EventCollector (convert_charrefs = False )
112+ def get_collector (self , convert_charrefs = False ):
113+ return EventCollector (convert_charrefs = convert_charrefs )
114114
115- def _run_check (self , source , expected_events , collector = None ):
115+ def _run_check (self , source , expected_events ,
116+ * , collector = None , convert_charrefs = False ):
116117 if collector is None :
117- collector = self .get_collector ()
118+ collector = self .get_collector (convert_charrefs = convert_charrefs )
118119 parser = collector
119120 for s in source :
120121 parser .feed (s )
@@ -128,7 +129,7 @@ def _run_check(self, source, expected_events, collector=None):
128129
129130 def _run_check_extra (self , source , events ):
130131 self ._run_check (source , events ,
131- EventCollectorExtra (convert_charrefs = False ))
132+ collector = EventCollectorExtra (convert_charrefs = False ))
132133
133134
134135class HTMLParserTestCase (TestCaseBase ):
@@ -187,10 +188,87 @@ def test_malformatted_charref(self):
187188 ])
188189
189190 def test_unclosed_entityref (self ):
190- self ._run_check ("&entityref foo" , [
191- ("entityref" , "entityref" ),
192- ("data" , " foo" ),
193- ])
191+ self ._run_check ('&gt &lt' , [('entityref' , 'gt' ), ('data' , ' ' ), ('entityref' , 'lt' )],
192+ convert_charrefs = False )
193+ self ._run_check ('&gt &lt' , [('data' , '> <' )], convert_charrefs = True )
194+
195+ self ._run_check ('&undefined &lt' ,
196+ [('entityref' , 'undefined' ), ('data' , ' ' ), ('entityref' , 'lt' )],
197+ convert_charrefs = False )
198+ self ._run_check ('&undefined &lt' , [('data' , '&undefined <' )],
199+ convert_charrefs = True )
200+
201+ self ._run_check ('&gtundefined &lt' ,
202+ [('entityref' , 'gtundefined' ), ('data' , ' ' ), ('entityref' , 'lt' )],
203+ convert_charrefs = False )
204+ self ._run_check ('&gtundefined &lt' , [('data' , '>undefined <' )],
205+ convert_charrefs = True )
206+
207+ self ._run_check ('& &lt' , [('data' , '& ' ), ('entityref' , 'lt' )],
208+ convert_charrefs = False )
209+ self ._run_check ('& &lt' , [('data' , '& <' )], convert_charrefs = True )
210+
211+ def test_eof_in_entityref (self ):
212+ self ._run_check ('&gt' , [('entityref' , 'gt' )], convert_charrefs = False )
213+ self ._run_check ('&gt' , [('data' , '>' )], convert_charrefs = True )
214+
215+ self ._run_check ('&g' , [('entityref' , 'g' )], convert_charrefs = False )
216+ self ._run_check ('&g' , [('data' , '&g' )], convert_charrefs = True )
217+
218+ self ._run_check ('&undefined' , [('entityref' , 'undefined' )],
219+ convert_charrefs = False )
220+ self ._run_check ('&undefined' , [('data' , '&undefined' )],
221+ convert_charrefs = True )
222+
223+ self ._run_check ('&gtundefined' , [('entityref' , 'gtundefined' )],
224+ convert_charrefs = False )
225+ self ._run_check ('&gtundefined' , [('data' , '>undefined' )],
226+ convert_charrefs = True )
227+
228+ self ._run_check ('&' , [('data' , '&' )], convert_charrefs = False )
229+ self ._run_check ('&' , [('data' , '&' )], convert_charrefs = True )
230+
231+ def test_unclosed_charref (self ):
232+ self ._run_check ('&#123 &lt' , [('charref' , '123' ), ('data' , ' ' ), ('entityref' , 'lt' )],
233+ convert_charrefs = False )
234+ self ._run_check ('&#123 &lt' , [('data' , '{ <' )], convert_charrefs = True )
235+ self ._run_check ('&#xab &lt' , [('charref' , 'xab' ), ('data' , ' ' ), ('entityref' , 'lt' )],
236+ convert_charrefs = False )
237+ self ._run_check ('&#xab &lt' , [('data' , '\xab <' )], convert_charrefs = True )
238+
239+ self ._run_check ('&#123456789 &lt' ,
240+ [('charref' , '123456789' ), ('data' , ' ' ), ('entityref' , 'lt' )],
241+ convert_charrefs = False )
242+ self ._run_check ('&#123456789 &lt' , [('data' , '\ufffd <' )],
243+ convert_charrefs = True )
244+ self ._run_check ('&#x123456789 &lt' ,
245+ [('charref' , 'x123456789' ), ('data' , ' ' ), ('entityref' , 'lt' )],
246+ convert_charrefs = False )
247+ self ._run_check ('&#x123456789 &lt' , [('data' , '\ufffd <' )],
248+ convert_charrefs = True )
249+
250+ self ._run_check ('&# &lt' , [('data' , '&# ' ), ('entityref' , 'lt' )], convert_charrefs = False )
251+ self ._run_check ('&# &lt' , [('data' , '&# <' )], convert_charrefs = True )
252+ self ._run_check ('&#x &lt' , [('data' , '&#x ' ), ('entityref' , 'lt' )], convert_charrefs = False )
253+ self ._run_check ('&#x &lt' , [('data' , '&#x <' )], convert_charrefs = True )
254+
255+ def test_eof_in_charref (self ):
256+ self ._run_check ('&#123' , [('charref' , '123' )], convert_charrefs = False )
257+ self ._run_check ('&#123' , [('data' , '{' )], convert_charrefs = True )
258+ self ._run_check ('&#xab' , [('charref' , 'xab' )], convert_charrefs = False )
259+ self ._run_check ('&#xab' , [('data' , '\xab' )], convert_charrefs = True )
260+
261+ self ._run_check ('&#123456789' , [('charref' , '123456789' )],
262+ convert_charrefs = False )
263+ self ._run_check ('&#123456789' , [('data' , '\ufffd' )], convert_charrefs = True )
264+ self ._run_check ('&#x123456789' , [('charref' , 'x123456789' )],
265+ convert_charrefs = False )
266+ self ._run_check ('&#x123456789' , [('data' , '\ufffd' )], convert_charrefs = True )
267+
268+ self ._run_check ('&#' , [('data' , '&#' )], convert_charrefs = False )
269+ self ._run_check ('&#' , [('data' , '&#' )], convert_charrefs = True )
270+ self ._run_check ('&#x' , [('data' , '&#x' )], convert_charrefs = False )
271+ self ._run_check ('&#x' , [('data' , '&#x' )], convert_charrefs = True )
194272
195273 def test_bad_nesting (self ):
196274 # Strangely, this *is* supposed to test that overlapping
@@ -762,20 +840,6 @@ def test_correct_detection_of_start_tags(self):
762840 ]
763841 self ._run_check (html , expected )
764842
765- def test_EOF_in_charref (self ):
766- # see #17802
767- # This test checks that the UnboundLocalError reported in the issue
768- # is not raised, however I'm not sure the returned values are correct.
769- # Maybe HTMLParser should use self.unescape for these
770- data = [
771- ('a&' , [('data' , 'a&' )]),
772- ('a&b' , [('data' , 'ab' )]),
773- ('a&b ' , [('data' , 'a' ), ('entityref' , 'b' ), ('data' , ' ' )]),
774- ('a&b;' , [('data' , 'a' ), ('entityref' , 'b' )]),
775- ]
776- for html , expected in data :
777- self ._run_check (html , expected )
778-
779843 def test_eof_in_comments (self ):
780844 data = [
781845 ('<!--' , [('comment' , '' )]),
0 commit comments