1010
1111class EventCollector (html .parser .HTMLParser ):
1212
13- def __init__ (self , * args , ** kw ):
13+ def __init__ (self , * args , autocdata = False , ** kw ):
14+ self .autocdata = autocdata
1415 self .events = []
1516 self .append = self .events .append
1617 html .parser .HTMLParser .__init__ (self , * args , ** kw )
18+ if autocdata :
19+ self .support_cdata (False )
1720
1821 def get_events (self ):
1922 # Normalize the list of events so that buffer artefacts don't
@@ -34,15 +37,15 @@ def get_events(self):
3437
3538 def handle_starttag (self , tag , attrs ):
3639 self .append (("starttag" , tag , attrs ))
37- if tag == 'svg' :
40+ if self . autocdata and tag == 'svg' :
3841 self .support_cdata (True )
3942
4043 def handle_startendtag (self , tag , attrs ):
4144 self .append (("startendtag" , tag , attrs ))
4245
4346 def handle_endtag (self , tag ):
4447 self .append (("endtag" , tag ))
45- if tag == 'svg' :
48+ if self . autocdata and tag == 'svg' :
4649 self .support_cdata (False )
4750
4851 # all other markup
@@ -771,22 +774,6 @@ def test_eof_in_declarations(self):
771774 ('<!' , [('comment' , '' )]),
772775 ('<!-' , [('comment' , '-' )]),
773776 ('<![' , [('comment' , '[' )]),
774- ('<![CDATA[' , [('comment' , '![CDATA[' )]),
775- ('<![CDATA[x' , [('comment' , '![CDATA[x' )]),
776- ('<![CDATA[x]' , [('comment' , '![CDATA[x]' )]),
777- ('<![CDATA[x]]' , [('comment' , '![CDATA[x]]' )]),
778- ('<svg><text y="100"><![CDATA[' ,
779- [('starttag' , 'svg' , []), ('starttag' , 'text' , [('y' , '100' )]),
780- ('unknown decl' , 'CDATA[' )]),
781- ('<svg><text y="100"><![CDATA[x' ,
782- [('starttag' , 'svg' , []), ('starttag' , 'text' , [('y' , '100' )]),
783- ('unknown decl' , 'CDATA[x' )]),
784- ('<svg><text y="100"><![CDATA[x]' ,
785- [('starttag' , 'svg' , []), ('starttag' , 'text' , [('y' , '100' )]),
786- ('unknown decl' , 'CDATA[x]' )]),
787- ('<svg><text y="100"><![CDATA[x]]' ,
788- [('starttag' , 'svg' , []), ('starttag' , 'text' , [('y' , '100' )]),
789- ('unknown decl' , 'CDATA[x]]' )]),
790777 ('<!DOCTYPE' , [('decl' , 'DOCTYPE' )]),
791778 ('<!DOCTYPE ' , [('decl' , 'DOCTYPE ' )]),
792779 ('<!DOCTYPE html' , [('decl' , 'DOCTYPE html' )]),
@@ -799,6 +786,18 @@ def test_eof_in_declarations(self):
799786 for html , expected in data :
800787 self ._run_check (html , expected )
801788
789+ @support .subTests ('content' , ['' , 'x' , 'x]' , 'x]]' ])
790+ def test_eof_in_cdata (self , content ):
791+ self ._run_check ('<![CDATA[' + content ,
792+ [('unknown decl' , 'CDATA[' + content )])
793+ self ._run_check ('<![CDATA[' + content ,
794+ [('comment' , '![CDATA[' + content )],
795+ collector = EventCollector (autocdata = True ))
796+ self ._run_check ('<svg><text y="100"><![CDATA[' + content ,
797+ [('starttag' , 'svg' , []),
798+ ('starttag' , 'text' , [('y' , '100' )]),
799+ ('unknown decl' , 'CDATA[' + content )])
800+
802801 def test_bogus_comments (self ):
803802 html = ('<!ELEMENT br EMPTY>'
804803 '<! not really a comment >'
@@ -889,6 +888,7 @@ def test_cdata_section_content(self, content):
889888 ('endtag' , 'svg' ),
890889 ]
891890 self ._run_check (html , expected )
891+ self ._run_check (html , expected , collector = EventCollector (autocdata = True ))
892892
893893 def test_cdata_section (self ):
894894 # See "13.2.5.42 Markup declaration open state".
@@ -906,7 +906,7 @@ def test_cdata_section(self):
906906 ('comment' , '[CDATA[foo<br' ),
907907 ('data' , 'bar]]>' ),
908908 ]
909- self ._run_check (html , expected )
909+ self ._run_check (html , expected , collector = EventCollector ( autocdata = True ) )
910910
911911 def test_convert_charrefs_dropped_text (self ):
912912 # #23144: make sure that all the events are triggered when
0 commit comments