99
1010class EventCollector (html .parser .HTMLParser ):
1111
12- def __init__ (self , * args , ** kw ):
12+ def __init__ (self , * args , autocdata = False , ** kw ):
13+ self .autocdata = autocdata
1314 self .events = []
1415 self .append = self .events .append
1516 html .parser .HTMLParser .__init__ (self , * args , ** kw )
17+ if autocdata :
18+ self ._set_support_cdata (False )
1619
1720 def get_events (self ):
1821 # Normalize the list of events so that buffer artefacts don't
@@ -33,12 +36,16 @@ def get_events(self):
3336
def handle_starttag(self, tag, attrs):
    """Record a ("starttag", tag, attrs) event.

    When autocdata is enabled, an opening <svg> tag turns CDATA
    support on.
    """
    self.append(("starttag", tag, attrs))
    if self.autocdata and tag == 'svg':
        self._set_support_cdata(True)
3641
def handle_startendtag(self, tag, attrs):
    """Record a ("startendtag", tag, attrs) event."""
    self.append(("startendtag", tag, attrs))
3944
def handle_endtag(self, tag):
    """Record an ("endtag", tag) event.

    When autocdata is enabled, a closing </svg> tag turns CDATA
    support off again.
    """
    self.append(("endtag", tag))
    if self.autocdata and tag == 'svg':
        self._set_support_cdata(False)
4249
4350 # all other markup
4451
@@ -739,10 +746,6 @@ def test_eof_in_declarations(self):
739746 ('<!' , [('comment' , '' )]),
740747 ('<!-' , [('comment' , '-' )]),
741748 ('<![' , [('comment' , '[' )]),
742- ('<![CDATA[' , [('unknown decl' , 'CDATA[' )]),
743- ('<![CDATA[x' , [('unknown decl' , 'CDATA[x' )]),
744- ('<![CDATA[x]' , [('unknown decl' , 'CDATA[x]' )]),
745- ('<![CDATA[x]]' , [('unknown decl' , 'CDATA[x]]' )]),
746749 ('<!DOCTYPE' , [('decl' , 'DOCTYPE' )]),
747750 ('<!DOCTYPE ' , [('decl' , 'DOCTYPE ' )]),
748751 ('<!DOCTYPE html' , [('decl' , 'DOCTYPE html' )]),
@@ -755,6 +758,18 @@ def test_eof_in_declarations(self):
755758 for html , expected in data :
756759 self ._run_check (html , expected )
757760
@support.subTests('content', ['', 'x', 'x]', 'x]]'])
def test_eof_in_cdata(self, content):
    # An unterminated "<![CDATA[" runs to the end of the input.
    # Default collector: reported as an unknown declaration.
    self._run_check('<![CDATA[' + content,
                    [('unknown decl', 'CDATA[' + content)])
    # autocdata collector outside of <svg>: reported as a comment.
    self._run_check('<![CDATA[' + content,
                    [('comment', '[CDATA[' + content)],
                    collector=EventCollector(autocdata=True))
    # Default collector, CDATA nested inside <svg>: unknown declaration.
    self._run_check('<svg><text y="100"><![CDATA[' + content,
                    [('starttag', 'svg', []),
                     ('starttag', 'text', [('y', '100')]),
                     ('unknown decl', 'CDATA[' + content)])
772+
758773 def test_bogus_comments (self ):
759774 html = ('<!ELEMENT br EMPTY>'
760775 '<! not really a comment >'
@@ -804,8 +819,57 @@ def test_broken_condcoms(self):
804819 ('startendtag' , 'img' , [('src' , 'mammoth.bmp' )]),
805820 ('unknown decl' , 'endif' )
806821 ]
822+
807823 self ._run_check (html , expected )
808824
@support.subTests('content', [
    'just some plain text',
    '<!-- not a comment -->',
    '&not-an-entity-ref;',
    "<not a='start tag'>",
    '',
    '[[I have many brackets]]',
    'I have a > in the middle',
    'I have a ]] in the middle',
    '] ]>',
    ']] >',
    ('\n '
     ' if (a < b && a > b) {\n '
     ' printf("[<marquee>How?</marquee>]");\n '
     ' }\n '),
])
def test_cdata_section_content(self, content):
    # See "13.2.5.42 Markup declaration open state",
    # "13.2.5.69 CDATA section state", and issue bpo-32876.
    #
    # The section must be closed by "]]>" directly after the content so
    # that the reported declaration is exactly 'CDATA[' + content.
    html = f'<svg><text y="100"><![CDATA[{content}]]></text></svg>'
    expected = [
        ('starttag', 'svg', []),
        ('starttag', 'text', [('y', '100')]),
        ('unknown decl', 'CDATA[' + content),
        ('endtag', 'text'),
        ('endtag', 'svg'),
    ]
    # Same events are expected from the default collector and from one
    # that toggles CDATA support around <svg>.
    self._run_check(html, expected)
    self._run_check(html, expected, collector=EventCollector(autocdata=True))
854+
def test_cdata_section(self):
    # See "13.2.5.42 Markup declaration open state".
    #
    # With an autocdata collector, "<![CDATA[" outside of <svg> is
    # expected to parse as a bogus comment ending at the first ">",
    # while the same markup inside <svg> is a CDATA section.
    html = ('<![CDATA[foo<br>bar]]>'
            '<svg><text y="100"><![CDATA[foo<br>bar]]></text></svg>'
            '<![CDATA[foo<br>bar]]>')
    expected = [
        ('comment', '[CDATA[foo<br'),
        ('data', 'bar]]>'),
        ('starttag', 'svg', []),
        ('starttag', 'text', [('y', '100')]),
        ('unknown decl', 'CDATA[foo<br>bar'),
        ('endtag', 'text'),
        ('endtag', 'svg'),
        ('comment', '[CDATA[foo<br'),
        ('data', 'bar]]>'),
    ]
    self._run_check(html, expected, collector=EventCollector(autocdata=True))
872+
809873 def test_convert_charrefs_dropped_text (self ):
810874 # #23144: make sure that all the events are triggered when
811875 # convert_charrefs is True, even if we don't call .close()
0 commit comments