1010
1111class EventCollector (html .parser .HTMLParser ):
1212
13- def __init__ (self , * args , ** kw ):
13+ def __init__ (self , * args , autocdata = False , ** kw ):
14+ self .autocdata = autocdata
1415 self .events = []
1516 self .append = self .events .append
1617 html .parser .HTMLParser .__init__ (self , * args , ** kw )
18+ if autocdata :
19+ self ._set_support_cdata (False )
1720
1821 def get_events (self ):
1922 # Normalize the list of events so that buffer artefacts don't
@@ -34,12 +37,16 @@ def get_events(self):
3437
def handle_starttag(self, tag, attrs):
    """Record a ("starttag", tag, attrs) event.

    In autocdata mode, an opening ``svg`` tag additionally switches
    CDATA support on.
    """
    event = ("starttag", tag, attrs)
    self.append(event)
    if not self.autocdata or tag != 'svg':
        return
    self._set_support_cdata(True)
3742
def handle_startendtag(self, tag, attrs):
    """Record a ("startendtag", tag, attrs) event."""
    event = ("startendtag", tag, attrs)
    self.append(event)
4045
def handle_endtag(self, tag):
    """Record an ("endtag", tag) event.

    In autocdata mode, a closing ``svg`` tag additionally switches
    CDATA support off again.
    """
    event = ("endtag", tag)
    self.append(event)
    if not self.autocdata or tag != 'svg':
        return
    self._set_support_cdata(False)
4350
4451 # all other markup
4552
@@ -767,10 +774,6 @@ def test_eof_in_declarations(self):
767774 ('<!' , [('comment' , '' )]),
768775 ('<!-' , [('comment' , '-' )]),
769776 ('<![' , [('comment' , '[' )]),
770- ('<![CDATA[' , [('unknown decl' , 'CDATA[' )]),
771- ('<![CDATA[x' , [('unknown decl' , 'CDATA[x' )]),
772- ('<![CDATA[x]' , [('unknown decl' , 'CDATA[x]' )]),
773- ('<![CDATA[x]]' , [('unknown decl' , 'CDATA[x]]' )]),
774777 ('<!DOCTYPE' , [('decl' , 'DOCTYPE' )]),
775778 ('<!DOCTYPE ' , [('decl' , 'DOCTYPE ' )]),
776779 ('<!DOCTYPE html' , [('decl' , 'DOCTYPE html' )]),
@@ -783,6 +786,18 @@ def test_eof_in_declarations(self):
783786 for html , expected in data :
784787 self ._run_check (html , expected )
785788
@support.subTests('content', ['', 'x', 'x]', 'x]]'])
def test_eof_in_cdata(self, content):
    """Check the events produced by an unterminated ``<![CDATA[`` at EOF.

    *content* is the text following ``<![CDATA[``; none of the
    parametrised values contains the closing ``]]>``.
    """
    source = '<![CDATA[' + content

    # Default collector: expected as an 'unknown decl' event.
    self._run_check(source, [('unknown decl', 'CDATA[' + content)])

    # autocdata collector outside <svg>: CDATA support is off, so the
    # input is expected as a comment.  The comment payload is what
    # follows "<!" and therefore starts with "[CDATA[" (compare
    # '<![' -> ('comment', '[') in test_eof_in_declarations).
    self._run_check(source,
                    [('comment', '[CDATA[' + content)],
                    collector=EventCollector(autocdata=True))

    # Same unterminated section nested inside <svg><text>.
    self._run_check('<svg><text y="100">' + source,
                    [('starttag', 'svg', []),
                     ('starttag', 'text', [('y', '100')]),
                     ('unknown decl', 'CDATA[' + content)])
800+
786801 def test_bogus_comments (self ):
787802 html = ('<!ELEMENT br EMPTY>'
788803 '<! not really a comment >'
@@ -845,28 +860,53 @@ def test_broken_condcoms(self):
845860 ]
846861 self ._run_check (html , expected )
847862
@support.subTests('content', [
    'just some plain text',
    '<!-- not a comment -->',
    # Must stay an undecoded entity reference; a rendered "¬" would
    # no longer exercise entity handling inside CDATA.
    '&not-an-entity-ref;',
    "<not a='start tag'>",
    '',
    '[[I have many brackets]]',
    'I have a > in the middle',
    'I have a ]] in the middle',
    # Near misses of the "]]>" terminator.
    '] ]>',
    ']] >',
    ('\n '
     ' if (a < b && a > b) {\n '
     ' printf("[<marquee>How?</marquee>]");\n '
     ' }\n '),
])
def test_cdata_section_content(self, content):
    """Check that CDATA text inside <svg> is reported verbatim.

    *content* is wrapped in ``<![CDATA[ ... ]]>`` inside
    ``<svg><text>`` and must come back unchanged in a single
    'unknown decl' event, with both the default collector and an
    autocdata collector.
    """
    # See "13.2.5.42 Markup declaration open state",
    # "13.2.5.69 CDATA section state", and issue bpo-32876.
    # No whitespace between the content and "]]>": the expected
    # payload below is exactly 'CDATA[' + content.
    html = f'<svg><text y="100"><![CDATA[{content}]]></text></svg>'
    expected = [
        ('starttag', 'svg', []),
        ('starttag', 'text', [('y', '100')]),
        ('unknown decl', 'CDATA[' + content),
        ('endtag', 'text'),
        ('endtag', 'svg'),
    ]
    self._run_check(html, expected)
    self._run_check(html, expected,
                    collector=EventCollector(autocdata=True))
855892
def test_cdata_section(self):
    """Check CDATA handling before, inside and after an <svg> element.

    Uses an autocdata collector: the same ``<![CDATA[...]]>`` text is
    expected as a comment plus trailing data outside ``<svg>``, and as
    a single 'unknown decl' event inside it.
    """
    # See "13.2.5.42 Markup declaration open state".
    html = ('<![CDATA[foo<br>bar]]>'
            '<svg><text y="100"><![CDATA[foo<br>bar]]></text></svg>'
            '<![CDATA[foo<br>bar]]>')
    # Events expected for the section when it appears outside <svg>.
    outside_svg = [
        ('comment', '[CDATA[foo<br'),
        ('data', 'bar]]>'),
    ]
    inside_svg = [
        ('starttag', 'svg', []),
        ('starttag', 'text', [('y', '100')]),
        ('unknown decl', 'CDATA[foo<br>bar'),
        ('endtag', 'text'),
        ('endtag', 'svg'),
    ]
    expected = [*outside_svg, *inside_svg, *outside_svg]
    collector = EventCollector(autocdata=True)
    self._run_check(html, expected, collector=collector)
870910
871911 def test_convert_charrefs_dropped_text (self ):
872912 # #23144: make sure that all the events are triggered when
0 commit comments