9
9
10
10
class EventCollector (html .parser .HTMLParser ):
11
11
12
- def __init__ (self , * args , ** kw ):
12
+ def __init__ (self , * args , autocdata = False , ** kw ):
13
+ self .autocdata = autocdata
13
14
self .events = []
14
15
self .append = self .events .append
15
16
html .parser .HTMLParser .__init__ (self , * args , ** kw )
17
+ if autocdata :
18
+ self ._set_support_cdata (False )
16
19
17
20
def get_events (self ):
18
21
# Normalize the list of events so that buffer artefacts don't
@@ -33,12 +36,16 @@ def get_events(self):
33
36
34
37
def handle_starttag (self , tag , attrs ):
35
38
self .append (("starttag" , tag , attrs ))
39
+ if self .autocdata and tag == 'svg' :
40
+ self ._set_support_cdata (True )
36
41
37
42
def handle_startendtag (self , tag , attrs ):
38
43
self .append (("startendtag" , tag , attrs ))
39
44
40
45
def handle_endtag (self , tag ):
41
46
self .append (("endtag" , tag ))
47
+ if self .autocdata and tag == 'svg' :
48
+ self ._set_support_cdata (False )
42
49
43
50
# all other markup
44
51
@@ -739,10 +746,6 @@ def test_eof_in_declarations(self):
739
746
('<!' , [('comment' , '' )]),
740
747
('<!-' , [('comment' , '-' )]),
741
748
('<![' , [('comment' , '[' )]),
742
- ('<![CDATA[' , [('unknown decl' , 'CDATA[' )]),
743
- ('<![CDATA[x' , [('unknown decl' , 'CDATA[x' )]),
744
- ('<![CDATA[x]' , [('unknown decl' , 'CDATA[x]' )]),
745
- ('<![CDATA[x]]' , [('unknown decl' , 'CDATA[x]]' )]),
746
749
('<!DOCTYPE' , [('decl' , 'DOCTYPE' )]),
747
750
('<!DOCTYPE ' , [('decl' , 'DOCTYPE ' )]),
748
751
('<!DOCTYPE html' , [('decl' , 'DOCTYPE html' )]),
@@ -755,6 +758,18 @@ def test_eof_in_declarations(self):
755
758
for html , expected in data :
756
759
self ._run_check (html , expected )
757
760
761
+ @support .subTests ('content' , ['' , 'x' , 'x]' , 'x]]' ])
762
+ def test_eof_in_cdata (self , content ):
763
+ self ._run_check ('<![CDATA[' + content ,
764
+ [('unknown decl' , 'CDATA[' + content )])
765
+ self ._run_check ('<![CDATA[' + content ,
766
+ [('comment' , '[CDATA[' + content )],
767
+ collector = EventCollector (autocdata = True ))
768
+ self ._run_check ('<svg><text y="100"><![CDATA[' + content ,
769
+ [('starttag' , 'svg' , []),
770
+ ('starttag' , 'text' , [('y' , '100' )]),
771
+ ('unknown decl' , 'CDATA[' + content )])
772
+
758
773
def test_bogus_comments (self ):
759
774
html = ('<!ELEMENT br EMPTY>'
760
775
'<! not really a comment >'
@@ -804,7 +819,56 @@ def test_broken_condcoms(self):
804
819
('startendtag' , 'img' , [('src' , 'mammoth.bmp' )]),
805
820
('unknown decl' , 'endif' )
806
821
]
822
+
823
+ self ._run_check (html , expected )
824
+
825
+ @support .subTests ('content' , [
826
+ 'just some plain text' ,
827
+ '<!-- not a comment -->' ,
828
+ '&not-an-entity-ref;' ,
829
+ "<not a='start tag'>" ,
830
+ '' ,
831
+ '[[I have many brackets]]' ,
832
+ 'I have a > in the middle' ,
833
+ 'I have a ]] in the middle' ,
834
+ '] ]>' ,
835
+ ']] >' ,
836
+ ('\n '
837
+ ' if (a < b && a > b) {\n '
838
+ ' printf("[<marquee>How?</marquee>]");\n '
839
+ ' }\n ' ),
840
+ ])
841
+ def test_cdata_section_content (self , content ):
842
+ # See "13.2.5.42 Markup declaration open state",
843
+ # "13.2.5.69 CDATA section state", and issue bpo-32876.
844
+ html = f'<svg><text y="100"><![CDATA[{content}]]></text></svg>'
845
+ expected = [
846
+ ('starttag' , 'svg' , []),
847
+ ('starttag' , 'text' , [('y' , '100' )]),
848
+ ('unknown decl' , 'CDATA[' + content ),
849
+ ('endtag' , 'text' ),
850
+ ('endtag' , 'svg' ),
851
+ ]
807
852
self ._run_check (html , expected )
853
+ self ._run_check (html , expected , collector = EventCollector (autocdata = True ))
854
+
855
+ def test_cdata_section (self ):
856
+ # See "13.2.5.42 Markup declaration open state".
857
+ html = ('<![CDATA[foo<br>bar]]>'
858
+ '<svg><text y="100"><![CDATA[foo<br>bar]]></text></svg>'
859
+ '<![CDATA[foo<br>bar]]>' )
860
+ expected = [
861
+ ('comment' , '[CDATA[foo<br' ),
862
+ ('data' , 'bar]]>' ),
863
+ ('starttag' , 'svg' , []),
864
+ ('starttag' , 'text' , [('y' , '100' )]),
865
+ ('unknown decl' , 'CDATA[foo<br>bar' ),
866
+ ('endtag' , 'text' ),
867
+ ('endtag' , 'svg' ),
868
+ ('comment' , '[CDATA[foo<br' ),
869
+ ('data' , 'bar]]>' ),
870
+ ]
871
+ self ._run_check (html , expected , collector = EventCollector (autocdata = True ))
808
872
809
873
def test_convert_charrefs_dropped_text (self ):
810
874
# #23144: make sure that all the events are triggered when
@@ -1041,27 +1105,6 @@ def test_weird_chars_in_unquoted_attribute_values(self):
1041
1105
('starttag' , 'form' ,
1042
1106
[('action' , 'bogus|&#()value' )])])
1043
1107
1044
- def test_invalid_keyword_error_exception (self ):
1045
- # bpo-34480: check that subclasses that define an
1046
- # error method that raises an exception work
1047
- class InvalidMarkupException (Exception ):
1048
- pass
1049
- class MyHTMLParser (html .parser .HTMLParser ):
1050
- def error (self , message ):
1051
- raise InvalidMarkupException (message )
1052
- parser = MyHTMLParser ()
1053
- with self .assertRaises (InvalidMarkupException ):
1054
- parser .feed ('<![invalid>' )
1055
-
1056
- def test_invalid_keyword_error_pass (self ):
1057
- # bpo-34480: check that subclasses that define an
1058
- # error method that doesn't raise an exception work
1059
- class MyHTMLParser (html .parser .HTMLParser ):
1060
- def error (self , message ):
1061
- pass
1062
- parser = MyHTMLParser ()
1063
- self .assertEqual (parser .feed ('<![invalid>' ), None )
1064
-
1065
1108
1066
1109
if __name__ == "__main__" :
1067
1110
unittest .main ()
0 commit comments