@@ -50,6 +50,9 @@ def handle_endtag(self, tag):
5050
5151    # all other markup 
5252
53+     def  handle_bogus_comment (self , data ):
54+         self .append (("bogus comment" , data ))
55+ 
5356    def  handle_comment (self , data ):
5457        self .append (("comment" , data ))
5558
@@ -606,14 +609,14 @@ def test_starttag_junk_chars(self):
606609        self ._run_check ("< " , [('data' , '< ' )])
607610        self ._run_check ("</>" , [])
608611        self ._run_check ("<$>" , [('data' , '<$>' )])
609-         self ._run_check ("</$>" , [('comment' , '$' )])
612+         self ._run_check ("</$>" , [('bogus comment' , '$' )])
610613        self ._run_check ("</" , [('data' , '</' )])
611614        self ._run_check ("</a" , [])
612-         self ._run_check ("</ a>" , [('comment' , ' a' )])
613-         self ._run_check ("</ a" , [('comment' , ' a' )])
615+         self ._run_check ("</ a>" , [('bogus comment' , ' a' )])
616+         self ._run_check ("</ a" , [('bogus comment' , ' a' )])
614617        self ._run_check ("<a<a>" , [('starttag' , 'a<a' , [])])
615618        self ._run_check ("</a<a>" , [('endtag' , 'a<a' )])
616-         self ._run_check ("<!" , [('comment' , '' )])
619+         self ._run_check ("<!" , [('bogus comment' , '' )])
617620        self ._run_check ("<a" , [])
618621        self ._run_check ("<a foo='bar'" , [])
619622        self ._run_check ("<a foo='bar" , [])
@@ -666,7 +669,7 @@ def test_declaration_junk_chars(self):
666669
667670    def  test_illegal_declarations (self ):
668671        self ._run_check ('<!spacer type="block" height="25">' ,
669-                         [('comment' , 'spacer type="block" height="25"' )])
672+                         [('bogus comment' , 'spacer type="block" height="25"' )])
670673
671674    def  test_invalid_end_tags (self ):
672675        # A collection of broken end tags. <br> is used as separator. 
@@ -681,8 +684,8 @@ def test_invalid_end_tags(self):
681684                    # text and attributes are discarded 
682685                    ('endtag' , 'div' ),
683686                    ('starttag' , 'br' , []),
684-                     # comment because the first char after </ is not a-zA-Z 
685-                     ('comment' , '<h4' ),
687+                     # bogus comment because the first char after </ is not a-zA-Z 
688+                     ('bogus comment' , '<h4' ),
686689                    ('starttag' , 'br' , []),
687690                    # attributes are discarded 
688691                    ('endtag' , 'li' ),
@@ -771,9 +774,9 @@ def test_eof_in_comments(self):
771774
772775    def  test_eof_in_declarations (self ):
773776        data  =  [
774-             ('<!' , [('comment' , '' )]),
775-             ('<!-' , [('comment' , '-' )]),
776-             ('<![' , [('comment' , '[' )]),
777+             ('<!' , [('bogus comment' , '' )]),
778+             ('<!-' , [('bogus comment' , '-' )]),
779+             ('<![' , [('bogus comment' , '[' )]),
777780            ('<!DOCTYPE' , [('decl' , 'DOCTYPE' )]),
778781            ('<!DOCTYPE ' , [('decl' , 'DOCTYPE ' )]),
779782            ('<!DOCTYPE html' , [('decl' , 'DOCTYPE html' )]),
@@ -791,7 +794,7 @@ def test_eof_in_cdata(self, content):
791794        self ._run_check ('<![CDATA['  +  content ,
792795                        [('unknown decl' , 'CDATA['  +  content )])
793796        self ._run_check ('<![CDATA['  +  content ,
794-                         [('comment' , '![CDATA['  +  content )],
797+                         [('bogus comment' , '![CDATA['  +  content )],
795798                        collector = EventCollector (autocdata = True ))
796799        self ._run_check ('<svg><text y="100"><![CDATA['  +  content ,
797800                        [('starttag' , 'svg' , []),
@@ -814,19 +817,19 @@ def test_bogus_comments(self):
814817                '<![CDATA]]>'   # required '[' after CDATA 
815818        )
816819        expected  =  [
817-             ('comment' , 'ELEMENT br EMPTY' ),
818-             ('comment' , ' not really a comment ' ),
819-             ('comment' , ' not a comment either --' ),
820-             ('comment' , ' -- close enough --' ),
821-             ('comment' , '' ),
822-             ('comment' , '<-- this was an empty comment' ),
823-             ('comment' , '!! another bogus comment !!!' ),
824-             ('comment' , '[with square brackets]!' ),
825-             ('comment' , '[\n multiline\n bogusness\n ]!' ),
826-             ('comment' , '[more brackets]-[and a hyphen]!' ),
827-             ('comment' , '[cdata[should be uppercase]]' ),
828-             ('comment' , '[CDATA [whitespaces are not ignored]]' ),
829-             ('comment' , '[CDATA]]' ),
820+             ('bogus comment' , 'ELEMENT br EMPTY' ),
821+             ('bogus comment' , ' not really a comment ' ),
822+             ('bogus comment' , ' not a comment either --' ),
823+             ('bogus comment' , ' -- close enough --' ),
824+             ('bogus comment' , '' ),
825+             ('bogus comment' , '<-- this was an empty comment' ),
826+             ('bogus comment' , '!! another bogus comment !!!' ),
827+             ('bogus comment' , '[with square brackets]!' ),
828+             ('bogus comment' , '[\n multiline\n bogusness\n ]!' ),
829+             ('bogus comment' , '[more brackets]-[and a hyphen]!' ),
830+             ('bogus comment' , '[cdata[should be uppercase]]' ),
831+             ('bogus comment' , '[CDATA [whitespaces are not ignored]]' ),
832+             ('bogus comment' , '[CDATA]]' ),
830833        ]
831834        self ._run_check (html , expected )
832835
@@ -840,23 +843,23 @@ def test_broken_condcoms(self):
840843                '<![if !ie 6]><b>foo</b><![endif]>' 
841844                '<![if (!IE)|(lt IE 9)]><img src="mammoth.bmp" /><![endif]>' )
842845        expected  =  [
843-             ('comment' , '[if !(IE)]' ),
846+             ('bogus comment' , '[if !(IE)]' ),
844847            ('data' , 'broken condcom' ),
845-             ('comment' , '[endif]' ),
846-             ('comment' , '[if ! IE]' ),
848+             ('bogus comment' , '[endif]' ),
849+             ('bogus comment' , '[if ! IE]' ),
847850            ('startendtag' , 'link' , [('href' , 'favicon.tiff' )]),
848-             ('comment' , '[endif]' ),
849-             ('comment' , '[if !IE 6]' ),
851+             ('bogus comment' , '[endif]' ),
852+             ('bogus comment' , '[if !IE 6]' ),
850853            ('startendtag' , 'img' , [('src' , 'firefox.png' )]),
851-             ('comment' , '[endif]' ),
852-             ('comment' , '[if !ie 6]' ),
854+             ('bogus comment' , '[endif]' ),
855+             ('bogus comment' , '[if !ie 6]' ),
853856            ('starttag' , 'b' , []),
854857            ('data' , 'foo' ),
855858            ('endtag' , 'b' ),
856-             ('comment' , '[endif]' ),
857-             ('comment' , '[if (!IE)|(lt IE 9)]' ),
859+             ('bogus comment' , '[endif]' ),
860+             ('bogus comment' , '[if (!IE)|(lt IE 9)]' ),
858861            ('startendtag' , 'img' , [('src' , 'mammoth.bmp' )]),
859-             ('comment' , '[endif]' )
862+             ('bogus comment' , '[endif]' )
860863        ]
861864        self ._run_check (html , expected )
862865
@@ -896,14 +899,14 @@ def test_cdata_section(self):
896899                '<svg><text y="100"><![CDATA[foo<br>bar]]></text></svg>' 
897900                '<![CDATA[foo<br>bar]]>' )
898901        expected  =  [
899-             ('comment' , '[CDATA[foo<br' ),
902+             ('bogus comment' , '[CDATA[foo<br' ),
900903            ('data' , 'bar]]>' ),
901904            ('starttag' , 'svg' , []),
902905            ('starttag' , 'text' , [('y' , '100' )]),
903906            ('unknown decl' , 'CDATA[foo<br>bar' ),
904907            ('endtag' , 'text' ),
905908            ('endtag' , 'svg' ),
906-             ('comment' , '[CDATA[foo<br' ),
909+             ('bogus comment' , '[CDATA[foo<br' ),
907910            ('data' , 'bar]]>' ),
908911        ]
909912        self ._run_check (html , expected , collector = EventCollector (autocdata = True ))
0 commit comments