88from test import support
99
1010
11+ SAMPLE_RCDATA = (
12+ '<!-- not a comment -->'
13+ "<not a='start tag'>"
14+ '<![CDATA[not a cdata]]>'
15+ '<!not a bogus comment>'
16+ '</not a bogus comment>'
17+ '\u2603 '
18+ )
19+
20+ SAMPLE_RAWTEXT = SAMPLE_RCDATA + '&☺'
21+
22+
1123class EventCollector (html .parser .HTMLParser ):
1224
1325 def __init__ (self , * args , autocdata = False , ** kw ):
@@ -293,30 +305,20 @@ def test_get_starttag_text(self):
293305 'Date().getTime()+\' "><\\ /s\' +\' cript>\' );\n //]]>' ),
294306 '\n <!-- //\n var foo = 3.14;\n // -->\n ' ,
295307 '<!-- \u2603 -->' ,
296- 'foo = "</ script>"' ,
297- 'foo = "</scripture>"' ,
298- 'foo = "</script\v >"' ,
299- 'foo = "</script\xa0 >"' ,
300- 'foo = "</ſcript>"' ,
301- 'foo = "</scrıpt>"' ,
302308 ])
303309 def test_script_content (self , content ):
304310 s = f'<script>{ content } </script>'
305- self ._run_check (s , [("starttag" , "script" , []),
306- ("data" , content ),
307- ("endtag" , "script" )])
311+ self ._run_check (s , [
312+ ("starttag" , "script" , []),
313+ ("data" , content ),
314+ ("endtag" , "script" ),
315+ ])
308316
309317 @support .subTests ('content' , [
310318 'a::before { content: "<!-- not a comment -->"; }' ,
311319 'a::before { content: "¬-an-entity-ref;"; }' ,
312320 'a::before { content: "<not a=\' start tag\' >"; }' ,
313321 'a::before { content: "\u2603 "; }' ,
314- 'a::before { content: "< /style>"; }' ,
315- 'a::before { content: "</ style>"; }' ,
316- 'a::before { content: "</styled>"; }' ,
317- 'a::before { content: "</style\v >"; }' ,
318- 'a::before { content: "</style\xa0 >"; }' ,
319- 'a::before { content: "</ſtyle>"; }' ,
320322 ])
321323 def test_style_content (self , content ):
322324 s = f'<style>{ content } </style>'
@@ -326,23 +328,10 @@ def test_style_content(self, content):
326328
327329 @support .subTests ('tag' , ['title' , 'textarea' ])
328330 def test_rcdata_content (self , tag ):
329- content = (
330- '<!-- not a comment -->'
331- "<not a='start tag'>"
332- '<![CDATA[not a cdata]]>'
333- '<!not a bogus comment>'
334- '</not a bogus comment>'
335- '\u2603 '
336- f'< /{ tag } >'
337- f'</ { tag } >'
338- f'</{ tag } x>'
339- f'</{ tag } \v >'
340- f'</{ tag } \xa0 >'
341- )
342- source = f"<{ tag } >{ content } </{ tag } >"
331+ source = f"<{ tag } >{ SAMPLE_RCDATA } </{ tag } >"
343332 self ._run_check (source , [
344333 ("starttag" , tag , []),
345- ("data" , content ),
334+ ("data" , SAMPLE_RCDATA ),
346335 ("endtag" , tag ),
347336 ])
348337 source = f"<{ tag } >&</{ tag } >"
@@ -355,107 +344,43 @@ def test_rcdata_content(self, tag):
355344 @support .subTests ('tag' ,
356345 ['style' , 'xmp' , 'iframe' , 'noembed' , 'noframes' , 'script' ])
357346 def test_rawtext_content (self , tag ):
358- content = (
359- '<!-- not a comment -->'
360- '¬-an-entity-ref;'
361- "<not a='start tag'>"
362- '<![CDATA[not a cdata]]>'
363- '<!not a bogus comment>'
364- '</not a bogus comment>'
365- '\u2603 '
366- f'< /{ tag } >'
367- f'</ { tag } >'
368- f'</{ tag } x>'
369- f'</{ tag } \v >'
370- f'</{ tag } \xa0 >'
371- )
372- source = f"<{ tag } >{ content } </{ tag } >"
347+ source = f"<{ tag } >{ SAMPLE_RAWTEXT } </{ tag } >"
373348 self ._run_check (source , [
374349 ("starttag" , tag , []),
375- ("data" , content ),
350+ ("data" , SAMPLE_RAWTEXT ),
376351 ("endtag" , tag ),
377352 ])
378353
379354 def test_noscript_content (self ):
380- content = (
381- '<!-- not a comment -->'
382- '¬-an-entity-ref;'
383- "<not a='start tag'>"
384- '<![CDATA[not a cdata]]>'
385- '<!not a bogus comment>'
386- '</not a bogus comment>'
387- '\u2603 '
388- f'< /noscript>'
389- f'</ noscript>'
390- f'</noscriptx>'
391- f'</noscript\v >'
392- f'</noscript\xa0 >'
393- )
394- source = f"<noscript>{ content } </noscript>"
355+ source = f"<noscript>{ SAMPLE_RAWTEXT } </noscript>"
356+ # scripting=False -- normal mode
395357 self ._run_check (source , [
396358 ('starttag' , 'noscript' , []),
397359 ('comment' , ' not a comment ' ),
398- ('entityref' , 'not' ),
399- ('data' , '-an-entity-ref;' ),
400360 ('starttag' , 'not' , [('a' , 'start tag' )]),
401361 ('unknown decl' , 'CDATA[not a cdata' ),
402362 ('comment' , 'not a bogus comment' ),
403363 ('endtag' , 'not' ),
404- ('data' , '☃< /noscript>' ),
405- ('comment' , ' noscript' ),
406- ('endtag' , 'noscriptx' ),
407- ('endtag' , 'noscript\x0b ' ),
408- ('endtag' , 'noscript\xa0 ' ),
409- ('endtag' , 'noscript' )
364+ ('data' , '☃' ),
365+ ('entityref' , 'amp' ),
366+ ('charref' , '9786' ),
367+ ('endtag' , 'noscript' ),
410368 ])
369+ # scripting=True -- RAWTEXT mode
411370 self ._run_check (source , [
412371 ("starttag" , "noscript" , []),
413- ("data" , content ),
372+ ("data" , SAMPLE_RAWTEXT ),
414373 ("endtag" , "noscript" ),
415- ], collector = EventCollector (convert_charrefs = False , scripting = True ))
374+ ], collector = EventCollector (scripting = True ))
416375
417376 def test_plaintext_content (self ):
418- content = (
419- '<!-- not a comment -->'
420- '¬-an-entity-ref;'
421- "<not a='start tag'>"
422- '<![CDATA[not a cdata]]>'
423- '<!not a bogus comment>'
424- '</not a bogus comment>'
425- '\u2603 '
426- '</plaintext>'
427- )
377+ content = SAMPLE_RAWTEXT + '</plaintext>' # not closing
428378 source = f"<plaintext>{ content } "
429379 self ._run_check (source , [
430380 ("starttag" , "plaintext" , []),
431381 ("data" , content ),
432382 ])
433383
434- @support .subTests ('tag,endtag' , [
435- ('title' , 'tıtle' ),
436- ('style' , 'ſtyle' ),
437- ('style' , 'ſtyle' ),
438- ('style' , 'style' ),
439- ('iframe' , 'ıframe' ),
440- ('noframes' , 'noframeſ' ),
441- ('noscript' , 'noſcript' ),
442- ('noscript' , 'noscrıpt' ),
443- ('script' , 'ſcript' ),
444- ('script' , 'scrıpt' ),
445- ])
446- def test_invalid_nonascii_closing_tag (self , tag , endtag ):
447- source = f"<{ tag } ><a></{ endtag } >"
448- self ._run_check (source , [
449- ("starttag" , tag , []),
450- ("data" , f"<a></{ endtag } >" ),
451- ], collector = EventCollector (convert_charrefs = False , scripting = True ))
452- source = f"<{ tag } ><a></{ endtag } ></{ tag } >"
453- self ._run_check (source , [
454- ("starttag" , tag , []),
455- ("data" , f"<a></{ endtag } >" ),
456- ("endtag" , tag ),
457- ], collector = EventCollector (convert_charrefs = False , scripting = True ))
458-
459384 @support .subTests ('endtag' , ['script' , 'SCRIPT' , 'script ' , 'script\n ' ,
460385 'script/' , 'script foo=bar' , 'script foo=">"' ])
461386 def test_script_closing_tag (self , endtag ):
@@ -470,66 +395,65 @@ def test_script_closing_tag(self, endtag):
470395 ("endtag" , "script" )],
471396 collector = EventCollectorNoNormalize (convert_charrefs = False ))
472397
473- @support .subTests ('endtag' , ['style' , 'STYLE' , 'style ' , 'style\n ' ,
474- 'style/' , 'style foo=bar' , 'style foo=">"' ])
475- def test_style_closing_tag (self , endtag ):
476- content = """
477- b::before { content: "<!-- not a comment -->"; }
478- p::before { content: "¬-an-entity-ref;"; }
479- a::before { content: "<i>"; }
480- a::after { content: "</i>"; }
481- """
482- s = f'<StyLE>{ content } </{ endtag } >'
483- self ._run_check (s , [("starttag" , "style" , []),
484- ("data" , content ),
485- ("endtag" , "style" )],
486- collector = EventCollectorNoNormalize (convert_charrefs = False ))
487-
488- @support .subTests ('endtag' , ['title' , 'TITLE' , 'title ' , 'title\n ' ,
489- 'title/' , 'title foo=bar' , 'title foo=">"' ])
490- def test_title_closing_tag (self , endtag ):
491- content = "<!-- not a comment --><i>Egg & Spam</i>"
492- s = f'<TitLe>{ content } </{ endtag } >'
493- self ._run_check (s , [("starttag" , "title" , []),
494- ('data' , '<!-- not a comment --><i>Egg & Spam</i>' ),
495- ("endtag" , "title" )],
496- collector = EventCollectorNoNormalize (convert_charrefs = True ))
497- self ._run_check (s , [("starttag" , "title" , []),
498- ('data' , '<!-- not a comment --><i>Egg ' ),
499- ('entityref' , 'amp' ),
500- ('data' , ' Spam</i>' ),
501- ("endtag" , "title" )],
502- collector = EventCollectorNoNormalize (convert_charrefs = False ))
503-
504- @support .subTests ('endtag' , ['textarea' , 'TEXTAREA' , 'textarea ' , 'textarea\n ' ,
505- 'textarea/' , 'textarea foo=bar' , 'textarea foo=">"' ])
506- def test_textarea_closing_tag (self , endtag ):
507- content = "<!-- not a comment --><i>Egg & Spam</i>"
508- s = f'<TexTarEa>{ content } </{ endtag } >'
509- self ._run_check (s , [("starttag" , "textarea" , []),
510- ('data' , '<!-- not a comment --><i>Egg & Spam</i>' ),
511- ("endtag" , "textarea" )],
512- collector = EventCollectorNoNormalize (convert_charrefs = True ))
513- self ._run_check (s , [("starttag" , "textarea" , []),
514- ('data' , '<!-- not a comment --><i>Egg ' ),
515- ('entityref' , 'amp' ),
516- ('data' , ' Spam</i>' ),
517- ("endtag" , "textarea" )],
518- collector = EventCollectorNoNormalize (convert_charrefs = False ))
519-
520- @support .subTests ('starttag' , ['TitLe' , 'TexTarEa' , 'StyLE' , 'XmP' ,
521- 'iFraMe' , 'noEmBed' , 'noFraMes' , 'noScrIPt' ,
522- 'ScrIPt' ])
523- def test_closing_tag (self , starttag ):
524- tag = starttag .lower ()
398+ @support .subTests ('tag' , [
399+ 'script' , 'style' , 'xmp' , 'iframe' , 'noembed' , 'noframes' ,
400+ 'textarea' , 'title' , 'noscript' ,
401+ ])
402+ def test_closing_tag (self , tag ):
525403 for endtag in [tag , tag .upper (), f'{ tag } ' , f'{ tag } \n ' ,
526404 f'{ tag } /' , f'{ tag } foo=bar' , f'{ tag } foo=">"' ]:
527405 content = "<!-- not a comment --><i>Spam</i>"
528- s = f'<{ starttag } >{ content } </{ endtag } >'
529- self ._run_check (s , [("starttag" , tag , []),
530- ('data' , content ),
531- ("endtag" , tag )],
532- collector = EventCollectorNoNormalize (convert_charrefs = False , scripting = True ))
406+ s = f'<{ tag .upper ()} >{ content } </{ endtag } >'
407+ self ._run_check (s , [
408+ ("starttag" , tag , []),
409+ ('data' , content ),
410+ ("endtag" , tag ),
411+ ], collector = EventCollectorNoNormalize (convert_charrefs = False , scripting = True ))
412+
413+ @support .subTests ('tag' , [
414+ 'script' , 'style' , 'xmp' , 'iframe' , 'noembed' , 'noframes' ,
415+ 'textarea' , 'title' , 'noscript' ,
416+ ])
417+ def test_invalid_closing_tag (self , tag ):
418+ content = (
419+ f'< /{ tag } >'
420+ f'</ { tag } >'
421+ f'</{ tag } x>'
422+ f'</{ tag } \v >'
423+ f'</{ tag } \xa0 >'
424+ )
425+ source = f"<{ tag } >{ content } </{ tag } >"
426+ self ._run_check (source , [
427+ ("starttag" , tag , []),
428+ ("data" , content ),
429+ ("endtag" , tag ),
430+ ], collector = EventCollector (convert_charrefs = False , scripting = True ))
431+
432+ @support .subTests ('tag,endtag' , [
433+ ('title' , 'tıtle' ),
434+ ('style' , 'ſtyle' ),
435+ ('style' , 'ſtyle' ),
436+ ('style' , 'style' ),
437+ ('iframe' , 'ıframe' ),
438+ ('noframes' , 'noframeſ' ),
439+ ('noscript' , 'noſcript' ),
440+ ('noscript' , 'noscrıpt' ),
441+ ('script' , 'ſcript' ),
442+ ('script' , 'scrıpt' ),
443+ ])
444+ def test_invalid_nonascii_closing_tag (self , tag , endtag ):
445+ content = f"<br></{ endtag } >"
446+ source = f"<{ tag } >{ content } "
447+ self ._run_check (source , [
448+ ("starttag" , tag , []),
449+ ("data" , content ),
450+ ], collector = EventCollector (convert_charrefs = False , scripting = True ))
451+ source = f"<{ tag } >{ content } </{ tag } >"
452+ self ._run_check (source , [
453+ ("starttag" , tag , []),
454+ ("data" , content ),
455+ ("endtag" , tag ),
456+ ], collector = EventCollector (convert_charrefs = False , scripting = True ))
533457
534458 @support .subTests ('tail,end' , [
535459 ('' , False ),
0 commit comments