Skip to content

Commit 69a2b33

Browse files
Rewrite tests.
1 parent 9971a24 commit 69a2b33

File tree

1 file changed

+87
-163
lines changed

1 file changed

+87
-163
lines changed

Lib/test/test_htmlparser.py

Lines changed: 87 additions & 163 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,18 @@
88
from test import support
99

1010

11+
SAMPLE_RCDATA = (
12+
'<!-- not a comment -->'
13+
"<not a='start tag'>"
14+
'<![CDATA[not a cdata]]>'
15+
'<!not a bogus comment>'
16+
'</not a bogus comment>'
17+
'\u2603'
18+
)
19+
20+
SAMPLE_RAWTEXT = SAMPLE_RCDATA + '&amp;&#9786;'
21+
22+
1123
class EventCollector(html.parser.HTMLParser):
1224

1325
def __init__(self, *args, autocdata=False, **kw):
@@ -293,30 +305,20 @@ def test_get_starttag_text(self):
293305
'Date().getTime()+\'"><\\/s\'+\'cript>\');\n//]]>'),
294306
'\n<!-- //\nvar foo = 3.14;\n// -->\n',
295307
'<!-- \u2603 -->',
296-
'foo = "</ script>"',
297-
'foo = "</scripture>"',
298-
'foo = "</script\v>"',
299-
'foo = "</script\xa0>"',
300-
'foo = "</ſcript>"',
301-
'foo = "</scrıpt>"',
302308
])
303309
def test_script_content(self, content):
304310
s = f'<script>{content}</script>'
305-
self._run_check(s, [("starttag", "script", []),
306-
("data", content),
307-
("endtag", "script")])
311+
self._run_check(s, [
312+
("starttag", "script", []),
313+
("data", content),
314+
("endtag", "script"),
315+
])
308316

309317
@support.subTests('content', [
310318
'a::before { content: "<!-- not a comment -->"; }',
311319
'a::before { content: "&not-an-entity-ref;"; }',
312320
'a::before { content: "<not a=\'start tag\'>"; }',
313321
'a::before { content: "\u2603"; }',
314-
'a::before { content: "< /style>"; }',
315-
'a::before { content: "</ style>"; }',
316-
'a::before { content: "</styled>"; }',
317-
'a::before { content: "</style\v>"; }',
318-
'a::before { content: "</style\xa0>"; }',
319-
'a::before { content: "</ſtyle>"; }',
320322
])
321323
def test_style_content(self, content):
322324
s = f'<style>{content}</style>'
@@ -326,23 +328,10 @@ def test_style_content(self, content):
326328

327329
@support.subTests('tag', ['title', 'textarea'])
328330
def test_rcdata_content(self, tag):
329-
content = (
330-
'<!-- not a comment -->'
331-
"<not a='start tag'>"
332-
'<![CDATA[not a cdata]]>'
333-
'<!not a bogus comment>'
334-
'</not a bogus comment>'
335-
'\u2603'
336-
f'< /{tag}>'
337-
f'</ {tag}>'
338-
f'</{tag}x>'
339-
f'</{tag}\v>'
340-
f'</{tag}\xa0>'
341-
)
342-
source = f"<{tag}>{content}</{tag}>"
331+
source = f"<{tag}>{SAMPLE_RCDATA}</{tag}>"
343332
self._run_check(source, [
344333
("starttag", tag, []),
345-
("data", content),
334+
("data", SAMPLE_RCDATA),
346335
("endtag", tag),
347336
])
348337
source = f"<{tag}>&amp;</{tag}>"
@@ -355,107 +344,43 @@ def test_rcdata_content(self, tag):
355344
@support.subTests('tag',
356345
['style', 'xmp', 'iframe', 'noembed', 'noframes', 'script'])
357346
def test_rawtext_content(self, tag):
358-
content = (
359-
'<!-- not a comment -->'
360-
'&not-an-entity-ref;'
361-
"<not a='start tag'>"
362-
'<![CDATA[not a cdata]]>'
363-
'<!not a bogus comment>'
364-
'</not a bogus comment>'
365-
'\u2603'
366-
f'< /{tag}>'
367-
f'</ {tag}>'
368-
f'</{tag}x>'
369-
f'</{tag}\v>'
370-
f'</{tag}\xa0>'
371-
)
372-
source = f"<{tag}>{content}</{tag}>"
347+
source = f"<{tag}>{SAMPLE_RAWTEXT}</{tag}>"
373348
self._run_check(source, [
374349
("starttag", tag, []),
375-
("data", content),
350+
("data", SAMPLE_RAWTEXT),
376351
("endtag", tag),
377352
])
378353

379354
def test_noscript_content(self):
380-
content = (
381-
'<!-- not a comment -->'
382-
'&not-an-entity-ref;'
383-
"<not a='start tag'>"
384-
'<![CDATA[not a cdata]]>'
385-
'<!not a bogus comment>'
386-
'</not a bogus comment>'
387-
'\u2603'
388-
f'< /noscript>'
389-
f'</ noscript>'
390-
f'</noscriptx>'
391-
f'</noscript\v>'
392-
f'</noscript\xa0>'
393-
)
394-
source = f"<noscript>{content}</noscript>"
355+
source = f"<noscript>{SAMPLE_RAWTEXT}</noscript>"
356+
# scripting=False -- normal mode
395357
self._run_check(source, [
396358
('starttag', 'noscript', []),
397359
('comment', ' not a comment '),
398-
('entityref', 'not'),
399-
('data', '-an-entity-ref;'),
400360
('starttag', 'not', [('a', 'start tag')]),
401361
('unknown decl', 'CDATA[not a cdata'),
402362
('comment', 'not a bogus comment'),
403363
('endtag', 'not'),
404-
('data', '☃< /noscript>'),
405-
('comment', ' noscript'),
406-
('endtag', 'noscriptx'),
407-
('endtag', 'noscript\x0b'),
408-
('endtag', 'noscript\xa0'),
409-
('endtag', 'noscript')
364+
('data', '☃'),
365+
('entityref', 'amp'),
366+
('charref', '9786'),
367+
('endtag', 'noscript'),
410368
])
369+
# scripting=True -- RAWTEXT mode
411370
self._run_check(source, [
412371
("starttag", "noscript", []),
413-
("data", content),
372+
("data", SAMPLE_RAWTEXT),
414373
("endtag", "noscript"),
415-
], collector=EventCollector(convert_charrefs=False, scripting=True))
374+
], collector=EventCollector(scripting=True))
416375

417376
def test_plaintext_content(self):
418-
content = (
419-
'<!-- not a comment -->'
420-
'&not-an-entity-ref;'
421-
"<not a='start tag'>"
422-
'<![CDATA[not a cdata]]>'
423-
'<!not a bogus comment>'
424-
'</not a bogus comment>'
425-
'\u2603'
426-
'</plaintext>'
427-
)
377+
content = SAMPLE_RAWTEXT + '</plaintext>' # not closing
428378
source = f"<plaintext>{content}"
429379
self._run_check(source, [
430380
("starttag", "plaintext", []),
431381
("data", content),
432382
])
433383

434-
@support.subTests('tag,endtag', [
435-
('title', 'tıtle'),
436-
('style', 'ſtyle'),
437-
('style', 'ﬅyle'),
438-
('style', 'ﬆyle'),
439-
('iframe', 'ıframe'),
440-
('noframes', 'noframeſ'),
441-
('noscript', 'noſcript'),
442-
('noscript', 'noscrıpt'),
443-
('script', 'ſcript'),
444-
('script', 'scrıpt'),
445-
])
446-
def test_invalid_nonascii_closing_tag(self, tag, endtag):
447-
source = f"<{tag}><a></{endtag}>"
448-
self._run_check(source, [
449-
("starttag", tag, []),
450-
("data", f"<a></{endtag}>"),
451-
], collector=EventCollector(convert_charrefs=False, scripting=True))
452-
source = f"<{tag}><a></{endtag}></{tag}>"
453-
self._run_check(source, [
454-
("starttag", tag, []),
455-
("data", f"<a></{endtag}>"),
456-
("endtag", tag),
457-
], collector=EventCollector(convert_charrefs=False, scripting=True))
458-
459384
@support.subTests('endtag', ['script', 'SCRIPT', 'script ', 'script\n',
460385
'script/', 'script foo=bar', 'script foo=">"'])
461386
def test_script_closing_tag(self, endtag):
@@ -470,66 +395,65 @@ def test_script_closing_tag(self, endtag):
470395
("endtag", "script")],
471396
collector=EventCollectorNoNormalize(convert_charrefs=False))
472397

473-
@support.subTests('endtag', ['style', 'STYLE', 'style ', 'style\n',
474-
'style/', 'style foo=bar', 'style foo=">"'])
475-
def test_style_closing_tag(self, endtag):
476-
content = """
477-
b::before { content: "<!-- not a comment -->"; }
478-
p::before { content: "&not-an-entity-ref;"; }
479-
a::before { content: "<i>"; }
480-
a::after { content: "</i>"; }
481-
"""
482-
s = f'<StyLE>{content}</{endtag}>'
483-
self._run_check(s, [("starttag", "style", []),
484-
("data", content),
485-
("endtag", "style")],
486-
collector=EventCollectorNoNormalize(convert_charrefs=False))
487-
488-
@support.subTests('endtag', ['title', 'TITLE', 'title ', 'title\n',
489-
'title/', 'title foo=bar', 'title foo=">"'])
490-
def test_title_closing_tag(self, endtag):
491-
content = "<!-- not a comment --><i>Egg &amp; Spam</i>"
492-
s = f'<TitLe>{content}</{endtag}>'
493-
self._run_check(s, [("starttag", "title", []),
494-
('data', '<!-- not a comment --><i>Egg & Spam</i>'),
495-
("endtag", "title")],
496-
collector=EventCollectorNoNormalize(convert_charrefs=True))
497-
self._run_check(s, [("starttag", "title", []),
498-
('data', '<!-- not a comment --><i>Egg '),
499-
('entityref', 'amp'),
500-
('data', ' Spam</i>'),
501-
("endtag", "title")],
502-
collector=EventCollectorNoNormalize(convert_charrefs=False))
503-
504-
@support.subTests('endtag', ['textarea', 'TEXTAREA', 'textarea ', 'textarea\n',
505-
'textarea/', 'textarea foo=bar', 'textarea foo=">"'])
506-
def test_textarea_closing_tag(self, endtag):
507-
content = "<!-- not a comment --><i>Egg &amp; Spam</i>"
508-
s = f'<TexTarEa>{content}</{endtag}>'
509-
self._run_check(s, [("starttag", "textarea", []),
510-
('data', '<!-- not a comment --><i>Egg & Spam</i>'),
511-
("endtag", "textarea")],
512-
collector=EventCollectorNoNormalize(convert_charrefs=True))
513-
self._run_check(s, [("starttag", "textarea", []),
514-
('data', '<!-- not a comment --><i>Egg '),
515-
('entityref', 'amp'),
516-
('data', ' Spam</i>'),
517-
("endtag", "textarea")],
518-
collector=EventCollectorNoNormalize(convert_charrefs=False))
519-
520-
@support.subTests('starttag', ['TitLe', 'TexTarEa', 'StyLE', 'XmP',
521-
'iFraMe', 'noEmBed', 'noFraMes', 'noScrIPt',
522-
'ScrIPt'])
523-
def test_closing_tag(self, starttag):
524-
tag = starttag.lower()
398+
@support.subTests('tag', [
399+
'script', 'style', 'xmp', 'iframe', 'noembed', 'noframes',
400+
'textarea', 'title', 'noscript',
401+
])
402+
def test_closing_tag(self, tag):
525403
for endtag in [tag, tag.upper(), f'{tag} ', f'{tag}\n',
526404
f'{tag}/', f'{tag} foo=bar', f'{tag} foo=">"']:
527405
content = "<!-- not a comment --><i>Spam</i>"
528-
s = f'<{starttag}>{content}</{endtag}>'
529-
self._run_check(s, [("starttag", tag, []),
530-
('data', content),
531-
("endtag", tag)],
532-
collector=EventCollectorNoNormalize(convert_charrefs=False, scripting=True))
406+
s = f'<{tag.upper()}>{content}</{endtag}>'
407+
self._run_check(s, [
408+
("starttag", tag, []),
409+
('data', content),
410+
("endtag", tag),
411+
], collector=EventCollectorNoNormalize(convert_charrefs=False, scripting=True))
412+
413+
@support.subTests('tag', [
414+
'script', 'style', 'xmp', 'iframe', 'noembed', 'noframes',
415+
'textarea', 'title', 'noscript',
416+
])
417+
def test_invalid_closing_tag(self, tag):
418+
content = (
419+
f'< /{tag}>'
420+
f'</ {tag}>'
421+
f'</{tag}x>'
422+
f'</{tag}\v>'
423+
f'</{tag}\xa0>'
424+
)
425+
source = f"<{tag}>{content}</{tag}>"
426+
self._run_check(source, [
427+
("starttag", tag, []),
428+
("data", content),
429+
("endtag", tag),
430+
], collector=EventCollector(convert_charrefs=False, scripting=True))
431+
432+
@support.subTests('tag,endtag', [
433+
('title', 'tıtle'),
434+
('style', 'ſtyle'),
435+
('style', 'ﬅyle'),
436+
('style', 'ﬆyle'),
437+
('iframe', 'ıframe'),
438+
('noframes', 'noframeſ'),
439+
('noscript', 'noſcript'),
440+
('noscript', 'noscrıpt'),
441+
('script', 'ſcript'),
442+
('script', 'scrıpt'),
443+
])
444+
def test_invalid_nonascii_closing_tag(self, tag, endtag):
445+
content = f"<br></{endtag}>"
446+
source = f"<{tag}>{content}"
447+
self._run_check(source, [
448+
("starttag", tag, []),
449+
("data", content),
450+
], collector=EventCollector(convert_charrefs=False, scripting=True))
451+
source = f"<{tag}>{content}</{tag}>"
452+
self._run_check(source, [
453+
("starttag", tag, []),
454+
("data", content),
455+
("endtag", tag),
456+
], collector=EventCollector(convert_charrefs=False, scripting=True))
533457

534458
@support.subTests('tail,end', [
535459
('', False),

0 commit comments

Comments
 (0)