Skip to content

Commit 1ddd636

Browse files
ag-eitilt authored and gsnedders committed
Improve coverage of the Tokenizer tests
Slight overkill in places, but I figured it's better to err on the side of too many tests than too few.
1 parent a439a5b commit 1ddd636

File tree

6 files changed

+778
-20
lines changed

6 files changed

+778
-20
lines changed

tokenizer/contentModelFlags.test

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,12 @@
66
"input":"<head>&body;",
77
"output":[["Character", "<head>&body;"]]},
88

9+
{"description":"PLAINTEXT with seeming close tag",
10+
"initialStates":["PLAINTEXT state"],
11+
"lastStartTag":"plaintext",
12+
"input":"</plaintext>&body;",
13+
"output":[["Character", "</plaintext>&body;"]]},
14+
915
{"description":"End tag closing RCDATA or RAWTEXT",
1016
"initialStates":["RCDATA state", "RAWTEXT state"],
1117
"lastStartTag":"xmp",

tokenizer/domjs.test

Lines changed: 134 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
]
2626
},
2727
{
28-
"description":"NUL in RCDATA, RAWTEXT, PLAINTEXT and Script data",
28+
"description":"Raw NUL replacement",
2929
"doubleEscaped":true,
3030
"initialStates":["RCDATA state", "RAWTEXT state", "PLAINTEXT state", "Script data state"],
3131
"input":"\\u0000",
@@ -34,6 +34,13 @@
3434
{ "code": "unexpected-null-character", "line": 1, "col": 1 }
3535
]
3636
},
37+
{
38+
"description":"NUL in CDATA section",
39+
"doubleEscaped":true,
40+
"initialStates":["CDATA section state"],
41+
"input":"\\u0000]]>",
42+
"output":[["Character", "\\u0000"]]
43+
},
3744
{
3845
"description":"NUL in script HTML comment",
3946
"doubleEscaped":true,
@@ -112,20 +119,95 @@
112119
{ "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 13 }
113120
]
114121
},
122+
{
123+
"description":"Dash in script HTML comment",
124+
"initialStates":["Script data state"],
125+
"input":"<!-- - -->",
126+
"output":[["Character", "<!-- - -->"]]
127+
},
128+
{
129+
"description":"Dash less-than in script HTML comment",
130+
"initialStates":["Script data state"],
131+
"input":"<!-- -< -->",
132+
"output":[["Character", "<!-- -< -->"]]
133+
},
134+
{
135+
"description":"Dash at end of script HTML comment",
136+
"initialStates":["Script data state"],
137+
"input":"<!--test--->",
138+
"output":[["Character", "<!--test--->"]]
139+
},
140+
{
141+
"description":"</script> in script HTML comment",
142+
"initialStates":["Script data state"],
143+
"lastStartTag":"script",
144+
"input":"<!-- </script> --></script>",
145+
"output":[["Character", "<!-- "], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
146+
},
147+
{
148+
"description":"</script> in script HTML comment - double escaped",
149+
"initialStates":["Script data state"],
150+
"lastStartTag":"script",
151+
"input":"<!-- <script></script> --></script>",
152+
"output":[["Character", "<!-- <script></script> -->"], ["EndTag", "script"]]
153+
},
154+
{
155+
"description":"</script> in script HTML comment - double escaped with nested <script>",
156+
"initialStates":["Script data state"],
157+
"lastStartTag":"script",
158+
"input":"<!-- <script><script></script></script> --></script>",
159+
"output":[["Character", "<!-- <script><script></script>"], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
160+
},
161+
{
162+
"description":"</script> in script HTML comment - double escaped with abrupt end",
163+
"initialStates":["Script data state"],
164+
"lastStartTag":"script",
165+
"input":"<!-- <script>--></script> --></script>",
166+
"output":[["Character", "<!-- <script>-->"], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
167+
},
168+
{
169+
"description":"Incomplete start tag in script HTML comment double escaped",
170+
"initialStates":["Script data state"],
171+
"lastStartTag":"script",
172+
"input":"<!--<scrip></script>-->",
173+
"output":[["Character", "<!--<scrip>"], ["EndTag", "script"], ["Character", "-->"]]
174+
},
175+
{
176+
"description":"Unclosed start tag in script HTML comment double escaped",
177+
"initialStates":["Script data state"],
178+
"lastStartTag":"script",
179+
"input":"<!--<script</script>-->",
180+
"output":[["Character", "<!--<script"], ["EndTag", "script"], ["Character", "-->"]]
181+
},
182+
{
183+
"description":"Incomplete end tag in script HTML comment double escaped",
184+
"initialStates":["Script data state"],
185+
"lastStartTag":"script",
186+
"input":"<!--<script></scrip>-->",
187+
"output":[["Character", "<!--<script></scrip>-->"]]
188+
},
189+
{
190+
"description":"Unclosed end tag in script HTML comment double escaped",
191+
"initialStates":["Script data state"],
192+
"lastStartTag":"script",
193+
"input":"<!--<script></script-->",
194+
"output":[["Character", "<!--<script></script-->"]]
195+
},
115196
{
116197
"description":"leading U+FEFF must pass through",
198+
"initialStates":["Data state", "RCDATA state", "RAWTEXT state", "Script data state"],
117199
"doubleEscaped":true,
118200
"input":"\\uFEFFfoo\\uFEFFbar",
119201
"output":[["Character", "\\uFEFFfoo\\uFEFFbar"]]
120202
},
121203
{
122-
"description":"Non BMP-charref in in RCDATA",
204+
"description":"Non BMP-charref in RCDATA",
123205
"initialStates":["RCDATA state"],
124206
"input":"&NotEqualTilde;",
125207
"output":[["Character", "\u2242\u0338"]]
126208
},
127209
{
128-
"description":"Bad charref in in RCDATA",
210+
"description":"Bad charref in RCDATA",
129211
"initialStates":["RCDATA state"],
130212
"input":"&NotEqualTild;",
131213
"output":[["Character", "&NotEqualTild;"]],
@@ -134,36 +216,36 @@
134216
]
135217
},
136218
{
137-
"description":"lowercase endtags in RCDATA and RAWTEXT",
138-
"initialStates":["RCDATA state", "RAWTEXT state"],
219+
"description":"lowercase endtags",
220+
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
139221
"lastStartTag":"xmp",
140222
"input":"</XMP>",
141223
"output":[["EndTag","xmp"]]
142224
},
143225
{
144-
"description":"bad endtag in RCDATA and RAWTEXT",
145-
"initialStates":["RCDATA state", "RAWTEXT state"],
226+
"description":"bad endtag (space before name)",
227+
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
146228
"lastStartTag":"xmp",
147229
"input":"</ XMP>",
148230
"output":[["Character","</ XMP>"]]
149231
},
150232
{
151-
"description":"bad endtag in RCDATA and RAWTEXT",
152-
"initialStates":["RCDATA state", "RAWTEXT state"],
233+
"description":"bad endtag (not matching last start tag)",
234+
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
153235
"lastStartTag":"xmp",
154236
"input":"</xm>",
155237
"output":[["Character","</xm>"]]
156238
},
157239
{
158-
"description":"bad endtag in RCDATA and RAWTEXT",
159-
"initialStates":["RCDATA state", "RAWTEXT state"],
240+
"description":"bad endtag (without close bracket)",
241+
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
160242
"lastStartTag":"xmp",
161243
"input":"</xm ",
162244
"output":[["Character","</xm "]]
163245
},
164246
{
165-
"description":"bad endtag in RCDATA and RAWTEXT",
166-
"initialStates":["RCDATA state", "RAWTEXT state"],
247+
"description":"bad endtag (trailing solidus)",
248+
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
167249
"lastStartTag":"xmp",
168250
"input":"</xm/",
169251
"output":[["Character","</xm/"]]
@@ -200,11 +282,47 @@
200282
},
201283
{
202284
"description":"CDATA content",
203-
"input":"foo&bar",
285+
"input":"foo&#32;]]>",
286+
"initialStates":["CDATA section state"],
287+
"output":[["Character", "foo&#32;"]]
288+
},
289+
{
290+
"description":"CDATA followed by HTML content",
291+
"input":"foo&#32;]]>&#32;",
292+
"initialStates":["CDATA section state"],
293+
"output":[["Character", "foo&#32; "]]
294+
},
295+
{
296+
"description":"CDATA with extra bracket",
297+
"input":"foo]]]>",
298+
"initialStates":["CDATA section state"],
299+
"output":[["Character", "foo]"]]
300+
},
301+
{
302+
"description":"CDATA without end marker",
303+
"input":"foo",
304+
"initialStates":["CDATA section state"],
305+
"output":[["Character", "foo"]],
306+
"errors":[
307+
{ "code": "eof-in-cdata", "line": 1, "col": 4 }
308+
]
309+
},
310+
{
311+
"description":"CDATA with single bracket ending",
312+
"input":"foo]",
313+
"initialStates":["CDATA section state"],
314+
"output":[["Character", "foo]"]],
315+
"errors":[
316+
{ "code": "eof-in-cdata", "line": 1, "col": 5 }
317+
]
318+
},
319+
{
320+
"description":"CDATA with two brackets ending",
321+
"input":"foo]]",
204322
"initialStates":["CDATA section state"],
205-
"output":[["Character", "foo&bar"]],
323+
"output":[["Character", "foo]]"]],
206324
"errors":[
207-
{ "code": "eof-in-cdata", "line": 1, "col": 8 }
325+
{ "code": "eof-in-cdata", "line": 1, "col": 6 }
208326
]
209327
}
210328

tokenizer/entities.test

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,47 @@
11
{"tests": [
22

3-
{"description": "Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name.",
3+
{"description": "Undefined named entity in a double-quoted attribute value ending in semicolon and whose name starts with a known entity name.",
4+
"input":"<h a=\"&noti;\">",
5+
"output": [["StartTag", "h", {"a": "&noti;"}]]},
6+
7+
{"description": "Entity name requiring semicolon instead followed by the equals sign in a double-quoted attribute value.",
8+
"input":"<h a=\"&lang=\">",
9+
"output": [["StartTag", "h", {"a": "&lang="}]]},
10+
11+
{"description": "Valid entity name followed by the equals sign in a double-quoted attribute value.",
12+
"input":"<h a=\"&not=\">",
13+
"output": [["StartTag", "h", {"a": "&not="}]]},
14+
15+
{"description": "Undefined named entity in a single-quoted attribute value ending in semicolon and whose name starts with a known entity name.",
416
"input":"<h a='&noti;'>",
517
"output": [["StartTag", "h", {"a": "&noti;"}]]},
618

7-
{"description": "Entity name followed by the equals sign in an attribute value.",
19+
{"description": "Entity name requiring semicolon instead followed by the equals sign in a single-quoted attribute value.",
820
"input":"<h a='&lang='>",
921
"output": [["StartTag", "h", {"a": "&lang="}]]},
1022

23+
{"description": "Valid entity name followed by the equals sign in a single-quoted attribute value.",
24+
"input":"<h a='&not='>",
25+
"output": [["StartTag", "h", {"a": "&not="}]]},
26+
27+
{"description": "Undefined named entity in an unquoted attribute value ending in semicolon and whose name starts with a known entity name.",
28+
"input":"<h a=&noti;>",
29+
"output": [["StartTag", "h", {"a": "&noti;"}]]},
30+
31+
{"description": "Entity name requiring semicolon instead followed by the equals sign in an unquoted attribute value.",
32+
"input":"<h a=&lang=>",
33+
"output": [["StartTag", "h", {"a": "&lang="}]],
34+
"errors":[
35+
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 11 }
36+
]},
37+
38+
{"description": "Valid entity name followed by the equals sign in an unquoted attribute value.",
39+
"input":"<h a=&not=>",
40+
"output": [["StartTag", "h", {"a": "&not="}]],
41+
"errors":[
42+
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 10 }
43+
]},
44+
1145
{"description": "Ambiguous ampersand.",
1246
"input":"&rrrraannddom;",
1347
"output": [["Character", "&rrrraannddom;"]],

0 commit comments

Comments
 (0)