Skip to content

Commit 3f4fba4

Browse files
committed
Strictly parse escape sequences in identifiers.
Make our escape sequence parsing match https://www.w3.org/TR/CSS21/grammar.html#scanner. In particular, don't accept "\\\r" as a valid escape sequence, and swallow one trailing whitespace character after every hex escape (not just 2-digit ones). We also tweak `readName` in the tokenizer to unescape the names it reads, which makes `@ch\041 rset` correctly parse as a `CHARSET_SYM`.
1 parent 4183453 commit 3f4fba4

File tree

3 files changed

+48
-9
lines changed

3 files changed

+48
-9
lines changed

src/css/TokenStream.js

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*global Tokens, TokenStreamBase*/
22

33
var h = /^[0-9a-fA-F]$/,
4-
nonascii = /^[\u0080-\uFFFF]$/,
4+
nonascii = /^[\u00A0-\uFFFF]$/,
55
nl = /\n|\r\n|\r|\f/,
66
whitespace = /\u0009|\u000a|\u000c|\u000d|\u0020/;
77

@@ -27,7 +27,7 @@ function isNewLine(c){
2727
}
2828

2929
function isNameStart(c){
30-
return c !== null && (/[a-z_\u0080-\uFFFF\\]/i.test(c));
30+
return c !== null && (/[a-z_\u00A0-\uFFFF\\]/i.test(c));
3131
}
3232

3333
function isNameChar(c){
@@ -214,6 +214,19 @@ TokenStream.prototype = mix(new TokenStreamBase(), {
214214
token = this.htmlCommentStartToken(c, startLine, startCol);
215215
break;
216216

217+
/*
218+
* Potential tokens:
219+
* - IDENT
220+
* - CHAR
221+
*/
222+
case "\\":
223+
if (/[^\r\n\f]/.test(reader.peek())) {
224+
token = this.identOrFunctionToken(c, startLine, startCol);
225+
} else {
226+
token = this.charToken(c, startLine, startCol);
227+
}
228+
break;
229+
217230
/*
218231
* Potential tokens:
219232
* - UNICODE_RANGE
@@ -942,8 +955,13 @@ TokenStream.prototype = mix(new TokenStreamBase(), {
942955

943956
while(true){
944957
if (c == "\\"){
945-
ident += this.readEscape(reader.read());
946-
c = reader.peek();
958+
if (/^[^\r\n\f]$/.test(reader.peek(2))) {
959+
ident += this.readEscape(reader.read(), true);
960+
c = reader.peek();
961+
} else {
962+
// Bad escape sequence.
963+
break;
964+
}
947965
} else if(c && isNameChar(c)){
948966
ident += reader.read();
949967
c = reader.peek();
@@ -955,7 +973,7 @@ TokenStream.prototype = mix(new TokenStreamBase(), {
955973
return ident;
956974
},
957975

958-
readEscape: function(first){
976+
readEscape: function(first, unescape){
959977
var reader = this._reader,
960978
cssEscape = first || "",
961979
i = 0,
@@ -968,13 +986,31 @@ TokenStream.prototype = mix(new TokenStreamBase(), {
968986
} while(c && isHexDigit(c) && ++i < 6);
969987
}
970988

971-
if (cssEscape.length == 3 && /\s/.test(c) ||
972-
cssEscape.length == 7 || cssEscape.length == 1){
989+
if (cssEscape.length === 1) {
990+
if (/^[^\r\n\f0-9a-f]$/.test(c)) {
973991
reader.read();
992+
if (unescape) { return c; }
993+
} else {
994+
// We should never get here (readName won't call readEscape
995+
// if the escape sequence is bad).
996+
throw new Error("Bad escape sequence.");
997+
}
998+
} else if (c === '\r') {
999+
reader.read();
1000+
if (reader.peek() === '\n') {
1001+
c += reader.read();
1002+
}
1003+
} else if (/^[ \t\n\f]$/.test(c)) {
1004+
reader.read();
9741005
} else {
9751006
c = "";
9761007
}
9771008

1009+
if (unescape) {
1010+
var cp = parseInt(cssEscape.slice(first.length), 16);
1011+
return String.fromCodePoint ? String.fromCodePoint(cp) :
1012+
String.fromCharCode(cp);
1013+
}
9781014
return cssEscape + c;
9791015
},
9801016

tests/css/Parser.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1836,7 +1836,7 @@
18361836

18371837
Assert.isInstanceOf(Selector, result, "Result should be an instance of Selector.");
18381838
Assert.isInstanceOf(SelectorPart, result.parts[0], "First part should be a SelectorPart.");
1839-
Assert.areEqual("#\\31 a2b3c", result.parts[0].toString(), "Selector should be correct.");
1839+
Assert.areEqual("#1a2b3c", result.parts[0].toString(), "Selector should be correct.");
18401840
Assert.areEqual(1, result.parts.length, "Should be one part.");
18411841
}
18421842

tests/css/TokenStream.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,9 @@
138138
"#h\\0fllo" : [CSSTokens.HASH],
139139
"#ffeeff" : [CSSTokens.HASH],
140140
"#\\31 a2b3c" : [CSSTokens.HASH],
141-
"#r0\\.5" : [CSSTokens.HASH]
141+
"#r0\\.5" : [CSSTokens.HASH],
142+
// Invalid escape sequence
143+
"#a\\\r" : [CSSTokens.HASH, CSSTokens.CHAR, CSSTokens.S]
142144
}
143145
}));
144146

@@ -150,6 +152,7 @@
150152

151153
var atRules = {
152154
"@charset" : CSSTokens.CHARSET_SYM,
155+
"@ch\\041 rset" : CSSTokens.CHARSET_SYM,
153156
"@import" : CSSTokens.IMPORT_SYM,
154157
"@page" : CSSTokens.PAGE_SYM,
155158
"@media" : CSSTokens.MEDIA_SYM,

0 commit comments

Comments
 (0)