Skip to content

Commit 90e7097

Browse files
committed
Properly parse CSS strings with escape sequences.
The CSS escape syntax is not the same as JavaScript escapes -- and besides, using `eval` is evil. Write a proper parser (and serializer) for CSS strings with escape sequences. Regular expressions in this patch based on those found in https://www.w3.org/TR/CSS21/grammar.html#scanner
1 parent 739bf0a commit 90e7097

File tree

3 files changed

+99
-46
lines changed

3 files changed

+99
-46
lines changed

src/css/PropertyValuePart.js

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,9 +142,12 @@ function PropertyValuePart(text, line, col){
142142
this.type = "function";
143143
this.name = RegExp.$1;
144144
this.value = text;
145-
} else if (/^["'][^"']*["']/.test(text)){ //string
145+
} else if (/^"([^\n\r\f\\"]|\\\r\n|\\[^\r0-9a-f]|\\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?)*"/i.test(text)){ //double-quoted string
146146
this.type = "string";
147-
this.value = eval(text);
147+
this.value = PropertyValuePart.parseString(text);
148+
} else if (/^'([^\n\r\f\\']|\\\r\n|\\[^\r0-9a-f]|\\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?)*'/i.test(text)){ //single-quoted string
149+
this.type = "string";
150+
this.value = PropertyValuePart.parseString(text);
148151
} else if (Colors[text.toLowerCase()]){ //named color
149152
this.type = "color";
150153
temp = Colors[text.toLowerCase()].substring(1);
@@ -164,6 +167,46 @@ function PropertyValuePart(text, line, col){
164167
PropertyValuePart.prototype = new SyntaxUnit();
165168
PropertyValuePart.prototype.constructor = PropertyValuePart;
166169

170+
/**
 * Helper method to parse a CSS string.
 *
 * Strips the surrounding quote characters and resolves CSS escape
 * sequences (which are not the same as JavaScript escapes):
 *   - backslash + newline (LF, CRLF, CR or FF) is a line continuation
 *     and is removed;
 *   - backslash + 1-6 hex digits (optionally followed by one whitespace
 *     character or a CRLF pair, which is swallowed) becomes the
 *     character with that code point;
 *   - backslash + any other character becomes that character.
 *
 * @param {String} str The raw CSS string token, including its
 *      surrounding single or double quotes.
 * @return {String} The decoded string value.
 * @method parseString
 * @static
 */
PropertyValuePart.parseString = function(str) {
    str = str.slice(1, -1); // Strip surrounding single/double quotes
    var replacer = function(match, esc) {
        // Escaped newline: a line continuation that contributes nothing.
        if (/^(\n|\r\n|\r|\f)$/.test(esc)) { return ""; }
        var m = /^[0-9a-f]{1,6}/i.exec(esc);
        if (m) {
            var codePoint = parseInt(m[0], 16);
            // Six hex digits can exceed the Unicode range; CSS 2.1 allows
            // substituting U+FFFD, and String.fromCodePoint would
            // otherwise throw a RangeError on such input.
            if (codePoint > 0x10FFFF) {
                return "\uFFFD";
            }
            if (String.fromCodePoint) {
                return String.fromCodePoint(codePoint);
            }
            // Old engines: build the surrogate pair by hand so astral
            // code points are not truncated by String.fromCharCode.
            if (codePoint > 0xFFFF) {
                codePoint -= 0x10000;
                return String.fromCharCode(0xD800 + (codePoint >> 10),
                                           0xDC00 + (codePoint & 0x3FF));
            }
            return String.fromCharCode(codePoint);
        }
        // Single-character escape: the character stands for itself.
        return esc;
    };
    // "\r\n" is tried first so that an escaped CRLF is consumed as one
    // unit; the negated class then also covers a lone "\r".
    return str.replace(/\\(\r\n|[^0-9a-f]|[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?)/ig,
                       replacer);
};
192+
193+
/**
 * Helper method to serialize a CSS string.
 *
 * Wraps the value in double quotes and escapes every character that
 * cannot appear literally inside a double-quoted CSS string: the double
 * quote and the backslash are prefixed with a backslash, while CR, LF
 * and FF are written as hexadecimal escapes followed by a terminating
 * space.
 *
 * @param {String} value The decoded string value.
 * @return {String} A double-quoted CSS string token.
 * @method serializeString
 * @static
 */
PropertyValuePart.serializeString = function(value) {
    // The pattern has no capture groups, so the first replacer argument
    // (the whole match) is the single character being escaped.
    var replacer = function(c) {
        if (c === "\"" || c === "\\") {
            return "\\" + c;
        }
        // Only CR, LF and FF reach this point; none is a surrogate, so
        // charCodeAt yields the full code point. The trailing space
        // terminates the hex escape so following hex digits are not
        // absorbed into it.
        return "\\" + c.charCodeAt(0).toString(16) + " ";
    };
    return "\"" + value.replace(/["\\\r\n\f]/g, replacer) + "\"";
};
209+
167210
/**
168211
* Create a new syntax unit based solely on the given token.
169212
* Convenience method for creating a new syntax unit when

src/css/TokenStream.js

Lines changed: 34 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -696,26 +696,46 @@ TokenStream.prototype = mix(new TokenStreamBase(), {
696696
var delim = first,
697697
string = first,
698698
reader = this._reader,
699-
prev = first,
700699
tt = Tokens.STRING,
701-
c = reader.read();
700+
c = reader.read(),
701+
i;
702702

703703
while(c){
704704
string += c;
705705

706-
//if the delimiter is found with an escapement, we're done.
707-
if (c === delim && prev !== "\\"){
708-
break;
709-
}
710-
711-
//if there's a newline without an escapement, it's an invalid string
712-
if (isNewLine(reader.peek()) && c !== "\\"){
706+
if (c === "\\") {
707+
c = reader.read();
708+
if (c === null) {
709+
break; // premature EOF after backslash
710+
} else if (/[^\r\n\f0-9a-f]/i.test(c)) {
711+
// single-character escape
712+
string += c;
713+
} else {
714+
// read up to six hex digits
715+
for (i=0; isHexDigit(c) && i<6; i++) {
716+
string += c;
717+
c = reader.read();
718+
}
719+
// swallow trailing newline or space
720+
if (c === "\r" && reader.peek() === "\n") {
721+
string += c;
722+
c = reader.read();
723+
}
724+
if (isWhitespace(c)) {
725+
string += c;
726+
} else {
727+
// This character is null or not part of the escape;
728+
// jump back to the top to process it.
729+
continue;
730+
}
731+
}
732+
} else if (c === delim) {
733+
break; // delimiter found.
734+
} else if (isNewLine(reader.peek())) {
735+
// newline without an escapement: it's an invalid string
713736
tt = Tokens.INVALID;
714737
break;
715738
}
716-
717-
//save previous and get next
718-
prev = c;
719739
c = reader.read();
720740
}
721741

@@ -857,38 +877,8 @@ TokenStream.prototype = mix(new TokenStreamBase(), {
857877
return number;
858878
},
859879
readString: function(){
860-
var reader = this._reader,
861-
delim = reader.read(),
862-
string = delim,
863-
prev = delim,
864-
c = reader.peek();
865-
866-
while(c){
867-
c = reader.read();
868-
string += c;
869-
870-
//if the delimiter is found with an escapement, we're done.
871-
if (c === delim && prev !== "\\"){
872-
break;
873-
}
874-
875-
//if there's a newline without an escapement, it's an invalid string
876-
if (isNewLine(reader.peek()) && c !== "\\"){
877-
string = "";
878-
break;
879-
}
880-
881-
//save previous and get next
882-
prev = c;
883-
c = reader.peek();
884-
}
885-
886-
//if c is null, that means we're out of input and the string was never closed
887-
if (c === null){
888-
string = "";
889-
}
890-
891-
return string;
880+
var token = this.stringToken(this._reader.read(), 0, 0);
881+
return token.type === Tokens.INVALID ? "" : token.value;
892882
},
893883
readURI: function(first){
894884
var reader = this._reader,

tests/css/Parser.js

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1235,6 +1235,26 @@
12351235
Assert.areEqual("Hello world!", result.parts[0].value);
12361236
},
12371237

1238+
testStringValue3: function(){
1239+
var parser = new Parser();
1240+
var result = parser.parsePropertyValue("\"Chapter\\A \\0a\t\\00A\r\\000a\n\\0000A\f\\00000a\r\n\\00000AFour\\\"\'\\\n\\\r\n\\\r\\\f\\41\"");
1241+
1242+
Assert.isInstanceOf(parserlib.css.PropertyValue, result);
1243+
Assert.areEqual(1, result.parts.length);
1244+
Assert.areEqual("string", result.parts[0].type);
1245+
Assert.areEqual("Chapter\n\n\n\n\n\n\nFour\"\'A", result.parts[0].value);
1246+
},
1247+
1248+
testStringValue4: function(){
1249+
var parser = new Parser();
1250+
var result = parser.parsePropertyValue("\'Chapter\\A \\0a\t\\00A\r\\000a\n\\0000A\f\\00000a\r\n\\00000AFour\"\\\'\\\n\\\r\n\\\r\\\f\\41\'");
1251+
1252+
Assert.isInstanceOf(parserlib.css.PropertyValue, result);
1253+
Assert.areEqual(1, result.parts.length);
1254+
Assert.areEqual("string", result.parts[0].type);
1255+
Assert.areEqual("Chapter\n\n\n\n\n\n\nFour\"\'A", result.parts[0].value);
1256+
},
1257+
12381258
testValueWithOperators: function(){
12391259
var parser = new Parser();
12401260
var result = parser.parsePropertyValue("10px / 1em");

0 commit comments

Comments
 (0)