Skip to content

Commit fc3f645

Browse files
committed
Improve parsing of escape sequences in identifiers and URIs.
Implement proper parsing of escape sequences in unquoted URL function arguments, and ensure escape sequences in quoted URLs are properly translated when creating a PropertyValuePart. The secondary parsing in PropertyValuePart to assign a type string can fail when the identifier contains escape sequences. Pass in a `hint` object based on the original token emitted from the lexer to disambiguate in these cases.
1 parent 90e7097 commit fc3f645

File tree

5 files changed

+121
-38
lines changed

5 files changed

+121
-38
lines changed

src/css/Parser.js

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1701,6 +1701,7 @@ Parser.prototype = function(){
17011701
unary = null,
17021702
value = null,
17031703
endChar = null,
1704+
part = null,
17041705
token,
17051706
line,
17061707
col;
@@ -1744,6 +1745,9 @@ Parser.prototype = function(){
17441745
if (unary === null){
17451746
line = tokenStream.token().startLine;
17461747
col = tokenStream.token().startCol;
1748+
// Correct potentially-inaccurate IDENT parsing in
1749+
// PropertyValuePart constructor.
1750+
part = PropertyValuePart.fromToken(tokenStream.token());
17471751
}
17481752
this._readWhitespace();
17491753
} else {
@@ -1787,7 +1791,7 @@ Parser.prototype = function(){
17871791

17881792
}
17891793

1790-
return value !== null ?
1794+
return part !== null ? part : value !== null ?
17911795
new PropertyValuePart(unary !== null ? unary + value : value, line, col) :
17921796
null;
17931797

src/css/PropertyValuePart.js

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/*global SyntaxUnit, Parser, Colors*/
1+
/*global Colors, Parser, SyntaxUnit, Tokens*/
22
/**
33
* Represents a single part of a CSS property value, meaning that it represents
44
* just one part of the data between ":" and ";".
@@ -10,7 +10,8 @@
1010
* @extends parserlib.util.SyntaxUnit
1111
* @constructor
1212
*/
13-
function PropertyValuePart(text, line, col){
13+
function PropertyValuePart(text, line, col, optionalHint){
14+
var hint = optionalHint || {};
1415

1516
SyntaxUnit.call(this, text, line, col, Parser.PROPERTY_VALUE_PART_TYPE);
1617

@@ -135,9 +136,10 @@ function PropertyValuePart(text, line, col){
135136
this.saturation = +RegExp.$2 / 100;
136137
this.lightness = +RegExp.$3 / 100;
137138
this.alpha = +RegExp.$4;
138-
} else if (/^url\(["']?([^\)"']+)["']?\)/i.test(text)){ //URI
139+
} else if (/^url\(("([^\\"]|\\.)*")\)/i.test(text)){ //URI
140+
// generated by TokenStream.readURI, so always double-quoted.
139141
this.type = "uri";
140-
this.uri = RegExp.$1;
142+
this.uri = PropertyValuePart.parseString(RegExp.$1);
141143
} else if (/^([^\(]+)\(/i.test(text)){
142144
this.type = "function";
143145
this.name = RegExp.$1;
@@ -157,11 +159,17 @@ function PropertyValuePart(text, line, col){
157159
} else if (/^[\,\/]$/.test(text)){
158160
this.type = "operator";
159161
this.value = text;
160-
} else if (/^[a-z\-_\u0080-\uFFFF][a-z0-9\-_\u0080-\uFFFF]*$/i.test(text)){
162+
} else if (/^-?[a-z_\u00A0-\uFFFF][a-z0-9\-_\u00A0-\uFFFF]*$/i.test(text)){
161163
this.type = "identifier";
162164
this.value = text;
163165
}
164166

167+
// There can be ambiguity with escape sequences in identifiers, as
168+
// well as with "color" parts which are also "identifiers", so record
169+
// an explicit hint when the token generating this PropertyValuePart
170+
// was an identifier.
171+
this.wasIdent = !!hint.ident;
172+
165173
}
166174

167175
PropertyValuePart.prototype = new SyntaxUnit();
@@ -217,5 +225,11 @@ PropertyValuePart.serializeString = function(value) {
217225
* @method fromToken
218226
*/
219227
PropertyValuePart.fromToken = function(token){
220-
return new PropertyValuePart(token.value, token.startLine, token.startCol);
228+
var part = new PropertyValuePart(token.value, token.startLine, token.startCol, {
229+
// Tokens can have escaped characters that would fool the type
230+
// identification in the PropertyValuePart constructor, so pass
231+
// in a hint if this was an identifier.
232+
ident: token.type === Tokens.IDENT
233+
});
234+
return part;
221235
};

src/css/TokenStream.js

Lines changed: 53 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
/*global Tokens, TokenStreamBase*/
1+
/*global PropertyValuePart, Tokens, TokenStreamBase*/
22

33
var h = /^[0-9a-fA-F]$/,
4-
//nonascii = /^[\u00A0-\uFFFF]$/,
4+
nonascii = /^[\u00A0-\uFFFF]$/,
55
nl = /\n|\r\n|\r|\f/,
66
whitespace = /\u0009|\u000a|\u000c|\u000d|\u0020/;
77

@@ -221,7 +221,7 @@ TokenStream.prototype = mix(new TokenStreamBase(), {
221221
*/
222222
case "\\":
223223
if (/[^\r\n\f]/.test(reader.peek())) {
224-
token = this.identOrFunctionToken(c, startLine, startCol);
224+
token = this.identOrFunctionToken(this.readEscape(c, true), startLine, startCol);
225225
} else {
226226
token = this.charToken(c, startLine, startCol);
227227
}
@@ -521,18 +521,22 @@ TokenStream.prototype = mix(new TokenStreamBase(), {
521521
var reader = this._reader,
522522
ident = this.readName(first),
523523
tt = Tokens.IDENT,
524-
uriFns = ["url(", "url-prefix(", "domain("];
524+
uriFns = ["url(", "url-prefix(", "domain("],
525+
uri;
525526

526527
//if there's a left paren immediately after, it's a URI or function
527528
if (reader.peek() === "("){
528529
ident += reader.read();
529530
if (uriFns.indexOf(ident.toLowerCase()) > -1){
530-
tt = Tokens.URI;
531-
ident = this.readURI(ident);
532-
533-
//didn't find a valid URL or there's no closing paren
534-
if (uriFns.indexOf(ident.toLowerCase()) > -1){
531+
reader.mark();
532+
uri = this.readURI(ident);
533+
if (uri === null) {
534+
//didn't find a valid URL or there's no closing paren
535+
reader.reset();
535536
tt = Tokens.FUNCTION;
537+
} else {
538+
tt = Tokens.URI;
539+
ident = uri;
536540
}
537541
} else {
538542
tt = Tokens.FUNCTION;
@@ -876,18 +880,20 @@ TokenStream.prototype = mix(new TokenStreamBase(), {
876880

877881
return number;
878882
},
883+
884+
// returns null w/o resetting reader if string is invalid.
879885
readString: function(){
880886
var token = this.stringToken(this._reader.read(), 0, 0);
881-
return token.type === Tokens.INVALID ? "" : token.value;
887+
return token.type === Tokens.INVALID ? null : token.value;
882888
},
889+
890+
// returns null w/o resetting reader if URI is invalid.
883891
readURI: function(first){
884892
var reader = this._reader,
885893
uri = first,
886894
inner = "",
887895
c = reader.peek();
888896

889-
reader.mark();
890-
891897
//skip whitespace before
892898
while(c && isWhitespace(c)){
893899
reader.read();
@@ -897,8 +903,11 @@ TokenStream.prototype = mix(new TokenStreamBase(), {
897903
//it's a string
898904
if (c === "'" || c === "\""){
899905
inner = this.readString();
906+
if (inner !== null) {
907+
inner = PropertyValuePart.parseString(inner);
908+
}
900909
} else {
901-
inner = this.readURL();
910+
inner = this.readUnquotedURL();
902911
}
903912

904913
c = reader.peek();
@@ -910,46 +919,60 @@ TokenStream.prototype = mix(new TokenStreamBase(), {
910919
}
911920

912921
//if there was no inner value or the next character isn't closing paren, it's not a URI
913-
if (inner === "" || c !== ")"){
914-
uri = first;
915-
reader.reset();
922+
if (inner === null || c !== ")"){
923+
uri = null;
916924
} else {
917-
uri += inner + reader.read();
925+
// Ensure argument to URL is always double-quoted
926+
// (This simplifies later processing in PropertyValuePart.)
927+
uri += PropertyValuePart.serializeString(inner) + reader.read();
918928
}
919929

920930
return uri;
921931
},
922-
readURL: function(){
932+
// This method never fails, although it may return an empty string.
933+
readUnquotedURL: function(first){
923934
var reader = this._reader,
924-
url = "",
925-
c = reader.peek();
935+
url = first || "",
936+
c;
926937

927-
//TODO: Check for escape and nonascii
928-
while (/^[!#$%&\\*-~]$/.test(c)){
929-
url += reader.read();
930-
c = reader.peek();
938+
for (c = reader.peek(); c; c = reader.peek()) {
939+
// Note that the grammar at
940+
// https://www.w3.org/TR/CSS2/grammar.html#scanner
941+
// incorrectly includes the backslash character in the
942+
// `url` production, although it is correctly omitted in
943+
// the `baduri1` production.
944+
if (nonascii.test(c) || /^[\-!#$%&*-\[\]-~]$/.test(c)) {
945+
url += c;
946+
reader.read();
947+
} else if (c === '\\') {
948+
if (/^[^\r\n\f]$/.test(reader.peek(2))) {
949+
url += this.readEscape(reader.read(), true);
950+
} else {
951+
break; // bad escape sequence.
952+
}
953+
} else {
954+
break; // bad character
955+
}
931956
}
932957

933958
return url;
934-
935959
},
960+
936961
readName: function(first){
937962
var reader = this._reader,
938963
ident = first || "",
939-
c = reader.peek();
964+
c;
940965

941-
while(true){
966+
for (c = reader.peek(); c; c = reader.peek()) {
942967
if (c === "\\"){
943968
if (/^[^\r\n\f]$/.test(reader.peek(2))) {
944969
ident += this.readEscape(reader.read(), true);
945-
c = reader.peek();
946970
} else {
947971
// Bad escape sequence.
948972
break;
949973
}
950-
} else if(c && isNameChar(c)){
974+
} else if(isNameChar(c)){
951975
ident += reader.read();
952-
c = reader.peek();
953976
} else {
954977
break;
955978
}

src/css/ValidationTypes.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ var ValidationTypes = {
117117

118118
//any identifier
119119
"<ident>": function(part){
120-
return part.type === "identifier";
120+
return part.type === "identifier" || part.wasIdent;
121121
},
122122

123123
"<length>": function(part){

tests/css/Parser.js

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -981,6 +981,26 @@
981981

982982
name: "Property Values",
983983

984+
testIdentifierValue: function(){
985+
var parser = new Parser();
986+
var result = parser.parsePropertyValue("foo");
987+
988+
Assert.isInstanceOf(parserlib.css.PropertyValue, result);
989+
Assert.areEqual(1, result.parts.length);
990+
Assert.areEqual("identifier", result.parts[0].type);
991+
},
992+
993+
testIdentifierValue2: function(){
994+
var parser = new Parser();
995+
// Identifiers can even start with digits, iff they are escaped.
996+
var result = parser.parsePropertyValue("\\30 \\0000307");
997+
998+
Assert.isInstanceOf(parserlib.css.PropertyValue, result);
999+
Assert.areEqual(1, result.parts.length);
1000+
Assert.areEqual("007", result.parts[0].text);
1001+
Assert.areEqual(true, result.parts[0].wasIdent);
1002+
},
1003+
9841004
testDimensionValuePx: function(){
9851005
var parser = new Parser();
9861006
var result = parser.parsePropertyValue("1px");
@@ -1176,6 +1196,18 @@
11761196
Assert.areEqual(0, result.parts[0].blue);
11771197
},
11781198

1199+
testColorValue2: function(){
1200+
var parser = new Parser();
1201+
var result = parser.parsePropertyValue("\\r\\45 d");
1202+
1203+
Assert.isInstanceOf(parserlib.css.PropertyValue, result);
1204+
Assert.areEqual(1, result.parts.length);
1205+
Assert.areEqual("color", result.parts[0].type);
1206+
Assert.areEqual(255, result.parts[0].red);
1207+
Assert.areEqual(0, result.parts[0].green);
1208+
Assert.areEqual(0, result.parts[0].blue);
1209+
},
1210+
11791211
testCSS2SystemColorValue: function(){
11801212
var parser = new Parser();
11811213
var result = parser.parsePropertyValue("InfoText");
@@ -1215,6 +1247,16 @@
12151247
Assert.areEqual("http://www.yahoo.com", result.parts[0].uri);
12161248
},
12171249

1250+
testURIValue4: function(){
1251+
var parser = new Parser();
1252+
var result = parser.parsePropertyValue("url(http\\03a\r\n//www.yahoo.com)");
1253+
1254+
Assert.isInstanceOf(parserlib.css.PropertyValue, result);
1255+
Assert.areEqual(1, result.parts.length);
1256+
Assert.areEqual("uri", result.parts[0].type);
1257+
Assert.areEqual("http://www.yahoo.com", result.parts[0].uri);
1258+
},
1259+
12181260
testStringValue: function(){
12191261
var parser = new Parser();
12201262
var result = parser.parsePropertyValue("'Hello world!'");

0 commit comments

Comments
 (0)