Skip to content

Commit 2e15e46

Browse files
committed
Emulate Ruby Sass' url() parsing semantics
We've had countless bugs and regressions with parsing url(). This patch is a complete refactor of our url() parsing semantics so that they match those of Ruby Sass 100%. Fixes #674. Spec: sass/sass-spec#539
1 parent fbb6fb6 commit 2e15e46

File tree

8 files changed

+103
-22
lines changed

8 files changed

+103
-22
lines changed

src/constants.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ namespace Sass {
141141

142142
// constants for uri parsing (RFC 3986 Appendix A.)
143143
extern const char uri_chars[] = ":;/?!%&#@|[]{}'`^\"*+-.,_=~";
144+
// extra characters accepted inside url(...) by `real_uri_value`: '#', '%'
// and '&' (codes 35, 37, 38) lie below the range of `is_uri_character`
extern const char real_uri_chars[] = "#%&";
144145

145146
// some specific constant character classes
146147
// they must be static to be useable by lexer

src/constants.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ namespace Sass {
144144

145145
// constants for uri parsing (RFC 3986 Appendix A.)
146146
extern const char uri_chars[];
147+
extern const char real_uri_chars[];
147148

148149
// some specific constant character classes
149150
// they must be static to be useable by lexer

src/lexer.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,24 @@ namespace Sass {
7575
return unsigned(chr) > 127;
7676
}
7777

78+
// Check if a single byte lies outside the 7-bit ascii range.
//
// The ranges copied from Ruby Sass describe code points, but this
// predicate only ever sees one `char`. Converting a plain `char`
// straight to `unsigned` sign-extends wherever `char` is signed, so
// every byte above 127 ended up beyond all of those ranges and was
// rejected. Going through `unsigned char` keeps the value within
// 0..255, where only the lower bound (> 127) can ever apply.
bool is_nonascii(const char& chr)
{
  return static_cast<unsigned char>(chr) > 127;
}
88+
89+
// Tell whether `chr` belongs to the reduced ascii range that is valid
// in a uri (range copied from Ruby Sass): everything from '*' (42) up
// to and including '~' (126).
bool is_uri_character(const char& chr)
{
  const unsigned code = unsigned(chr);
  if (code <= 41) return false;
  return code < 127;
}
95+
7896
// Match word character (look ahead)
7997
bool is_character(const char& chr)
8098
{
@@ -90,11 +108,13 @@ namespace Sass {
90108
const char* space(const char* src) { return is_space(*src) ? src + 1 : 0; }
91109
const char* alpha(const char* src) { return is_alpha(*src) ? src + 1 : 0; }
92110
const char* unicode(const char* src) { return is_unicode(*src) ? src + 1 : 0; }
111+
// Match one character accepted by `is_nonascii`: yields the position
// right after it, or 0 when the character at `src` does not qualify.
const char* nonascii(const char* src)
{
  if (is_nonascii(*src)) return src + 1;
  return 0;
}
93112
const char* digit(const char* src) { return is_digit(*src) ? src + 1 : 0; }
94113
const char* xdigit(const char* src) { return is_xdigit(*src) ? src + 1 : 0; }
95114
const char* alnum(const char* src) { return is_alnum(*src) ? src + 1 : 0; }
96115
const char* punct(const char* src) { return is_punct(*src) ? src + 1 : 0; }
97116
const char* character(const char* src) { return is_character(*src) ? src + 1 : 0; }
117+
// Match one character accepted by `is_uri_character`: yields the position
// right after it, or 0 when the character at `src` does not qualify.
const char* uri_character(const char* src)
{
  if (is_uri_character(*src)) return src + 1;
  return 0;
}
98118

99119
// Match multiple ctype characters.
100120
const char* spaces(const char* src) { return one_plus<space>(src); }

src/lexer.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ namespace Sass {
3232
bool is_alnum(const char& src);
3333
bool is_xdigit(const char& src);
3434
bool is_unicode(const char& src);
35+
bool is_nonascii(const char& src);
3536
bool is_character(const char& src);
37+
bool is_uri_character(const char& src);
3638

3739
// Match a single ctype predicate.
3840
const char* space(const char* src);
@@ -42,7 +44,9 @@ namespace Sass {
4244
const char* alnum(const char* src);
4345
const char* punct(const char* src);
4446
const char* unicode(const char* src);
47+
const char* nonascii(const char* src);
4548
const char* character(const char* src);
49+
const char* uri_character(const char* src);
4650

4751
// Match multiple ctype characters.
4852
const char* spaces(const char* src);

src/parser.cpp

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -501,36 +501,31 @@ namespace Sass {
501501
return p;
502502
}
503503

504-
Arguments* Parser::parse_arguments(bool has_url)
504+
// Parse an optional, parenthesised and comma separated argument list.
// An Arguments node is always created and returned; it only receives
// entries when an opening parenthesis follows and the list is not empty.
Arguments* Parser::parse_arguments()
{
  // capture the last lexed token and position for the error message below
  std::string name(lexed);
  Position position = after_token;
  Arguments* args = SASS_MEMORY_NEW(ctx.mem, Arguments, pstate);

  // no opening parenthesis: nothing to parse
  if (!lex_css< exactly<'('> >()) return args;

  // if there's anything there at all
  if (!peek_css< exactly<')'> >()) {
    (*args) << parse_argument();
    while (lex_css< exactly<','> >()) {
      (*args) << parse_argument();
    }
  }

  // the list must be terminated by a closing parenthesis
  if (!lex_css< exactly<')'> >()) {
    error("expected a variable name (e.g. $x) or ')' for the parameter list for " + name, position);
  }
  return args;
}
}
519519

520-
Argument* Parser::parse_argument(bool has_url)
520+
Argument* Parser::parse_argument()
521521
{
522522
if (peek_css< sequence < exactly< hash_lbrace >, exactly< rbrace > > >()) {
523523
position += 2;
524524
css_error("Invalid CSS", " after ", ": expected expression (e.g. 1px, bold), was ");
525525
}
526526

527527
Argument* arg;
528-
// some urls can look like line comments (parse literally - chunk would not work)
529-
if (has_url && lex< sequence < uri_value, lookahead < loosely<')'> > > >(false)) {
530-
String* the_url = parse_interpolated_chunk(lexed);
531-
arg = SASS_MEMORY_NEW(ctx.mem, Argument, the_url->pstate(), the_url);
532-
}
533-
else if (peek_css< sequence < variable, optional_css_comments, exactly<':'> > >()) {
528+
if (peek_css< sequence < variable, optional_css_comments, exactly<':'> > >()) {
534529
lex_css< variable >();
535530
std::string name(Util::normalize_underscores(lexed));
536531
ParserState p = pstate;
@@ -1410,6 +1405,9 @@ namespace Sass {
14101405
}
14111406
return string;
14121407
}
1408+
else if (peek< real_uri_value >()) {
1409+
return parse_url_function_string();
1410+
}
14131411
else if (peek< re_functional >()) {
14141412
return parse_function_call();
14151413
}
@@ -1790,14 +1788,19 @@ namespace Sass {
17901788
return SASS_MEMORY_NEW(ctx.mem, Function_Call, call_pos, name, args);
17911789
}
17921790

1791+
// Lex a complete url token as matched by `real_uri_value` and build
// a String_Constant from the lexed token.
String* Parser::parse_url_function_string()
{
  lex< real_uri_value >();
  String* url_string = SASS_MEMORY_NEW(ctx.mem, String_Constant, pstate, lexed);
  return url_string;
}
1796+
17931797
// Parse a function call: an identifier followed by its argument list.
Function_Call* Parser::parse_function_call()
{
  lex< identifier >();

  // capture the parser state and the function name right after the
  // identifier has been lexed, before the arguments are parsed
  ParserState start = pstate;
  std::string name(lexed);

  Arguments* arguments = parse_arguments();
  return SASS_MEMORY_NEW(ctx.mem, Function_Call, start, name, arguments);
}
18031806

src/parser.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,8 @@ namespace Sass {
220220
Parameters* parse_parameters();
221221
Parameter* parse_parameter();
222222
Mixin_Call* parse_include_directive();
223-
Arguments* parse_arguments(bool has_url = false);
224-
Argument* parse_argument(bool has_url = false);
223+
Arguments* parse_arguments();
224+
Argument* parse_argument();
225225
Assignment* parse_assignment();
226226
// Propset* parse_propset();
227227
Ruleset* parse_ruleset(Lookahead lookahead, bool is_root = false);
@@ -256,6 +256,7 @@ namespace Sass {
256256
Function_Call* parse_calc_function();
257257
Function_Call* parse_function_call();
258258
Function_Call_Schema* parse_function_call_schema();
259+
String* parse_url_function_string();
259260
String* parse_interpolated_chunk(Token, bool constant = false);
260261
String* parse_string();
261262
String_Constant* parse_static_expression();

src/prelexer.cpp

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -914,22 +914,64 @@ namespace Sass {
914914
exactly<'\f'> >(src);
915915
}*/
916916

917-
/* not used anymore - remove?
918917
// Match a single hexadecimal digit.
// Returns the position after the digit, or 0 if `*src` is not one.
const char* H(const char* src) {
  // <cctype> classifiers are undefined for negative arguments, which is
  // what a byte above 127 becomes wherever plain `char` is signed
  return std::isxdigit(static_cast<unsigned char>(*src)) ? src + 1 : 0;
}
921920

922-
/* not used anymore - remove?
923-
const char* unicode(const char* src) {
921+
const char* W(const char* src) {
922+
return zero_plus< alternatives<
923+
space,
924+
exactly< '\t' >,
925+
exactly< '\r' >,
926+
exactly< '\n' >,
927+
exactly< '\f' >
928+
> >(src);
929+
}
930+
931+
const char* UUNICODE(const char* src) {
924932
return sequence< exactly<'\\'>,
925933
between<H, 1, 6>,
926-
optional< class_char<url_space_chars> > >(src);
927-
}*/
934+
optional< W >
935+
>(src);
936+
}
937+
938+
const char* NONASCII(const char* src) {
939+
return nonascii(src);
940+
}
928941

929-
/* not used anymore - remove?
930942
const char* ESCAPE(const char* src) {
931-
return alternatives< unicode, class_char<escape_chars> >(src);
932-
}*/
943+
return alternatives<
944+
UUNICODE,
945+
sequence<
946+
exactly<'\\'>,
947+
NONASCII,
948+
class_char< escape_chars >
949+
>
950+
>(src);
951+
}
952+
953+
954+
const char* real_uri_value(const char* src) {
955+
return
956+
sequence<
957+
exactly< url_kwd >,
958+
W,
959+
zero_plus< alternatives<
960+
class_char< real_uri_chars >,
961+
uri_character,
962+
NONASCII,
963+
ESCAPE
964+
> >,
965+
alternatives<
966+
sequence<
967+
W,
968+
exactly< ')' >
969+
>,
970+
exactly< hash_lbrace >
971+
>
972+
>
973+
(src);
974+
}
933975

934976
const char* static_string(const char* src) {
935977
const char* pos = src;

src/prelexer.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,15 @@ namespace Sass {
340340
// match urls
341341
const char* url(const char* src);
342342

343+
// match url()
344+
const char* H(const char* src);
345+
const char* W(const char* src);
346+
// `UNICODE` makes VS sad
347+
const char* UUNICODE(const char* src);
348+
const char* NONASCII(const char* src);
349+
const char* ESCAPE(const char* src);
350+
const char* real_uri_value(const char* src);
351+
343352
// Path matching functions.
344353
// const char* folder(const char* src);
345354
// const char* folders(const char* src);

0 commit comments

Comments
 (0)