Fix #8

astrada · astrada · commit 37bd91bf3413 · 2021-01-25T14:04:52.000+01:00
Treat `.` and `#` as `:`
diff --git a/README.md b/README.md
@@ -48,3 +48,12 @@ let css =
   *)
 ```
 
+### Remarks
+
+Whitespace and comments are discarded by the lexer, so they are not available
+to the parser. The one exception is whitespace immediately before `:`, `.`, or
+`#`, which is significant in rule preludes: it distinguishes selectors like
+`p :first-child` from `p:first-child`. Such whitespace is replaced with `*` to
+keep CSS semantics intact. So, e.g., `p :first-child` is parsed as
+`p *:first-child`, `p .class` as `p *.class`, and `p #id` as `p *#id`.
+
diff --git a/lib/lexer.ml b/lib/lexer.ml
@@ -40,8 +40,11 @@ let token_to_string = function
   | LEFT_BRACKET -> "["
   | RIGHT_BRACKET -> "]"
   | COLON -> ":"
-  | WHITESPACE_BEFORE_COLON -> "*"
-  | WHITESPACE_COLON -> "*:"
+  | DOT -> "."
+  (* Whitespace is detected only in selectors, before ":", ".", and "#", to
+   * disambiguate between "p :first-child" and "p:first-child"; such
+   * whitespace is replaced with "*" *)
+  | WHITESPACE -> "*"
   | SEMI_COLON -> ";"
   | PERCENTAGE -> "%"
   | IMPORTANT -> "!important"
@@ -262,34 +265,39 @@ let discard_comments_and_white_spaces buf =
   in
   discard_white_spaces buf false
 
-let rec get_next_token buf spaces_detected =
+let rec get_next_tokens buf spaces_detected =
   let open Menhir_parser in
   match%sedlex buf with
-  | eof -> EOF
-  | ';' -> SEMI_COLON
-  | '}' -> RIGHT_BRACE
-  | '{' -> LEFT_BRACE
-  | ':' -> if spaces_detected then WHITESPACE_COLON else COLON
-  | '(' -> LEFT_PAREN
-  | ')' -> RIGHT_PAREN
-  | '[' -> LEFT_BRACKET
-  | ']' -> RIGHT_BRACKET
-  | '%' -> PERCENTAGE
-  | operator -> OPERATOR (Lex_buffer.latin1 buf)
-  | string -> STRING (Lex_buffer.latin1 ~skip:1 ~drop:1 buf)
-  | "url(" -> get_url "" buf
-  | important -> IMPORTANT
-  | nested_at_rule -> NESTED_AT_RULE (Lex_buffer.latin1 ~skip:1 buf)
-  | at_rule_without_body -> AT_RULE_WITHOUT_BODY (Lex_buffer.latin1 ~skip:1 buf)
-  | at_rule -> AT_RULE (Lex_buffer.latin1 ~skip:1 buf)
+  | eof -> [ EOF ]
+  | ';' -> [ SEMI_COLON ]
+  | '}' -> [ RIGHT_BRACE ]
+  | '{' -> [ LEFT_BRACE ]
+  | ':' -> if spaces_detected then [ WHITESPACE; COLON ] else [ COLON ]
+  | '.' -> if spaces_detected then [ WHITESPACE; DOT ] else [ DOT ]
+  | '(' -> [ LEFT_PAREN ]
+  | ')' -> [ RIGHT_PAREN ]
+  | '[' -> [ LEFT_BRACKET ]
+  | ']' -> [ RIGHT_BRACKET ]
+  | '%' -> [ PERCENTAGE ]
+  | operator -> [ OPERATOR (Lex_buffer.latin1 buf) ]
+  | string -> [ STRING (Lex_buffer.latin1 ~skip:1 ~drop:1 buf) ]
+  | "url(" -> [ get_url "" buf ]
+  | important -> [ IMPORTANT ]
+  | nested_at_rule -> [ NESTED_AT_RULE (Lex_buffer.latin1 ~skip:1 buf) ]
+  | at_rule_without_body ->
+      [ AT_RULE_WITHOUT_BODY (Lex_buffer.latin1 ~skip:1 buf) ]
+  | at_rule -> [ AT_RULE (Lex_buffer.latin1 ~skip:1 buf) ]
   (* NOTE: should be placed above ident, otherwise pattern with
    * '-[0-9a-z]{1,6}' cannot be matched *)
-  | _u, '+', unicode_range -> UNICODE_RANGE (Lex_buffer.latin1 buf)
-  | ident, '(' -> FUNCTION (Lex_buffer.latin1 ~drop:1 buf)
-  | ident -> IDENT (Lex_buffer.latin1 buf)
-  | '#', name -> HASH (Lex_buffer.latin1 ~skip:1 buf)
-  | number -> get_dimension (Lex_buffer.latin1 buf) buf
-  | any -> DELIM (Lex_buffer.latin1 buf)
+  | _u, '+', unicode_range -> [ UNICODE_RANGE (Lex_buffer.latin1 buf) ]
+  | ident, '(' -> [ FUNCTION (Lex_buffer.latin1 ~drop:1 buf) ]
+  | ident -> [ IDENT (Lex_buffer.latin1 buf) ]
+  | '#', name ->
+      if spaces_detected then
+        [ WHITESPACE; HASH (Lex_buffer.latin1 ~skip:1 buf) ]
+      else [ HASH (Lex_buffer.latin1 ~skip:1 buf) ]
+  | number -> [ get_dimension (Lex_buffer.latin1 buf) buf ]
+  | any -> [ DELIM (Lex_buffer.latin1 buf) ]
   | _ -> assert false
 
 and get_dimension n buf =
@@ -316,25 +324,19 @@ and get_url url buf =
 
 let token_queue = Queue.create ()
 
-let queue_next_token_with_location buf =
+let queue_next_tokens_with_location buf =
   let spaces_detected = discard_comments_and_white_spaces buf in
   let loc_start = Lex_buffer.next_loc buf in
-  let token = get_next_token buf spaces_detected in
+  let tokens = get_next_tokens buf spaces_detected in
   let loc_end = Lex_buffer.next_loc buf in
-  match token with
-  | Menhir_parser.WHITESPACE_COLON ->
-      Queue.add
-        (Menhir_parser.WHITESPACE_BEFORE_COLON, loc_start, loc_end)
-        token_queue;
-      Queue.add (Menhir_parser.COLON, loc_start, loc_end) token_queue
-  | _ -> Queue.add (token, loc_start, loc_end) token_queue
+  List.iter (fun t -> Queue.add (t, loc_start, loc_end) token_queue) tokens
 
 let parse buf p =
   let last_token =
     ref (Menhir_parser.EOF, Lexing.dummy_pos, Lexing.dummy_pos)
   in
   let next_token () =
-    if Queue.is_empty token_queue then queue_next_token_with_location buf;
+    if Queue.is_empty token_queue then queue_next_tokens_with_location buf;
     last_token := Queue.take token_queue;
     !last_token
   in
diff --git a/lib/menhir_parser.mly b/lib/menhir_parser.mly
@@ -15,8 +15,11 @@ open Types
 %token LEFT_BRACKET
 %token RIGHT_BRACKET
 %token COLON
-%token WHITESPACE_BEFORE_COLON
-%token WHITESPACE_COLON
+%token DOT
+(* Whitespace is detected only in selectors, before ":", ".", and "#", to
+ * disambiguate between "p :first-child" and "p:first-child"; such
+ * whitespace is replaced with "*" *)
+%token WHITESPACE
 %token SEMI_COLON
 %token PERCENTAGE
 %token IMPORTANT
@@ -101,7 +104,7 @@ prelude_with_loc:
   ;
 
 prelude:
-  xs = list(component_value_with_loc) { xs }
+  xs = list(component_value_with_loc_in_prelude) { xs }
   ;
 
 declarations_with_loc:
@@ -124,7 +127,7 @@ declaration_or_at_rule:
   ;
 
 declaration:
-  n = IDENT; option(WHITESPACE_BEFORE_COLON); COLON; v = list(component_value_with_loc); i = boption(IMPORTANT) {
+  n = IDENT; option(WHITESPACE); COLON; v = list(component_value_with_loc); i = boption(IMPORTANT) {
     { Declaration.name = (n, Lex_buffer.make_loc_and_fix $startpos(n) $endpos(n));
       value = (v, Lex_buffer.make_loc_and_fix $startpos(v) $endpos(v));
       important = (i, Lex_buffer.make_loc_and_fix $startpos(i) $endpos(i));
@@ -153,9 +156,34 @@ component_value:
   | u = URI { Component_value.Uri u }
   | o = OPERATOR { Component_value.Operator o }
   | d = DELIM { Component_value.Delim d }
+  | option(WHITESPACE); COLON { Component_value.Delim ":" }
+  | option(WHITESPACE); DOT { Component_value.Delim "." }
+  | f = FUNCTION; xs = list(component_value_with_loc); RIGHT_PAREN {
+      Component_value.Function ((f, Lex_buffer.make_loc_and_fix $startpos(f) $endpos(f)),
+                                (xs, Lex_buffer.make_loc_and_fix $startpos(xs) $endpos(xs)))
+    }
+  | option(WHITESPACE); h = HASH { Component_value.Hash h }
+  | n = NUMBER { Component_value.Number n }
+  | r = UNICODE_RANGE { Component_value.Unicode_range r }
+  | d = FLOAT_DIMENSION { Component_value.Float_dimension d }
+  | d = DIMENSION { Component_value.Dimension d }
+  ;
+
+component_value_with_loc_in_prelude:
+  | c = component_value_in_prelude { (c, Lex_buffer.make_loc_and_fix $startpos $endpos) }
+
+component_value_in_prelude:
+  | b = paren_block { Component_value.Paren_block b }
+  | b = bracket_block { Component_value.Bracket_block b }
+  | n = NUMBER; PERCENTAGE { Component_value.Percentage n }
+  | i = IDENT { Component_value.Ident i }
+  | s = STRING { Component_value.String s }
+  | u = URI { Component_value.Uri u }
+  | o = OPERATOR { Component_value.Operator o }
+  | d = DELIM { Component_value.Delim d }
+  | WHITESPACE { Component_value.Delim "*" }
   | COLON { Component_value.Delim ":" }
-  | WHITESPACE_BEFORE_COLON { Component_value.Delim "*" }
-  | COLON { Component_value.Delim ":" }
+  | DOT { Component_value.Delim "." }
   | f = FUNCTION; xs = list(component_value_with_loc); RIGHT_PAREN {
       Component_value.Function ((f, Lex_buffer.make_loc_and_fix $startpos(f) $endpos(f)),
                                 (xs, Lex_buffer.make_loc_and_fix $startpos(xs) $endpos(xs)))
diff --git a/test/test_parser.ml b/test/test_parser.ml
@@ -899,7 +899,7 @@ let test_hover_selector () =
 
 let test_id_selector () =
   let css = {|
-#element {
+#id {
   color: blue
 }
 |} in
@@ -909,7 +909,10 @@ let test_id_selector () =
         Rule.Style_rule
           {
             Style_rule.prelude =
-              ( [ (Component_value.Hash "element", Location.none) ],
+              ( [
+                  (Component_value.Delim "*", Location.none);
+                  (Component_value.Hash "id", Location.none);
+                ],
                 Location.none );
             block =
               ( [
@@ -934,7 +937,7 @@ let test_id_selector () =
 
 let test_class_selector () =
   let css = {|
-.element {
+.classname {
   color: blue
 }
 |} in
@@ -945,8 +948,9 @@ let test_class_selector () =
           {
             Style_rule.prelude =
               ( [
+                  (Component_value.Delim "*", Location.none);
                   (Component_value.Delim ".", Location.none);
-                  (Component_value.Ident "element", Location.none);
+                  (Component_value.Ident "classname", Location.none);
                 ],
                 Location.none );
             block =
@@ -1261,6 +1265,45 @@ p :first-child {
   Alcotest.(check (testable Css_fmt_printer.dump_stylesheet eq_ast))
     "different CSS AST" expected_ast ast
 
+let test_p_first_child_selector () =
+  let css = {|
+p:first-child {
+  color: blue
+}
+|} in
+  let ast = Css.Parser.parse_stylesheet css in
+  let expected_ast =
+    ( [
+        Rule.Style_rule
+          {
+            Style_rule.prelude =
+              ( [
+                  (Component_value.Ident "p", Location.none);
+                  (Component_value.Delim ":", Location.none);
+                  (Component_value.Ident "first-child", Location.none);
+                ],
+                Location.none );
+            block =
+              ( [
+                  Declaration_list.Declaration
+                    {
+                      Declaration.name = ("color", Location.none);
+                      value =
+                        ( [ (Component_value.Ident "blue", Location.none) ],
+                          Location.none );
+                      important = (false, Location.none);
+                      loc = Location.none;
+                    };
+                ],
+                Location.none );
+            loc = Location.none;
+          };
+      ],
+      Location.none )
+  in
+  Alcotest.(check (testable Css_fmt_printer.dump_stylesheet eq_ast))
+    "different CSS AST" expected_ast ast
+
 let test_p_star_space_first_child_selector () =
   let css = {|
 p * :first-child {
@@ -1302,9 +1345,9 @@ p * :first-child {
   Alcotest.(check (testable Css_fmt_printer.dump_stylesheet eq_ast))
     "different CSS AST" expected_ast ast
 
-let test_p_first_child_selector () =
+let test_p_space_dot_classname () =
   let css = {|
-p:first-child {
+p .classname {
   color: blue
 }
 |} in
@@ -1316,8 +1359,48 @@ p:first-child {
             Style_rule.prelude =
               ( [
                   (Component_value.Ident "p", Location.none);
-                  (Component_value.Delim ":", Location.none);
-                  (Component_value.Ident "first-child", Location.none);
+                  (Component_value.Delim "*", Location.none);
+                  (Component_value.Delim ".", Location.none);
+                  (Component_value.Ident "classname", Location.none);
+                ],
+                Location.none );
+            block =
+              ( [
+                  Declaration_list.Declaration
+                    {
+                      Declaration.name = ("color", Location.none);
+                      value =
+                        ( [ (Component_value.Ident "blue", Location.none) ],
+                          Location.none );
+                      important = (false, Location.none);
+                      loc = Location.none;
+                    };
+                ],
+                Location.none );
+            loc = Location.none;
+          };
+      ],
+      Location.none )
+  in
+  Alcotest.(check (testable Css_fmt_printer.dump_stylesheet eq_ast))
+    "different CSS AST" expected_ast ast
+
+let test_p_space_hash_id () =
+  let css = {|
+p #id {
+  color: blue
+}
+|} in
+  let ast = Css.Parser.parse_stylesheet css in
+  let expected_ast =
+    ( [
+        Rule.Style_rule
+          {
+            Style_rule.prelude =
+              ( [
+                  (Component_value.Ident "p", Location.none);
+                  (Component_value.Delim "*", Location.none);
+                  (Component_value.Hash "id", Location.none);
                 ],
                 Location.none );
             block =
@@ -1370,4 +1453,6 @@ let test_set =
     ("p :first-child selector", `Quick, test_p_space_first_child_selector);
     ("p:first-child selector", `Quick, test_p_first_child_selector);
     ("p * :first-child selector", `Quick, test_p_star_space_first_child_selector);
+    ("p .classname selector", `Quick, test_p_space_dot_classname);
+    ("p #id selector", `Quick, test_p_space_hash_id);
   ]