Skip to content

Commit 7d7cf77

Browse files
authored
Merge pull request #56 from gasche/simpler-lexer
Simpler lexer
2 parents 0600d37 + 467319a commit 7d7cf77

File tree

4 files changed

+65
-79
lines changed

4 files changed

+65
-79
lines changed

bin/test/errors/parsing-errors.t

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5,15 +5,14 @@ Delimiter problems:
55
$ echo "{{foo" > $PROBLEM
66
$ mustache foo.json $PROBLEM
77
Template parse error:
8-
File "no-closing-mustache.mustache", line 2, character 0: syntax error.
8+
File "no-closing-mustache.mustache", line 2, character 0: '}}' expected.
99
[3]
1010
1111
$ PROBLEM=one-closing-mustache.mustache
1212
$ echo "{{foo}" > $PROBLEM
1313
$ mustache foo.json $PROBLEM
1414
Template parse error:
15-
File "one-closing-mustache.mustache", lines 1-2, characters 6-0:
16-
syntax error.
15+
File "one-closing-mustache.mustache", line 1, character 5: '}}' expected.
1716
[3]
1817
1918
$ PROBLEM=eof-before-variable.mustache
@@ -80,14 +79,14 @@ Mismatches between opening and closing mustaches:
8079
$ echo "{{ foo }}}" > $PROBLEM
8180
$ mustache foo.json $PROBLEM
8281
Template parse error:
83-
File "two-three.mustache", lines 1-2, characters 10-0: syntax error.
82+
File "two-three.mustache", line 1, characters 7-10: '}}' expected.
8483
[3]
8584
8685
$ PROBLEM=three-two.mustache
8786
$ echo "{{{ foo }}" > $PROBLEM
8887
$ mustache foo.json $PROBLEM
8988
Template parse error:
90-
File "three-two.mustache", lines 1-2, characters 10-0: syntax error.
89+
File "three-two.mustache", line 1, characters 8-10: '}}}' expected.
9190
[3]
9291
9392
@@ -115,3 +114,13 @@ Mismatch between section-start and section-end:
115114
File "wrong-nesting.mustache", lines 1-2, characters 41-0:
116115
Section mismatch: {{#foo}} is closed by {{/bar}}.
117116
[3]
117+
118+
119+
Weird cases that may confuse our lexer or parser:
120+
121+
$ PROBLEM=weird-tag-name.mustache
122+
$ echo "{{.weird}} foo bar" > $PROBLEM
123+
$ mustache foo.json $PROBLEM
124+
Template parse error:
125+
File "weird-tag-name.mustache", line 1, character 3: '}}' expected.
126+
[3]

lib/mustache.ml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -413,7 +413,7 @@ module Render = struct
413413
in
414414

415415
let print_indented_string indent s =
416-
let lines = Mustache_lexer.split_on_char '\n' s in
416+
let lines = String.split_on_char '\n' s in
417417
align indent; Buffer.add_string buf (List.hd lines);
418418
List.iter (fun line ->
419419
Buffer.add_char buf '\n';

lib/mustache_lexer.mll

Lines changed: 37 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -25,35 +25,28 @@
2525

2626
exception Error of string
2727

28-
let tok_arg f lexbuf =
28+
let tok_arg lexbuf f =
2929
let start_p = lexbuf.Lexing.lex_start_p in
3030
let x = f lexbuf in
3131
lexbuf.Lexing.lex_start_p <- start_p;
3232
x
3333

34-
let with_space space f =
35-
tok_arg (fun lexbuf ->
34+
let lex_tag lexbuf space ident tag_end =
35+
tok_arg lexbuf (fun lexbuf ->
3636
let () = space lexbuf in
37-
let x = f lexbuf in
37+
let name = ident lexbuf in
3838
let () = space lexbuf in
39-
x
39+
let () = tag_end lexbuf in
40+
name
4041
)
4142

42-
let split_on_char sep s =
43-
let open String in
44-
let r = ref [] in
45-
let j = ref (length s) in
46-
for i = length s - 1 downto 0 do
47-
if unsafe_get s i = sep then begin
48-
r := sub s (i + 1) (!j - i - 1) :: !r;
49-
j := i
50-
end
51-
done;
52-
sub s 0 !j :: !r
53-
5443
let split_ident ident =
5544
if ident = "." then []
56-
else split_on_char '.' ident
45+
else String.split_on_char '.' ident
46+
47+
let check_mustaches ~expected ~lexed =
48+
if expected <> lexed then
49+
raise (Error (Printf.sprintf "'%s' expected" expected))
5750
}
5851

5952
let blank = [' ' '\t']*
@@ -66,13 +59,12 @@ rule space = parse
6659
| blank newline { new_line lexbuf; space lexbuf }
6760
| blank { () }
6861

69-
and id = parse
70-
| id { lexeme lexbuf }
71-
| eof { raise (Error "id expected") }
72-
7362
and ident = parse
7463
| ident { lexeme lexbuf }
75-
| eof { raise (Error "ident expected") }
64+
| "" { raise (Error "ident expected") }
65+
66+
and end_on expected = parse
67+
| ("}}" | "}}}" | "") as lexed { check_mustaches ~expected ~lexed }
7668

7769
and comment acc = parse
7870
| "}}" { String.concat "" (List.rev acc) }
@@ -82,16 +74,14 @@ and comment acc = parse
8274
| eof { raise (Error "non-terminated comment") }
8375

8476
and mustache = parse
85-
| "{{{" { UNESCAPE_START (with_space space ident lexbuf |> split_ident) }
86-
| "{{&" { UNESCAPE_START_AMPERSAND (with_space space ident lexbuf |> split_ident) }
87-
| "{{#" { SECTION_START (with_space space ident lexbuf |> split_ident) }
88-
| "{{^" { SECTION_INVERT_START (with_space space ident lexbuf |> split_ident) }
89-
| "{{/" { SECTION_END (with_space space ident lexbuf |> split_ident) }
90-
| "{{>" { PARTIAL_START (0, with_space space ident lexbuf) }
91-
| "{{!" { COMMENT (tok_arg (comment []) lexbuf) }
92-
| "{{" { ESCAPE_START (with_space space ident lexbuf |> split_ident) }
93-
| "}}}" { UNESCAPE_END }
94-
| "}}" { END }
77+
| "{{" { ESCAPE (lex_tag lexbuf space ident (end_on "}}") |> split_ident) }
78+
| "{{{" { UNESCAPE (lex_tag lexbuf space ident (end_on "}}}") |> split_ident) }
79+
| "{{&" { UNESCAPE (lex_tag lexbuf space ident (end_on "}}") |> split_ident) }
80+
| "{{#" { OPEN_SECTION (lex_tag lexbuf space ident (end_on "}}") |> split_ident) }
81+
| "{{^" { OPEN_INVERTED_SECTION (lex_tag lexbuf space ident (end_on "}}") |> split_ident) }
82+
| "{{/" { CLOSE_SECTION (lex_tag lexbuf space ident (end_on "}}") |> split_ident) }
83+
| "{{>" { PARTIAL (0, lex_tag lexbuf space ident (end_on "}}")) }
84+
| "{{!" { COMMENT (tok_arg lexbuf (comment [])) }
9585
| raw newline { new_line lexbuf; RAW (lexeme lexbuf) }
9686
| raw { RAW (lexeme lexbuf) }
9787
| ['{' '}'] { RAW (lexeme lexbuf) }
@@ -136,33 +126,24 @@ and mustache = parse
136126
in
137127
loop 0 l
138128
in
139-
let segment_before tail l =
140-
let rec loop acc = function
141-
| [] -> List.rev acc
142-
| l when l == tail -> List.rev acc
143-
| y :: ys -> loop (y :: acc) ys
144-
in
145-
loop [] l
146-
in
147129
let is_standalone toks =
148130
let (skipped, toks) = skip_blanks toks in
149131
match toks with
150-
| (SECTION_START _, _, _) :: (END, _, _) :: toks'
151-
| (SECTION_INVERT_START _, _, _) :: (END, _, _) :: toks'
152-
| (SECTION_END _, _, _) :: (END, _, _) :: toks'
153-
| (PARTIAL_START _, _, _) :: (END, _, _) :: toks'
154-
| (COMMENT _, _, _) :: toks' ->
132+
| ((OPEN_SECTION _
133+
| OPEN_INVERTED_SECTION _
134+
| CLOSE_SECTION _
135+
| PARTIAL _
136+
| COMMENT _), _, _) as tok :: toks' ->
155137
let (_, toks_rest) = skip_blanks toks' in
156138
begin match toks_rest with
157139
| [] | [(EOF, _, _)] ->
158-
let toks_standalone =
159-
segment_before toks' toks |>
160-
function
161-
| [(PARTIAL_START (_, p), loc1, loc2); tok_end] ->
162-
[(PARTIAL_START (skipped, p), loc1, loc2); tok_end]
163-
| toks -> toks
140+
let tok =
141+
match tok with
142+
| (PARTIAL (_, p), loc1, loc2) ->
143+
(PARTIAL (skipped, p), loc1, loc2)
144+
| _ -> tok
164145
in
165-
Some (toks_standalone, toks_rest)
146+
Some (tok, toks_rest)
166147
| _ -> None
167148
end
168149
| _ -> None
@@ -176,9 +157,9 @@ and mustache = parse
176157
| [] ->
177158
let toks = slurp_line () in
178159
match is_standalone toks with
179-
| Some (toks_standalone, toks_rest) ->
180-
buffer := List.tl toks_standalone @ toks_rest;
181-
List.hd toks_standalone
160+
| Some (tok_standalone, toks_rest) ->
161+
buffer := toks_rest;
162+
tok_standalone
182163
| None ->
183164
buffer := List.tl toks; List.hd toks
184165
}

lib/mustache_parser.mly

Lines changed: 13 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -37,16 +37,13 @@
3737
%}
3838

3939
%token EOF
40-
%token END
41-
%token <string list> ESCAPE_START
42-
%token <string list> UNESCAPE_START_AMPERSAND
43-
%token <string list> SECTION_INVERT_START
44-
%token <string list> SECTION_START
45-
%token <string list> SECTION_END
46-
%token <int * string> PARTIAL_START
47-
%token <string list> UNESCAPE_START
40+
%token <string list> ESCAPE
41+
%token <string list> UNESCAPE
42+
%token <string list> OPEN_INVERTED_SECTION
43+
%token <string list> OPEN_SECTION
44+
%token <string list> CLOSE_SECTION
45+
%token <int * string> PARTIAL
4846
%token <string> COMMENT
49-
%token UNESCAPE_END
5047

5148
%token <string> RAW
5249

@@ -56,24 +53,23 @@
5653
%%
5754

5855
section:
59-
| ss = SECTION_INVERT_START END
56+
| ss = OPEN_INVERTED_SECTION
6057
e = mustache_expr
61-
se = SECTION_END END {
58+
se = CLOSE_SECTION {
6259
with_loc $sloc
6360
(Inverted_section (parse_section ss se e))
6461
}
65-
| ss = SECTION_START END
62+
| ss = OPEN_SECTION
6663
e = mustache_expr
67-
se = SECTION_END END {
64+
se = CLOSE_SECTION {
6865
with_loc $sloc
6966
(Section (parse_section ss se e))
7067
}
7168

7269
mustache_element:
73-
| elt = UNESCAPE_START UNESCAPE_END { with_loc $sloc (Unescaped elt) }
74-
| elt = UNESCAPE_START_AMPERSAND END { with_loc $sloc (Unescaped elt) }
75-
| elt = ESCAPE_START END { with_loc $sloc (Escaped elt) }
76-
| elt = PARTIAL_START END {
70+
| elt = UNESCAPE { with_loc $sloc (Unescaped elt) }
71+
| elt = ESCAPE { with_loc $sloc (Escaped elt) }
72+
| elt = PARTIAL {
7773
with_loc $sloc
7874
(Partial { indent = fst elt;
7975
name = snd elt;

0 commit comments

Comments
 (0)