Skip to content

Commit fd2d8ce

Browse files
committed
reimplement and document the handling of standalone tokens
The previous approach to standalone-token handling would peek into a prefix of the line, and decide what to do based on the prefix. This relied on the property that we would have at most one standalone token per line. This is unfortunately not true; consider:

```
Begin.
{{#foo}} {{#bar}}
Middle.
{{/bar}} {{/foo}}
End.
```

The new approach processes the whole line at once, failing if it encounters a non-whitespace, non-standalone token.
1 parent c4f1f74 commit fd2d8ce

File tree

2 files changed

+102
-33
lines changed

2 files changed

+102
-33
lines changed

lib/mustache_lexer.mll

Lines changed: 82 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,41 @@ and mustache = parse
110110
| eof { EOF }
111111

112112
{
113+
(* Trim whitespace around standalone tags.
114+
115+
The Mustache specification is careful with its treatment of
116+
whitespace. In particular, tags that do not themselves expand to
117+
visible content are defined as "standalone", with the
118+
requirement that if one or several standalone tags "stand alone"
119+
in a line (there is nothing else but whitespace), the whitespace
120+
of this line should be omitted.
121+
122+
For example, this means that:
123+
{{#foo}}
124+
I can access {{var}} inside the section.
125+
{{/foo}}
126+
takes, once rendered, only 1 line instead of 3: the newlines
127+
after {{#foo}} and {{/foo}} are part of the "standalone
128+
whitespace", so they are not included in the output.
129+
130+
Note: if a line contains only whitespace, no standalone tag,
131+
then the whitespace is preserved.
132+
133+
We implement this by a post-processing pass on the lexer token
134+
stream. We split the token stream, one sub-stream per line, and
135+
then for each token line we determine if it satisfies the
136+
standalone criterion.
137+
138+
Another piece of information collected at the same time, as it is also
139+
part of whitespace processing, is the "indentation" of partials:
140+
if a partial expands to multi-line content, and if it is
141+
indented at the use-site (it is at a non-zero column with only
142+
whitespace before it on the line), then the specification
143+
mandates that all its lines should be indented by the same
144+
amount. We collect this information during the whitespace
145+
postprocessing of tokens, and store it in the Partial
146+
constructor as the first parameter.
147+
*)
113148
let handle_standalone lexer lexbuf =
114149
let ends_with_newline s =
115150
String.length s > 0 &&
@@ -148,40 +183,54 @@ and mustache = parse
148183
in
149184
loop 0 l
150185
in
151-
let is_standalone toks =
152-
let (skipped, toks) = skip_blanks toks in
153-
match toks with
154-
| ((OPEN_SECTION _
155-
| OPEN_INVERTED_SECTION _
156-
| CLOSE_SECTION _
157-
| PARTIAL _
158-
| COMMENT _), _, _) as tok :: toks' ->
159-
let (_, toks_rest) = skip_blanks toks' in
160-
begin match toks_rest with
161-
| [] | [(EOF, _, _)] ->
162-
let tok =
163-
match tok with
164-
| (PARTIAL (_, p), loc1, loc2) ->
165-
(PARTIAL (skipped, p), loc1, loc2)
166-
| _ -> tok
167-
in
168-
Some (tok, toks_rest)
169-
| _ -> None
170-
end
171-
| _ -> None
186+
let trim_standalone toks =
187+
let toks =
188+
(* if the line starts with a partial,
189+
turn the skipped blank into partial indentation *)
190+
let (skipped, toks_after_blank) = skip_blanks toks in
191+
match toks_after_blank with
192+
| (PARTIAL (_ , name), loc1, loc2) :: rest ->
193+
(PARTIAL (skipped, name), loc1, loc2) :: rest
194+
| _ -> toks
195+
in
196+
let toks =
197+
(* if the line only contains whitespace and at least one standalone tag,
198+
remove all whitespace *)
199+
let rec standalone acc = function
200+
| (RAW s, _, _) :: rest when is_blank s ->
201+
(* omit whitespace *)
202+
standalone acc rest
203+
| ((OPEN_SECTION _
204+
| OPEN_INVERTED_SECTION _
205+
| CLOSE_SECTION _
206+
| PARTIAL _
207+
| COMMENT _), _, _) as tok :: rest ->
208+
(* collect standalone tags *)
209+
standalone (tok :: acc) rest
210+
| [] | (EOF, _, _) :: _ ->
211+
(* end of line *)
212+
if (acc = []) then
213+
(* if acc is empty, the line only contains whitespace,
214+
which should be kept *)
215+
None
216+
else
217+
Some (List.rev acc)
218+
| _non_blank :: _rest ->
219+
(* non-blank, non-standalone token *)
220+
None
221+
in
222+
match standalone [] toks with
223+
| None -> toks
224+
| Some standalone_toks -> standalone_toks
225+
in
226+
assert (toks <> []);
227+
toks
172228
in
173-
174229
let buffer = ref [] in
175230
fun () ->
176-
match !buffer with
177-
| tok :: toks ->
178-
buffer := toks; tok
179-
| [] ->
180-
let toks = slurp_line () in
181-
match is_standalone toks with
182-
| Some (tok_standalone, toks_rest) ->
183-
buffer := toks_rest;
184-
tok_standalone
185-
| None ->
186-
buffer := List.tl toks; List.hd toks
231+
let toks = match !buffer with
232+
| (_ :: _) as toks -> toks
233+
| [] -> trim_standalone (slurp_line ())
234+
in
235+
buffer := List.tl toks; List.hd toks
187236
}

lib_test/test_mustache.ml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,26 @@ let tests = [
103103
, [ ( `O [ "a" , `String "foo" ],
104104
"foo" ) ] ) ;
105105

106+
( (* check that a whitespace line is omitted
107+
if it contains (several) standalone tokens *)
108+
"Begin
109+
{{#foo}} {{#bar}}
110+
Middle
111+
{{/bar}} {{/foo}}
112+
End
113+
"
114+
, concat [
115+
raw "Begin\n";
116+
section ["foo"] (section ["bar"] (raw "Middle\n"));
117+
raw "End\n";
118+
]
119+
, [ ( `O [ "foo" , `O []; "bar", `O [] ],
120+
"Begin
121+
Middle
122+
End
123+
"
124+
) ] ) ;
125+
106126
]
107127

108128
let mkloc (lnum_s, bol_s, cnum_s, lnum_e, bol_e, cnum_e) =

0 commit comments

Comments
 (0)