Skip to content

Commit 91b1147

Browse files
committed
fixed issue with guards, where expected branch would be skipped
- also optimized the generated code for `match%mik` - fix escaping for strings
1 parent c2c252e commit 91b1147

File tree

2 files changed

+111
-73
lines changed

2 files changed

+111
-73
lines changed

common/mik_lexer.mll

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,13 @@ let escape_special = function
5151
| '$' -> {|\$|}
5252
| '|' -> {|\||}
5353
| c -> String.make 1 c
54+
55+
let needs_escape = function
56+
| '(' | ')' | '[' | ']' | '{' | '}' | '.' | '*' | '+' | '?' | '^' | '$' | '|' -> true
57+
| _ -> false
5458
}
5559

60+
5661
let whitespace = [' ' '\t' '\r']
5762
let lowercase = ['a'-'z']
5863
let uppercase = ['A'-'Z']
@@ -108,24 +113,12 @@ and char_literal buf = parse
108113
char_literal buf lexbuf
109114
}
110115
| '\'' { CHAR_LITERAL (Buffer.contents buf) }
111-
| ('(' as c) | (')' as c) {
112-
Buffer.add_string buf (escape_special c);
113-
char_literal buf lexbuf
114-
}
115-
| ('{' as c) | ('}' as c) {
116-
Buffer.add_string buf (escape_special c);
117-
char_literal buf lexbuf
118-
}
119-
| ('[' as c) | (']' as c) {
120-
Buffer.add_string buf (escape_special c);
121-
char_literal buf lexbuf
122-
}
123-
| ('.' as c) | ('*' as c) | ('+' as c) | ('?' as c) | ('^' as c) | ('$' as c) | ('|' as c) {
124-
Buffer.add_string buf (escape_special c);
125-
char_literal buf lexbuf
126-
}
127116
| _ as c {
128-
Buffer.add_char buf c;
117+
begin if needs_escape c then
118+
Buffer.add_string buf (escape_special c)
119+
else
120+
Buffer.add_char buf c
121+
end;
129122
char_literal buf lexbuf
130123
}
131124
| eof { raise (Error "Unterminated character literal") }
@@ -137,7 +130,11 @@ and string_literal buf = parse
137130
}
138131
| '"' { STRING_LITERAL (Buffer.contents buf) }
139132
| _ as c {
140-
Buffer.add_char buf c;
133+
begin if needs_escape c then
134+
Buffer.add_string buf (escape_special c)
135+
else
136+
Buffer.add_char buf c
137+
end;
141138
string_literal buf lexbuf
142139
}
143140
| eof { raise (Error "Unterminated string literal") }

ppx_regexp/transformations.ml

Lines changed: 96 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@ module Regexp = struct
1616
| Repeat ({ Location.txt = i, _; _ }, e) -> recurse (must_match && i > 0) e
1717
| Nongreedy e -> recurse must_match e
1818
| Capture _ -> Util.error ~loc "Unnamed capture is not allowed for %%pcre and %%mik."
19-
| Capture_as (idr, conv, e) ->
20-
fun (nG, bs) -> recurse must_match e (nG + 1, (idr, Some nG, conv, must_match) :: bs)
19+
| Capture_as (idr, conv, e) -> fun (nG, bs) -> recurse must_match e (nG + 1, (idr, Some nG, conv, must_match) :: bs)
2120
| Named_subs (idr, None, conv, e) | Named_subs (_, Some idr, conv, e) ->
2221
fun (nG, bs) -> recurse must_match e (nG + 1, (idr, Some nG, conv, must_match) :: bs)
2322
| Unnamed_subs (_, e) -> recurse must_match e
@@ -27,9 +26,7 @@ module Regexp = struct
2726
nG', ((res, None, Some (Pipe_all_func func), must_match) :: inner_bs) @ bs
2827
| Call _ -> Util.error ~loc "(&...) is not implemented for %%pcre and %%mik."
2928
in
30-
function
31-
| { Location.txt = Capture_as (idr, _, e); _ } -> recurse true e (0, [ idr, None, None, true ])
32-
| e -> recurse true e (0, [])
29+
function { Location.txt = Capture_as (idr, _, e); _ } -> recurse true e (0, [ idr, None, None, true ]) | e -> recurse true e (0, [])
3330

3431
let to_string ~ctx =
3532
let p_alt, p_seq, p_suffix, p_atom = 0, 1, 2, 3 in
@@ -41,9 +38,7 @@ module Regexp = struct
4138
let content =
4239
match Util.Ctx.find var_name ctx with
4340
| Some value -> value
44-
| None ->
45-
Util.error ~loc
46-
"Variable '%s' not found. %%pcre and %%mik only support global let bindings for substitution." var_name
41+
| None -> Util.error ~loc "Variable '%s' not found. %%pcre and %%mik only support global let bindings for substitution." var_name
4742
in
4843
content
4944
in
@@ -86,11 +81,7 @@ let rec wrap_group_bindings ~loc ~captured_acc rhs offG = function
8681
let [%p ppat_var ~loc varG] = [%e func_app] in
8782
[%e rhs]]
8883
| (varG, iG, conv, mustG) :: bs ->
89-
let eG =
90-
match iG with
91-
| None -> [%expr Re.Group.get _g 0]
92-
| Some iG -> [%expr Re.Group.get _g [%e eint ~loc (offG + iG + 1)]]
93-
in
84+
let eG = match iG with None -> [%expr Re.Group.get _g 0] | Some iG -> [%expr Re.Group.get _g [%e eint ~loc (offG + iG + 1)]] in
9485
let eG =
9586
match conv with
9687
| None -> eG
@@ -125,8 +116,7 @@ let group_by_guard cases =
125116
let guard = case.pc_guard in
126117
let rec add_to_groups = function
127118
| [] -> [ guard, [ case ] ]
128-
| (g, cases) :: rest ->
129-
if guards_equal g guard then (g, case :: cases) :: rest else (g, cases) :: add_to_groups rest
119+
| (g, cases) :: rest -> if guards_equal g guard then (g, case :: cases) :: rest else (g, cases) :: add_to_groups rest
130120
in
131121
add_to_groups groups
132122
end
@@ -205,27 +195,53 @@ let transform_cases ~mode ~opts ~loc ~ctx cases =
205195
fun { txt = re_src; loc = { loc_start; loc_end; _ } } ->
206196
let re_offset = (loc_end.pos_cnum - loc_start.pos_cnum - String.length re_src) / 2 in
207197
let pos = { loc_start with pos_cnum = loc_start.pos_cnum + re_offset; pos_lnum = loc_end.pos_lnum } in
208-
let parser =
209-
match mode with `Pcre -> Regexp.parse_exn ~target:`Match | `Mik -> Regexp.parse_mik_exn ~target:`Match
210-
in
198+
let parser = match mode with `Pcre -> Regexp.parse_exn ~target:`Match | `Mik -> Regexp.parse_mik_exn ~target:`Match in
211199
let re, bs, nG = extract_bindings ~parser ~pos ~ctx re_src in
212200
re, nG, bs, case.pc_rhs, case.pc_guard
213201
end
214202
in
203+
let group_by_pattern cases =
204+
List.fold_left
205+
begin
206+
fun acc (re, nG, bs, rhs, guard) ->
207+
let found, groups =
208+
List.fold_left
209+
begin
210+
fun (found, acc_groups) (re', cases) ->
211+
if found then found, (re', cases) :: acc_groups
212+
else if re = re' then true, (re', (nG, bs, rhs, guard) :: cases) :: acc_groups
213+
else false, (re', cases) :: acc_groups
214+
end
215+
(false, []) acc
216+
in
217+
if found then groups else (re, [ nG, bs, rhs, guard ]) :: groups
218+
end
219+
[] cases
220+
in
221+
222+
let compute_offsets l =
223+
let result, _ =
224+
List.fold_left
225+
begin
226+
fun (acc, offG) (re, case_group) ->
227+
let nG =
228+
let n, _, _, _ = List.hd (List.rev case_group) in
229+
n
230+
in
231+
(re, case_group, offG) :: acc, offG + nG
232+
end
233+
([], 0) l
234+
in
235+
List.rev result
236+
in
237+
215238
let cases, default_cases = separate_defaults [] cases in
216239
let default_rhs = make_default_rhs ~loc default_cases in
240+
let processed_cases = List.map aux cases |> group_by_pattern |> compute_offsets in
217241

218-
let processed_cases = List.rev_map aux cases in
219-
let res = pexp_array ~loc @@ List.map (fun (re, _, _, _, _) -> re) processed_cases in
242+
let res = pexp_array ~loc @@ List.map (fun (re, _, _) -> re) processed_cases in
220243

221-
let opts_expr =
222-
let rec opts_to_expr = function
223-
| [] -> [%expr []]
224-
| `Caseless :: rest -> [%expr `Caseless :: [%e opts_to_expr rest]]
225-
| _ -> assert false
226-
in
227-
opts_to_expr opts
228-
in
244+
let opts_expr = match opts with [] -> [%expr []] | [ `Caseless ] -> [%expr [ `Caseless ]] | _ -> failwith "Unknown option" in
229245

230246
let comp =
231247
[%expr
@@ -239,43 +255,71 @@ let transform_cases ~mode ~opts ~loc ~ctx cases =
239255
let re_binding = value_binding ~loc ~pat:(ppat_var ~loc { txt = var; loc }) ~expr:comp in
240256
let e_comp = pexp_ident ~loc { txt = Lident var; loc } in
241257

242-
let rec handle_cases i offG = function
243-
| [] -> default_rhs
244-
| (_, nG, bs, rhs, guard) :: cases ->
245-
let bs = List.rev bs in
246-
let handled_cases = handle_cases (i + 1) (offG + nG) cases in
247-
[%expr
248-
if Re.Mark.test _g (snd [%e e_comp]).([%e eint ~loc i]) then
249-
[%e
250-
let wrapped_with_guard =
251-
match guard with
252-
| None -> rhs
253-
| Some guard_expr -> [%expr if [%e guard_expr] then [%e rhs] else [%e handled_cases]]
258+
let case_handlers =
259+
List.mapi
260+
begin
261+
fun i (_, case_group, offG) ->
262+
let handler_name = Printf.sprintf "_case_%d" i in
263+
let handler_body =
264+
let rec mk_guard_chains = function
265+
| [] -> [%expr None]
266+
| (_, bs, rhs, guard) :: rest ->
267+
let bs = List.rev bs in
268+
begin
269+
match guard with
270+
| None -> [%expr Some [%e wrap_group_bindings ~captured_acc:[] ~loc rhs offG bs]]
271+
| Some guard_expr ->
272+
let guarded = [%expr if [%e guard_expr] then Some [%e rhs] else [%e mk_guard_chains rest]] in
273+
wrap_group_bindings ~captured_acc:[] ~loc guarded offG bs
274+
end
254275
in
255-
wrap_group_bindings ~captured_acc:[] ~loc wrapped_with_guard offG bs]
256-
else [%e handled_cases]]
276+
[%expr fun _g -> [%e mk_guard_chains (List.rev case_group)]]
277+
in
278+
handler_name, handler_body
279+
end
257280
in
258281

282+
let mk_checks cases_with_offsets =
283+
let indexed = List.mapi (fun i x -> i, x) cases_with_offsets in
284+
List.fold_right
285+
begin
286+
fun (i, _) acc ->
287+
let handler_name = Printf.sprintf "_case_%d" i in
288+
[%expr
289+
if Re.Mark.test _g (snd [%e e_comp]).([%e eint ~loc i]) then [%e pexp_ident ~loc { txt = Lident handler_name; loc }] _g
290+
else [%e acc]]
291+
end
292+
indexed [%expr None]
293+
in
294+
295+
let handlers = case_handlers processed_cases in
296+
let dispatchers = mk_checks processed_cases in
297+
259298
let match_expr =
260299
[%expr
261300
match Re.exec_opt (fst [%e e_comp]) _ppx_regexp_v with
262301
| None -> [%e default_rhs]
263-
| Some _g -> [%e handle_cases 0 0 processed_cases]]
302+
| Some _g ->
303+
[%e
304+
List.fold_left
305+
begin
306+
fun acc (name, body) ->
307+
[%expr
308+
let [%p ppat_var ~loc { txt = name; loc }] = [%e body] in
309+
[%e acc]]
310+
end
311+
[%expr match [%e dispatchers] with Some result -> result | None -> [%e default_rhs]]
312+
handlers]]
264313
in
265-
266-
( [%expr
267-
let _ppx_regexp_v = [%e pexp_ident ~loc { txt = Lident "_ppx_regexp_v"; loc }] in
268-
[%e match_expr]],
269-
[ re_binding ] )
314+
[%expr [%e match_expr]], [ re_binding ]
270315

271316
(* processes each case individually instead of combining them into one RE *)
272317
let transform_mixed_match ~loc ~ctx ?matched_expr cases acc =
273318
let aux case =
274319
match case.pc_lhs.ppat_desc with
275320
| Ppat_extension
276-
( { txt = "mik"; _ },
277-
PStr [ { pstr_desc = Pstr_eval ({ pexp_desc = Pexp_constant (Pconst_string (pat, str_loc, _)); _ }, _); _ } ]
278-
) ->
321+
({ txt = "mik"; _ }, PStr [ { pstr_desc = Pstr_eval ({ pexp_desc = Pexp_constant (Pconst_string (pat, str_loc, _)); _ }, _); _ } ])
322+
->
279323
let pos = str_loc.loc_start in
280324
let parser = Regexp.parse_mik_exn ~target:`Match in
281325
let re, bs, nG = extract_bindings ~parser ~pos ~ctx pat in
@@ -326,13 +370,10 @@ let transform_mixed_match ~loc ~ctx ?matched_expr cases acc =
326370
| Some _g ->
327371
[%e
328372
let bs = List.rev bs in
329-
(* let body = wrap_group_bindings ~captured_acc:[] ~loc rhs 0 bs in *)
330373
match guard with
331374
| None -> wrap_group_bindings ~captured_acc:[] ~loc rhs 0 bs
332375
| Some g ->
333-
let guarded_rhs =
334-
[%expr if [%e g] then [%e rhs] else [%e build_ordered_match input_var (case_idx + 1) rest rest_comps]]
335-
in
376+
let guarded_rhs = [%expr if [%e g] then [%e rhs] else [%e build_ordered_match input_var (case_idx + 1) rest rest_comps]] in
336377
wrap_group_bindings ~captured_acc:[] ~loc guarded_rhs 0 bs]
337378
| None -> [%e build_ordered_match input_var (case_idx + 1) rest rest_comps]]
338379
| `Mik _ :: rest, _ ->

0 commit comments

Comments
 (0)