Skip to content

Commit c2c252e

Browse files
committed
access to RE bound variables inside respective match guards
- cases no longer grouped by guard in `match%mik/pcre`, every one is compiled into a single RE - if a mark is matched, bind variables and test guard - limitation of the current implementation is that you can't have two of the same matches with different guards. because they are compiled into one RE, and they aren't merged, Re.alt will return the first match regardless, and the group will be wrong for Re.Match.get - fix `structure_item`
1 parent edc86a5 commit c2c252e

File tree

2 files changed

+116
-125
lines changed

2 files changed

+116
-125
lines changed

ppx_regexp/ppx_regexp.ml

Lines changed: 38 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@ open Ppxlib
1818
open Ast_builder.Default
1919

2020
let transformation ctx =
21-
object
21+
object (self)
2222
inherit [value_binding list] Ast_traverse.fold_map as super
2323

24+
(* Replace the entire method! structure_item in ast_builder.ml with this: *)
2425
method! structure_item item acc =
2526
match item.pstr_desc with
2627
(* let%mik/%pcre x = {|some regex|}*)
@@ -30,73 +31,67 @@ let transformation ctx =
3031
let bindings = Transformations.transform_let ~mode ~ctx vbs in
3132
let new_item = { item with pstr_desc = Pstr_value (rec_flag, bindings) } in
3233
new_item, acc
33-
(* let x = {%mik|some regex|} or {%pcre|some regex|}*)
34+
(* let x = expression (which might contain %mik/%pcre) *)
3435
| Pstr_value (rec_flag, vbs) ->
35-
let has_ppx_extensions =
36-
List.exists
37-
begin
38-
fun vb ->
39-
match vb.pvb_expr.pexp_desc with Pexp_extension ({ txt = "pcre" | "mik"; _ }, _) -> true | _ -> false
40-
end
41-
vbs
36+
let processed_vbs, collected_bindings =
37+
List.fold_left
38+
(fun (vbs_acc, bindings_acc) vb ->
39+
match vb.pvb_expr.pexp_desc with
40+
| Pexp_extension ({ txt = ("pcre" | "mik") as ext; _ }, PStr [ { pstr_desc = Pstr_eval (expr, _); _ } ])
41+
when match expr.pexp_desc with Pexp_constant (Pconst_string _) -> true | _ -> false ->
42+
let mode = if ext = "pcre" then `Pcre else `Mik in
43+
let new_vb = { vb with pvb_expr = expr } in
44+
let transformed = Transformations.transform_let ~mode ~ctx [ new_vb ] in
45+
List.hd transformed :: vbs_acc, bindings_acc
46+
| _ ->
47+
let new_expr, new_bindings = self#expression vb.pvb_expr bindings_acc in
48+
let new_vb = { vb with pvb_expr = new_expr } in
49+
new_vb :: vbs_acc, new_bindings)
50+
([], acc) vbs
4251
in
43-
44-
if has_ppx_extensions then begin
45-
let bindings =
46-
List.map
47-
begin
48-
fun vb ->
49-
match vb.pvb_expr.pexp_desc with
50-
| Pexp_extension
51-
({ txt = ("pcre" | "mik") as ext; _ }, PStr [ { pstr_desc = Pstr_eval (expr, _); _ } ]) ->
52-
let mode = if ext = "pcre" then `Pcre else `Mik in
53-
let new_vb = { vb with pvb_expr = expr } in
54-
let transformed = Transformations.transform_let ~mode ~ctx [ new_vb ] in
55-
List.hd transformed
56-
| _ -> vb
57-
end
58-
vbs
59-
in
60-
let new_item = { item with pstr_desc = Pstr_value (rec_flag, bindings) } in
61-
new_item, acc
62-
end
63-
else super#structure_item item acc
52+
let new_item = { item with pstr_desc = Pstr_value (rec_flag, List.rev processed_vbs) } in
53+
new_item, collected_bindings
6454
| _ -> super#structure_item item acc
6555

6656
method! expression e_ext acc =
6757
let e_ext, acc = super#expression e_ext acc in
6858
let make_transformations ~mode ~opts ~loc = function
59+
| Pexp_function cases ->
60+
let cases, bindings = Transformations.transform_cases ~mode ~opts ~loc ~ctx cases in
61+
[%expr fun _ppx_regexp_v -> [%e cases]], bindings @ acc
6962
| Pexp_match (e, cases) ->
7063
let cases, bindings = Transformations.transform_cases ~mode ~opts ~loc ~ctx cases in
7164
( [%expr
7265
let _ppx_regexp_v = [%e e] in
7366
[%e cases]],
7467
bindings @ acc )
75-
| Pexp_function cases ->
76-
let cases, bindings = Transformations.transform_cases ~mode ~opts ~loc ~ctx cases in
77-
[%expr fun _ppx_regexp_v -> [%e cases]], bindings @ acc
7868
| _ ->
7969
Util.error ~loc "[%%pcre] and [%%mik] only apply to match, function and global let declarations of strings."
8070
in
8171
match e_ext.pexp_desc with
72+
(* match%mik/match%pcre and function%mik/function%pcre*)
73+
| Pexp_extension
74+
({ txt = ("pcre" | "mik" | "pcre_i" | "mik_i") as ext; _ }, PStr [ { pstr_desc = Pstr_eval (e, _); _ } ]) ->
75+
let mode = if String.starts_with ~prefix:"pcre" ext then `Pcre else `Mik in
76+
let opts = if String.ends_with ~suffix:"_i" ext then [ `Caseless ] else [] in
77+
let loc = e.pexp_loc in
78+
make_transformations ~mode ~opts ~loc e.pexp_desc
8279
(* match smth with | {%mik|some regex|} -> ...*)
8380
| Pexp_match (matched_expr, cases) ->
8481
let has_mik_case =
8582
List.exists
8683
(fun case -> match case.pc_lhs.ppat_desc with Ppat_extension ({ txt = "mik"; _ }, _) -> true | _ -> false)
8784
cases
8885
in
89-
if has_mik_case then Transformations.transform_mixed_match ~loc:e_ext.pexp_loc ~ctx matched_expr cases acc
86+
if has_mik_case then Transformations.transform_mixed_match ~loc:e_ext.pexp_loc ~ctx ~matched_expr cases acc
9087
else e_ext, acc
91-
(* match%mik/match%pcre and function%mik/function%pcre*)
92-
| Pexp_extension ({ txt = ("pcre" | "mik") as ext; _ }, PStr [ { pstr_desc = Pstr_eval (e, _); _ } ]) ->
93-
let mode = if ext = "pcre" then `Pcre else `Mik in
94-
let loc = e.pexp_loc in
95-
make_transformations ~mode ~opts:[] ~loc e.pexp_desc
96-
| Pexp_extension ({ txt = ("pcre_i" | "mik_i") as ext; _ }, PStr [ { pstr_desc = Pstr_eval (e, _); _ } ]) ->
97-
let mode = if ext = "pcre" then `Pcre else `Mik in
98-
let loc = e.pexp_loc in
99-
make_transformations ~mode ~opts:[ `Caseless ] ~loc e.pexp_desc
88+
| Pexp_function cases ->
89+
let has_mik_case =
90+
List.exists
91+
(fun case -> match case.pc_lhs.ppat_desc with Ppat_extension ({ txt = "mik"; _ }, _) -> true | _ -> false)
92+
cases
93+
in
94+
if has_mik_case then Transformations.transform_mixed_match ~loc:e_ext.pexp_loc ~ctx cases acc else e_ext, acc
10095
| _ -> e_ext, acc
10196
end
10297

ppx_regexp/transformations.ml

Lines changed: 78 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,6 @@ let extract_bindings ~(parser : ?pos:position -> string -> string Regexp_types.t
143143
let r = parser ~pos s in
144144
let nG, bs = Regexp.bindings r in
145145
let re_str = Regexp.to_string ~ctx r in
146-
Format.printf "RE: %s@." re_str;
147146
let loc = Location.none in
148147
estring ~loc re_str, bs, nG
149148

@@ -213,81 +212,65 @@ let transform_cases ~mode ~opts ~loc ~ctx cases =
213212
re, nG, bs, case.pc_rhs, case.pc_guard
214213
end
215214
in
216-
217215
let cases, default_cases = separate_defaults [] cases in
218-
219216
let default_rhs = make_default_rhs ~loc default_cases in
220-
let grouped_cases = group_by_guard cases in
221-
222-
let compiled_groups =
223-
List.map
224-
begin
225-
fun (guard, group_cases) ->
226-
let processed_cases = List.rev_map aux group_cases in
227-
let res = pexp_array ~loc @@ List.map (fun (re, _, _, _, _) -> re) processed_cases in
228-
229-
let opts_expr =
230-
let rec opts_to_expr = function
231-
| [] -> [%expr []]
232-
| `Caseless :: rest -> [%expr `Caseless :: [%e opts_to_expr rest]]
233-
| _ -> assert false
234-
in
235-
opts_to_expr opts
236-
in
237-
let comp =
238-
[%expr
239-
let a = Array.map (fun s -> Re.mark (Re.Perl.re ~opts:[%e opts_expr] s)) [%e res] in
240-
let marks = Array.map fst a in
241-
let re = Re.compile (Re.alt (Array.to_list (Array.map snd a))) in
242-
re, marks]
243-
in
244-
let var = Util.fresh_var () in
245-
let re_binding = value_binding ~loc ~pat:(ppat_var ~loc { txt = var; loc }) ~expr:comp in
246-
let e_comp = pexp_ident ~loc { txt = Lident var; loc } in
247217

248-
let rec handle_cases i offG = function
249-
| [] -> [%expr assert false]
250-
| (_, nG, bs, rhs, _) :: cases ->
251-
let bs = List.rev bs in
252-
[%expr
253-
if Re.Mark.test _g (snd [%e e_comp]).([%e eint ~loc i]) then
254-
[%e wrap_group_bindings ~captured_acc:[] ~loc rhs offG bs]
255-
else [%e handle_cases (i + 1) (offG + nG) cases]]
256-
in
218+
let processed_cases = List.rev_map aux cases in
219+
let res = pexp_array ~loc @@ List.map (fun (re, _, _, _, _) -> re) processed_cases in
257220

258-
let match_expr =
259-
[%expr
260-
match Re.exec_opt (fst [%e e_comp]) _ppx_regexp_v with
261-
| None -> None
262-
| Some _g -> Some [%e handle_cases 0 0 processed_cases]]
263-
in
221+
let opts_expr =
222+
let rec opts_to_expr = function
223+
| [] -> [%expr []]
224+
| `Caseless :: rest -> [%expr `Caseless :: [%e opts_to_expr rest]]
225+
| _ -> assert false
226+
in
227+
opts_to_expr opts
228+
in
264229

265-
guard, match_expr, re_binding
266-
end
267-
grouped_cases
230+
let comp =
231+
[%expr
232+
let a = Array.map (fun s -> Re.mark (Re.Perl.re ~opts:[%e opts_expr] s)) [%e res] in
233+
let marks = Array.map fst a in
234+
let re = Re.compile (Re.alt (Array.to_list (Array.map snd a))) in
235+
re, marks]
268236
in
269237

270-
let rec try_groups = function
238+
let var = Util.fresh_var () in
239+
let re_binding = value_binding ~loc ~pat:(ppat_var ~loc { txt = var; loc }) ~expr:comp in
240+
let e_comp = pexp_ident ~loc { txt = Lident var; loc } in
241+
242+
let rec handle_cases i offG = function
271243
| [] -> default_rhs
272-
| (None, match_expr, _) :: rest ->
273-
[%expr match [%e match_expr] with Some result -> result | None -> [%e try_groups rest]]
274-
| (Some guard_expr, match_expr, _) :: rest ->
244+
| (_, nG, bs, rhs, guard) :: cases ->
245+
let bs = List.rev bs in
246+
let handled_cases = handle_cases (i + 1) (offG + nG) cases in
275247
[%expr
276-
if [%e guard_expr] then (match [%e match_expr] with Some result -> result | None -> [%e try_groups rest])
277-
else [%e try_groups rest]]
248+
if Re.Mark.test _g (snd [%e e_comp]).([%e eint ~loc i]) then
249+
[%e
250+
let wrapped_with_guard =
251+
match guard with
252+
| None -> rhs
253+
| Some guard_expr -> [%expr if [%e guard_expr] then [%e rhs] else [%e handled_cases]]
254+
in
255+
wrap_group_bindings ~captured_acc:[] ~loc wrapped_with_guard offG bs]
256+
else [%e handled_cases]]
278257
in
279258

280-
let final_expr = try_groups compiled_groups in
281-
let all_bindings = List.map (fun (_, _, b) -> b) compiled_groups in
259+
let match_expr =
260+
[%expr
261+
match Re.exec_opt (fst [%e e_comp]) _ppx_regexp_v with
262+
| None -> [%e default_rhs]
263+
| Some _g -> [%e handle_cases 0 0 processed_cases]]
264+
in
282265

283266
( [%expr
284267
let _ppx_regexp_v = [%e pexp_ident ~loc { txt = Lident "_ppx_regexp_v"; loc }] in
285-
[%e final_expr]],
286-
all_bindings )
268+
[%e match_expr]],
269+
[ re_binding ] )
287270

288271
(* processes each case individually instead of combining them into one RE *)
289-
let transform_mixed_match ~loc ~ctx matched_expr cases acc =
290-
let prepare_case case =
272+
let transform_mixed_match ~loc ~ctx ?matched_expr cases acc =
273+
let aux case =
291274
match case.pc_lhs.ppat_desc with
292275
| Ppat_extension
293276
( { txt = "mik"; _ },
@@ -300,23 +283,26 @@ let transform_mixed_match ~loc ~ctx matched_expr cases acc =
300283
| _ -> `Regular case
301284
in
302285

303-
let prepared_cases = List.map prepare_case cases in
286+
let prepared_cases = List.map aux cases in
304287

305-
(* Check if there are any mik cases *)
306288
let has_mik = List.exists (function `Mik _ -> true | _ -> false) prepared_cases in
307289

308-
if not has_mik then pexp_match ~loc matched_expr cases, acc
290+
if not has_mik then begin
291+
match matched_expr with None -> pexp_function ~loc cases, acc | Some m -> pexp_match ~loc m cases, acc
292+
end
309293
else begin
310294
let mik_compilations =
311295
List.mapi
312-
(fun i case ->
313-
match case with
314-
| `Mik (re, _, _, _, _) ->
315-
let comp_var = Util.fresh_var () in
316-
let comp_expr = [%expr Re.compile (Re.Perl.re [%e re])] in
317-
let binding = value_binding ~loc ~pat:(ppat_var ~loc { txt = comp_var; loc }) ~expr:comp_expr in
318-
Some (i, comp_var, binding)
319-
| _ -> None)
296+
begin
297+
fun i case ->
298+
match case with
299+
| `Mik (re, _, _, _, _) ->
300+
let comp_var = Util.fresh_var () in
301+
let comp_expr = [%expr Re.compile (Re.Perl.re [%e re])] in
302+
let binding = value_binding ~loc ~pat:(ppat_var ~loc { txt = comp_var; loc }) ~expr:comp_expr in
303+
Some (i, comp_var, binding)
304+
| _ -> None
305+
end
320306
prepared_cases
321307
|> List.filter_map (fun x -> x)
322308
in
@@ -340,11 +326,14 @@ let transform_mixed_match ~loc ~ctx matched_expr cases acc =
340326
| Some _g ->
341327
[%e
342328
let bs = List.rev bs in
343-
let body = wrap_group_bindings ~captured_acc:[] ~loc rhs 0 bs in
329+
(* let body = wrap_group_bindings ~captured_acc:[] ~loc rhs 0 bs in *)
344330
match guard with
345-
| None -> body
331+
| None -> wrap_group_bindings ~captured_acc:[] ~loc rhs 0 bs
346332
| Some g ->
347-
[%expr if [%e g] then [%e body] else [%e build_ordered_match input_var (case_idx + 1) rest rest_comps]]]
333+
let guarded_rhs =
334+
[%expr if [%e g] then [%e rhs] else [%e build_ordered_match input_var (case_idx + 1) rest rest_comps]]
335+
in
336+
wrap_group_bindings ~captured_acc:[] ~loc guarded_rhs 0 bs]
348337
| None -> [%e build_ordered_match input_var (case_idx + 1) rest rest_comps]]
349338
| `Mik _ :: rest, _ ->
350339
(* shouldn't happen if indices are correct *)
@@ -354,15 +343,22 @@ let transform_mixed_match ~loc ~ctx matched_expr cases acc =
354343
let match_body = build_ordered_match [%expr _ppx_regexp_v] 0 prepared_cases mik_compilations in
355344

356345
let match_expr =
357-
List.fold_right
358-
(fun binding expr ->
346+
let init =
347+
match matched_expr with
348+
| None -> [%expr fun _ppx_regexp_v -> [%e match_body]]
349+
| Some m ->
359350
[%expr
360-
let [%p binding.pvb_pat] = [%e binding.pvb_expr] in
361-
[%e expr]])
362-
bindings
363-
[%expr
364-
let _ppx_regexp_v = [%e matched_expr] in
365-
[%e match_body]]
351+
let _ppx_regexp_v = [%e m] in
352+
[%e match_body]]
353+
in
354+
List.fold_left
355+
begin
356+
fun expr binding ->
357+
[%expr
358+
let [%p binding.pvb_pat] = [%e binding.pvb_expr] in
359+
[%e expr]]
360+
end
361+
init bindings
366362
in
367363

368364
match_expr, bindings @ acc

0 commit comments

Comments
 (0)