Skip to content

Commit 3c13d68

Browse files
committed
fix: issue with memory allocation for mixed match
changed code generation for regular match to make it fix ready for when a solution is found for the ocaml-re marking machinery stopping at the first match, and not searching further. Updated the docs to warn of this limitation
1 parent 490285d commit 3c13d68

File tree

3 files changed

+54
-62
lines changed

3 files changed

+54
-62
lines changed

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,11 @@ patterns succeed, the match is committed before we can check the guard
202202
condition.
203203

204204
`ppx_regexp_extended` gets around this by grouping match cases with the same guards and compiling those together, instead
205-
of every match case being compiled into one RE.
205+
of every match case being compiled into one RE.
206+
> [!WARNING]
207+
> There is still a limitation with the guards: if two branches have overlapping REs, and the first has a guard that evaluates to false,
208+
> then the second branch will not be ran. This is because of a limitation with `ocaml-re`'s Marking machine, it only
209+
> tests until a mark is found, and doesn't search further.
206210
207211

208212
### No Exhaustiveness Check

ppx_regexp/ppx_regexp.ml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,21 @@ let transformation ctx =
101101
| _ -> e_ext, acc
102102
end
103103

104+
let dispatch_function_binding ~loc =
105+
let open Ppxlib in
106+
let open Ast_builder.Default in
107+
value_binding ~loc
108+
~pat:(ppat_var ~loc { txt = "__ppx_regexp_dispatch"; loc })
109+
~expr:
110+
[%expr
111+
fun marks handlers _g ->
112+
let rec loop i =
113+
if i >= Array.length marks then None
114+
else if Re.Mark.test _g marks.(i) then (match handlers.(i) _g with Some result -> Some result | None -> loop (i + 1))
115+
else loop (i + 1)
116+
in
117+
loop 0]
118+
104119
let impl str =
105120
let ctx = Util.Ctx.empty () in
106121
let str, rev_bindings = (transformation ctx)#structure str [] in
@@ -110,6 +125,7 @@ let impl str =
110125
let loc = Location.none in
111126
[%str
112127
open struct
128+
[%%i pstr_value ~loc Nonrecursive [ dispatch_function_binding ~loc ]]
113129
[%%i pstr_value ~loc Nonrecursive rev_bindings]
114130
end]
115131
in

ppx_regexp/transformations.ml

Lines changed: 33 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ module Regexp = struct
1515
| Opt e -> recurse false e
1616
| Repeat ({ Location.txt = i, _; _ }, e) -> recurse (must_match && i > 0) e
1717
| Nongreedy e -> recurse must_match e
18-
| Capture _ -> Util.error ~loc "Unnamed capture is not allowed for %%pcre and %%mik."
18+
| Capture _ -> Util.error ~loc "Unnamed capture is not allowed for %%pcre and %%mikmatch."
1919
| Capture_as (idr, conv, e) -> fun (nG, bs) -> recurse must_match e (nG + 1, (idr, Some nG, conv, must_match) :: bs)
2020
| Named_subs (idr, None, conv, e) | Named_subs (_, Some idr, conv, e) ->
2121
fun (nG, bs) -> recurse must_match e (nG + 1, (idr, Some nG, conv, must_match) :: bs)
@@ -24,7 +24,7 @@ module Regexp = struct
2424
fun (nG, bs) ->
2525
let nG', inner_bs = recurse must_match e (nG, []) in
2626
nG', ((res, None, Some (Pipe_all_func func), must_match) :: inner_bs) @ bs
27-
| Call _ -> Util.error ~loc "(&...) is not implemented for %%pcre and %%mik."
27+
| Call _ -> Util.error ~loc "(&...) is not implemented for %%pcre and %%mikmatch."
2828
in
2929
function { Location.txt = Capture_as (idr, conv, e); _ } -> recurse true e (0, [ idr, None, conv, true ]) | e -> recurse true e (0, [])
3030

@@ -36,7 +36,8 @@ module Regexp = struct
3636
let content =
3737
match Util.Ctx.find var_name ctx with
3838
| Some value -> value
39-
| None -> Util.error ~loc "Variable '%s' not found. %%pcre and %%mik only support global let bindings for substitution." var_name
39+
| None ->
40+
Util.error ~loc "Variable '%s' not found. %%pcre and %%mikmatch only support global let bindings for substitution." var_name
4041
in
4142
content
4243
in
@@ -57,7 +58,7 @@ module Regexp = struct
5758
let j_str = match j_opt with None -> "" | Some j -> string_of_int j in
5859
delimit_if (p > p_suffix) (Printf.sprintf "%s{%d,%s}" (recurse p_atom e) i j_str)
5960
| Nongreedy e -> recurse p_suffix e ^ "?"
60-
| Capture _ -> Util.error ~loc "Unnamed capture is not allowed for %%pcre and %%mik."
61+
| Capture _ -> Util.error ~loc "Unnamed capture is not allowed for %%pcre and %%mikmatch."
6162
| Capture_as (_, _, e) -> "(" ^ recurse p_alt e ^ ")"
6263
| Named_subs (idr, _, _, _) ->
6364
let content = get_parsed ~loc idr in
@@ -66,7 +67,7 @@ module Regexp = struct
6667
let content = get_parsed ~loc idr in
6768
recurse p_atom content
6869
| Pipe_all (_, _, e) -> recurse p_alt e
69-
| Call _ -> Util.error ~loc "(&...) is not implemented for %%pcre and %%mik."
70+
| Call _ -> Util.error ~loc "(&...) is not implemented for %%pcre and %%mikmatch."
7071
in
7172
function { Location.txt = Capture_as (_, _, e); _ } -> recurse 0 e | e -> recurse 0 e
7273
end
@@ -167,11 +168,9 @@ let make_default_rhs ~loc = function
167168
| _ -> case)
168169
default_cases
169170
in
170-
match transformed with
171-
| [{ pc_lhs = { ppat_desc = Ppat_any; _ }; pc_guard = None; pc_rhs; _ }] ->
172-
pc_rhs
173-
| _ ->
174-
pexp_match ~loc [%expr _ppx_regexp_v] transformed
171+
(match transformed with
172+
| [ { pc_lhs = { ppat_desc = Ppat_any; _ }; pc_guard = None; pc_rhs; _ } ] -> pc_rhs
173+
| _ -> pexp_match ~loc [%expr _ppx_regexp_v] transformed)
175174

176175
let transform_let ~mode ~ctx =
177176
let parser = match mode with `Pcre -> Regexp.parse_exn ~target:`Let | `Mik -> Regexp.parse_mik_exn ~target:`Let in
@@ -264,7 +263,7 @@ let transform_cases ~mode ~opts ~loc ~ctx cases =
264263
let re_binding = value_binding ~loc ~pat:(ppat_var ~loc { txt = var; loc }) ~expr:comp in
265264
let e_comp = pexp_ident ~loc { txt = Lident var; loc } in
266265

267-
let case_handlers =
266+
let case_bindings =
268267
List.mapi
269268
begin
270269
fun i (_, case_group, offG) ->
@@ -284,41 +283,25 @@ let transform_cases ~mode ~opts ~loc ~ctx cases =
284283
in
285284
[%expr fun _g -> [%e mk_guard_chains (List.rev case_group)]]
286285
in
287-
handler_name, handler_body
286+
value_binding ~loc ~pat:(ppat_var ~loc { txt = handler_name; loc }) ~expr:handler_body
288287
end
288+
processed_cases
289289
in
290290

291-
let mk_checks cases_with_offsets =
292-
let indexed = List.mapi (fun i x -> i, x) cases_with_offsets in
293-
List.fold_right
294-
begin
295-
fun (i, _) acc ->
296-
let handler_name = Printf.sprintf "_case_%d" i in
297-
[%expr
298-
if Re.Mark.test _g (snd [%e e_comp]).([%e eint ~loc i]) then [%e pexp_ident ~loc { txt = Lident handler_name; loc }] _g
299-
else [%e acc]]
300-
end
301-
indexed [%expr None]
291+
let handlers_array =
292+
pexp_array ~loc @@ List.mapi (fun i _ -> pexp_ident ~loc { txt = Lident (Printf.sprintf "_case_%d" i); loc }) processed_cases
302293
in
303294

304-
let handlers = case_handlers processed_cases in
305-
let dispatchers = mk_checks processed_cases in
306-
307295
let match_expr =
308296
[%expr
309297
match Re.exec_opt (fst [%e e_comp]) _ppx_regexp_v with
310298
| None -> [%e default_rhs]
311299
| Some _g ->
312300
[%e
313-
List.fold_left
314-
begin
315-
fun acc (name, body) ->
316-
[%expr
317-
let [%p ppat_var ~loc { txt = name; loc }] = [%e body] in
318-
[%e acc]]
319-
end
320-
[%expr match [%e dispatchers] with Some result -> result | None -> [%e default_rhs]]
321-
handlers]]
301+
pexp_let ~loc Nonrecursive case_bindings
302+
[%expr
303+
let handlers = [%e handlers_array] in
304+
match __ppx_regexp_dispatch (snd [%e e_comp]) handlers _g with Some result -> result | None -> [%e default_rhs]]]]
322305
in
323306
match_expr, re_binding
324307

@@ -341,13 +324,13 @@ let transform_mixed_match ~loc ~ctx ?matched_expr cases acc =
341324

342325
let prepared_cases = List.map aux cases in
343326

344-
let has_mik = List.exists (function `Ext _ -> true | _ -> false) prepared_cases in
327+
let has_ext = List.exists (function `Ext _ -> true | _ -> false) prepared_cases in
345328

346-
if not has_mik then begin
329+
if not has_ext then begin
347330
match matched_expr with None -> pexp_function ~loc cases, acc | Some m -> pexp_match ~loc m cases, acc
348331
end
349332
else begin
350-
let mik_compilations =
333+
let compilations =
351334
List.mapi
352335
begin
353336
fun i case ->
@@ -364,18 +347,18 @@ let transform_mixed_match ~loc ~ctx ?matched_expr cases acc =
364347
|> List.filter_map (fun x -> x)
365348
in
366349

367-
let bindings = List.map (fun (_, _, b) -> b) mik_compilations in
350+
let bindings = List.map (fun (_, _, b) -> b) compilations in
368351

369-
let rec build_ordered_match input_var case_idx cases mik_comps =
370-
match cases, mik_comps with
352+
let rec build_ordered_match input_var case_idx cases comps =
353+
match cases, comps with
371354
| [], _ ->
372355
(* should not happen if original had catch-all *)
373356
[%expr raise (Match_failure ("", 0, 0))]
374357
| `Regular case :: rest, _ ->
375358
[%expr
376359
match [%e input_var] with
377360
| [%p case.pc_lhs] when [%e Option.value case.pc_guard ~default:[%expr true]] -> [%e case.pc_rhs]
378-
| _ -> [%e build_ordered_match input_var (case_idx + 1) rest mik_comps]]
361+
| _ -> [%e build_ordered_match input_var (case_idx + 1) rest comps]]
379362
| `Ext (_, _, _, bs, rhs, guard) :: rest, (idx, comp_var, _) :: rest_comps when idx = case_idx ->
380363
let comp_ident = pexp_ident ~loc { txt = Lident comp_var; loc } in
381364
[%expr
@@ -391,29 +374,18 @@ let transform_mixed_match ~loc ~ctx ?matched_expr cases acc =
391374
| None -> [%e build_ordered_match input_var (case_idx + 1) rest rest_comps]]
392375
| `Ext _ :: rest, _ ->
393376
(* shouldn't happen if indices are correct *)
394-
build_ordered_match input_var (case_idx + 1) rest mik_comps
377+
build_ordered_match input_var (case_idx + 1) rest comps
395378
in
396379

397-
let match_body = build_ordered_match [%expr _ppx_regexp_v] 0 prepared_cases mik_compilations in
380+
let match_body = build_ordered_match [%expr _ppx_regexp_v] 0 prepared_cases compilations in
398381

399382
let match_expr =
400-
let init =
401-
match matched_expr with
402-
| None -> [%expr fun _ppx_regexp_v -> [%e match_body]]
403-
| Some m ->
404-
[%expr
405-
let _ppx_regexp_v = [%e m] in
406-
[%e match_body]]
407-
in
408-
List.fold_left
409-
begin
410-
fun expr binding ->
411-
[%expr
412-
let [%p binding.pvb_pat] = [%e binding.pvb_expr] in
413-
[%e expr]]
414-
end
415-
init bindings
383+
match matched_expr with
384+
| None -> [%expr fun _ppx_regexp_v -> [%e match_body]]
385+
| Some m ->
386+
[%expr
387+
let _ppx_regexp_v = [%e m] in
388+
[%e match_body]]
416389
in
417-
418390
match_expr, bindings @ acc
419391
end

0 commit comments

Comments
 (0)