Skip to content

Commit d8759d7

Browse files
committed
qol: tests, let destructuring warning, mixed match def rhs
- let destructuring warnings for alternations with different capture names on each branch, this can't happen
1 parent 8158e04 commit d8759d7

File tree

4 files changed

+686
-253
lines changed

4 files changed

+686
-253
lines changed

ppx_regexp/transformations.ml

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,52 @@ module Regexp = struct
187187
{ txt = new_txt; loc = new_loc }
188188
in
189189
recurse e
190+
191+
let check_alternation_captures ~loc pattern =
192+
let rec get_alt_captures = function
193+
| { Location.txt = Regexp_types.Alt branches; _ } -> Some (List.map get_branch_captures branches)
194+
| { Location.txt = Seq es; _ } -> List.find_map get_alt_captures es
195+
| { Location.txt = Capture_as (_, _, e); _ } -> get_alt_captures e
196+
| _ -> None
197+
and get_branch_captures branch =
198+
let rec collect_captures acc = function
199+
| { Location.txt = Regexp_types.Capture_as (name, _, e); _ } -> collect_captures (name.txt :: acc) e
200+
| { Location.txt = Seq es; _ } -> List.fold_left collect_captures acc es
201+
| { Location.txt = Opt e; _ }
202+
| { Location.txt = Repeat (_, e); _ }
203+
| { Location.txt = Nongreedy e; _ }
204+
| { Location.txt = Caseless e; _ }
205+
| { Location.txt = Capture e; _ } ->
206+
collect_captures acc e
207+
| { Location.txt = Alt _; _ } -> acc
208+
| _ -> acc
209+
in
210+
collect_captures [] branch |> List.rev
211+
in
212+
213+
match get_alt_captures pattern with
214+
| None -> fun expr -> expr
215+
| Some branches_captures ->
216+
(* check if different branches have different capture variables *)
217+
let all_vars = List.concat branches_captures |> List.sort_uniq String.compare in
218+
let branch_var_sets = List.map (List.sort_uniq String.compare) branches_captures in
219+
220+
(* if not all branches have the same variables, issue warning *)
221+
let has_inconsistent_captures =
222+
List.exists (fun branch_vars -> List.length branch_vars <> List.length all_vars || branch_vars <> all_vars) branch_var_sets
223+
in
224+
225+
if has_inconsistent_captures && List.length all_vars > 1 then begin
226+
let warning_msg =
227+
Printf.sprintf
228+
{| This let destruct pattern has alternations with different capture groups (%s).
229+
Only one branch will match at runtime, but all variables are being bound.
230+
Consider using a single capture group over the alternations. |}
231+
(String.concat ", " all_vars)
232+
in
233+
Util.warn ~loc warning_msg
234+
end
235+
else fun expr -> expr
190236
end
191237

192238
module Parser = struct
@@ -197,7 +243,7 @@ module Parser = struct
197243
let r = Regexp.(relocate ~pos @@ squash_codes r) in
198244
let nG, bs = Regexp.bindings r in
199245
let re = Regexp.to_re_expr ~in_let:(target = `Let) r in
200-
re, bs, nG, flags
246+
r, re, bs, nG, flags
201247
end
202248

203249
let make_default_rhs ~mode ~target ~loc = function
@@ -222,6 +268,7 @@ let make_default_rhs ~mode ~target ~loc = function
222268
| `Match, `Mik -> "any mikmatch cases"
223269
| `Let, `Pcre -> "the pcre regex"
224270
| `Let, `Mik -> "the mikmatch regex"
271+
| _, `Mixed -> "any regex"
225272
in
226273

227274
let location_desc =
@@ -280,7 +327,7 @@ let transform_destructuring_let ~mode ~loc pattern_str expr =
280327
let target = `Match in
281328
let pos = loc.loc_start in
282329
let parser = Parser.get_parser ~mode ~target ~pos in
283-
let re, bs, _, flags = Parser.run ~parser ~target ~pos pattern_str in
330+
let r, re, bs, _, flags = Parser.run ~parser ~target ~pos pattern_str in
284331
let capture_names = List.map (fun (name, _, _, _) -> name) (List.rev bs) in
285332

286333
let lhs_pattern =
@@ -325,6 +372,7 @@ let transform_destructuring_let ~mode ~loc pattern_str expr =
325372
[%expr
326373
let _ppx_regexp_v = [%e expr] in
327374
[%e build_exec_match ~loc ~re_var ~continue_next:default_rhs ~on_match]]
375+
|> Regexp.check_alternation_captures ~loc r
328376
in
329377

330378
{ pvb_pat = lhs_pattern; pvb_expr = rhs_expr; pvb_attributes = []; pvb_loc = loc }, [ re_binding ]
@@ -345,7 +393,7 @@ let transform_cases ~mode ~loc cases =
345393
let re_offset = (loc_end.pos_cnum - loc_start.pos_cnum - String.length re_src) / 2 in
346394
let pos = { loc_start with pos_cnum = loc_start.pos_cnum + re_offset } in
347395
let parser = Parser.get_parser ~mode ~target ~pos in
348-
let re, bs, nG, flags = Parser.run ~parser ~target ~pos re_src in
396+
let _, re, bs, nG, flags = Parser.run ~parser ~target ~pos re_src in
349397
let re_str = Pprintast.string_of_expression re in
350398
re, re_str, nG, bs, case.pc_rhs, case.pc_guard, flags)
351399
in
@@ -488,7 +536,7 @@ let transform_mixed_match ~loc ?matched_expr cases acc =
488536
let pos = str_loc.loc_start in
489537
let mode = if "pcre" = ext then `Pcre else `Mik in
490538
let parser = Parser.get_parser ~mode ~target ~pos in
491-
let re, bs, nG, flags = Parser.run ~parser ~pos ~target pat in
539+
let _, re, bs, nG, flags = Parser.run ~parser ~pos ~target pat in
492540
`Ext (re, nG, bs, case.pc_rhs, case.pc_guard, flags)
493541
| _ -> `Regular case
494542
in
@@ -513,9 +561,11 @@ let transform_mixed_match ~loc ?matched_expr cases acc =
513561

514562
let bindings = List.map (fun (_, _, b) -> b) compilations in
515563

564+
let default_rhs = make_default_rhs ~mode:`Mixed ~target ~loc [] in
565+
516566
let rec build_ordered_match input_var case_idx cases comps =
517567
match cases, comps with
518-
| [], _ -> [%expr raise (Match_failure ("", 0, 0))]
568+
| [], _ -> default_rhs
519569
| `Regular case :: rest, _ ->
520570
[%expr
521571
match [%e input_var] with

tests/dune

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
(names test_ppx_regexp test_ppx_regexp_unused)
1010
(modules test_ppx_regexp test_ppx_regexp_unused)
1111
(package ppx_regexp_extended)
12-
(libraries re re.perl)
12+
(libraries ounit2 re re.perl)
1313
(preprocess (pps ppx_regexp_extended)))
1414

1515
(executable

0 commit comments

Comments
 (0)