Skip to content

Commit ef73879

Browse files
committed
guards implemented, removed need for /s in let%mik
1 parent 3b4807d commit ef73879

File tree

5 files changed

+196
-110
lines changed

5 files changed

+196
-110
lines changed

common/mik_parser.mly

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ let unclosed_error what startpos endpos =
4646
%token DASH BAR STAR PLUS QUESTION UNDERSCORE COLON AS
4747
%token INT_CONVERTER FLOAT_CONVERTER EOF
4848

49-
%start <string t> main
49+
%start <string t> main_match_case
50+
%start <string t> main_let_expr
5051
%start <string t> pattern
5152

5253
/* operator precedence from lowest to highest */
@@ -57,12 +58,15 @@ let unclosed_error what startpos endpos =
5758

5859
%%
5960

60-
main:
61+
main_match_case:
6162
| SLASH p = pattern SLASH EOF { p }
6263
| SLASH pattern EOF { unclosed_error "pattern (missing closing '/')" $startpos($1) $endpos }
6364
| SLASH error { syntax_error "Invalid pattern after opening slash" $startpos($2) $endpos($2) }
6465
| error { syntax_error "Expected pattern to start with '/'" $startpos($1) $endpos($1) }
6566

67+
main_let_expr:
68+
| p = pattern EOF { p }
69+
6670
pattern:
6771
| alt_expr { $1 }
6872
| { missing_error "pattern expression" $startpos $endpos }

common/regexp.ml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ module Int_map = struct
3535
include M
3636
end
3737

38-
let parse_exn ?(pos = Lexing.dummy_pos) s =
38+
let parse_exn ~target:_ ?(pos = Lexing.dummy_pos) s =
3939
let l = String.length s in
4040
let get i = if i = l then ')' else s.[i] in
4141

@@ -269,7 +269,7 @@ let parse_exn ?(pos = Lexing.dummy_pos) s =
269269
let j, e = with_loc scan_toplevel 0 in
270270
if j <> l then fail (j, j + 1) "Unbalanced ')'." else e
271271

272-
let parse_mik_exn ?(pos = Lexing.dummy_pos) s =
272+
let parse_mik_exn ~target ?(pos = Lexing.dummy_pos) s =
273273
let lexbuf = Lexing.from_string s in
274274
let mk_loc ?loc pos lexbuf =
275275
let open Lexing in
@@ -312,8 +312,8 @@ let parse_mik_exn ?(pos = Lexing.dummy_pos) s =
312312
};
313313
}
314314
in
315-
316-
try Mik_parser.main Mik_lexer.token lexbuf with
315+
let main = match target with `Match -> Mik_parser.main_match_case | `Let -> Mik_parser.main_let_expr in
316+
try main Mik_lexer.token lexbuf with
317317
| Mik_lexer.Error msg ->
318318
let loc = mk_loc pos lexbuf in
319319
Location.raise_errorf ~loc "%s" msg

common/regexp.mli

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@
1515
* along with this library. If not, see <http://www.gnu.org/licenses/>.
1616
*)
1717

18-
val parse_exn : ?pos:Lexing.position -> string -> string Regexp_types.t
19-
val parse_mik_exn : ?pos:Lexing.position -> string -> string Regexp_types.t
18+
val parse_exn : target:[< `Let | `Match ] -> ?pos:Lexing.position -> string -> string Regexp_types.t
19+
val parse_mik_exn : target:[< `Let | `Match ] -> ?pos:Lexing.position -> string -> string Regexp_types.t

ppx_regexp/ppx_regexp.ml

Lines changed: 159 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -14,31 +14,6 @@
1414
* along with this library. If not, see <http://www.gnu.org/licenses/>.
1515
*)
1616

17-
open Ppxlib
18-
open Ast_builder.Default
19-
20-
let error = Location.raise_errorf
21-
22-
let warn ~loc msg e =
23-
let e_msg = estring ~loc msg in
24-
let name = { txt = "ocaml.ppwarning"; loc } in
25-
let payload = PStr [ { pstr_desc = Pstr_eval (e_msg, []); pstr_loc = loc } ] in
26-
{ e with pexp_attributes = attribute ~loc ~name ~payload :: e.pexp_attributes }
27-
28-
module List = struct
29-
include List
30-
31-
let rec fold f = function [] -> fun acc -> acc | x :: xs -> fun acc -> fold f xs (f x acc)
32-
end
33-
34-
module Ctx = struct
35-
(* name -> parsed value) *)
36-
type t = (string, label Regexp_types.t) Hashtbl.t
37-
38-
let empty () = Hashtbl.create 16
39-
let find name ctx = Hashtbl.find_opt ctx name
40-
end
41-
4217
let rec debug_ast indent (ast : string Regexp_types.t) =
4318
let spaces = String.make indent ' ' in
4419
match ast.txt with
@@ -75,6 +50,31 @@ let rec debug_ast indent (ast : string Regexp_types.t) =
7550
Printf.printf "%sCall(%s)\n" spaces (match longident.txt with Longident.Lident s -> s | _ -> "complex")
7651
| _ -> Printf.printf "%sOther\n" spaces
7752

53+
open Ppxlib
54+
open Ast_builder.Default
55+
56+
let error = Location.raise_errorf
57+
58+
let warn ~loc msg e =
59+
let e_msg = estring ~loc msg in
60+
let name = { txt = "ocaml.ppwarning"; loc } in
61+
let payload = PStr [ { pstr_desc = Pstr_eval (e_msg, []); pstr_loc = loc } ] in
62+
{ e with pexp_attributes = attribute ~loc ~name ~payload :: e.pexp_attributes }
63+
64+
module List = struct
65+
include List
66+
67+
let rec fold f = function [] -> fun acc -> acc | x :: xs -> fun acc -> fold f xs (f x acc)
68+
end
69+
70+
module Ctx = struct
71+
(* name -> parsed value) *)
72+
type t = (string, label Regexp_types.t) Hashtbl.t
73+
74+
let empty () = Hashtbl.create 16
75+
let find name ctx = Hashtbl.find_opt ctx name
76+
end
77+
7878
module Regexp = struct
7979
open Regexp_types
8080
include Regexp
@@ -124,7 +124,6 @@ module Regexp = struct
124124
s
125125
| Seq es -> delimit_if (p > p_seq) (String.concat "" (List.map (recurse p_seq) es))
126126
| Alt es -> delimit_if (p > p_alt) (String.concat "|" (List.map (recurse p_alt) es))
127-
(* | Opt e -> delimit_if (p > p_suffix) (recurse p_atom e ^ "?") *)
128127
| Opt e ->
129128
let content = recurse p_atom e in
130129
let result = if p >= p_seq then "(?:" ^ content ^ ")?" else content ^ "?" in
@@ -165,7 +164,6 @@ let rec must_match p i =
165164

166165
let extract_bindings ~(parser : ?pos:position -> string -> string Regexp_types.t) ~ctx ~pos s =
167166
let r = parser ~pos s in
168-
(* debug_ast 0 r; *)
169167
let nG, bs = Regexp.bindings r in
170168
let re_str = Regexp.to_string ~ctx r in
171169
let loc = Location.none in
@@ -190,75 +188,149 @@ let rec wrap_group_bindings ~loc rhs offG = function
190188
let [%p ppat_var ~loc varG] = [%e eG] in
191189
[%e wrap_group_bindings ~loc rhs offG bs]]
192190

191+
let guards_equal g1 g2 =
192+
match g1, g2 with
193+
| None, None -> true
194+
| Some e1, Some e2 ->
195+
(* TODO: simplified, should use a more sophisticated AST comparison *)
196+
Pprintast.string_of_expression e1 = Pprintast.string_of_expression e2
197+
| _ -> false
198+
199+
let group_by_guard cases =
200+
List.fold_left
201+
begin
202+
fun groups case ->
203+
let guard = case.pc_guard in
204+
let rec add_to_groups = function
205+
| [] -> [ guard, [ case ] ]
206+
| (g, cases) :: rest ->
207+
if guards_equal g guard then (g, case :: cases) :: rest else (g, cases) :: add_to_groups rest
208+
in
209+
add_to_groups groups
210+
end
211+
[] cases
212+
|> List.map (fun (g, cases) -> g, List.rev cases)
213+
193214
let transform_cases ~mode ~opts ~loc ~ctx cases =
194215
let aux case =
195-
if case.pc_guard <> None then error ~loc "Guards are not implemented for match%%pcre and match%%mik."
196-
else
197-
Ast_pattern.(parse (pstring __'))
198-
loc case.pc_lhs
199-
begin
200-
fun { txt = re_src; loc = { loc_start; loc_end; _ } } ->
201-
let re_offset = (loc_end.pos_cnum - loc_start.pos_cnum - String.length re_src) / 2 in
202-
let pos = { loc_start with pos_cnum = loc_start.pos_cnum + re_offset; pos_lnum = loc_end.pos_lnum } in
203-
let parser = match mode with `Pcre -> Regexp.parse_exn | `Mik -> Regexp.parse_mik_exn in
204-
let re, bs, nG = extract_bindings ~parser ~pos ~ctx re_src in
205-
re, nG, bs, case.pc_rhs
206-
end
216+
Ast_pattern.(parse (pstring __'))
217+
loc case.pc_lhs
218+
begin
219+
fun { txt = re_src; loc = { loc_start; loc_end; _ } } ->
220+
let re_offset = (loc_end.pos_cnum - loc_start.pos_cnum - String.length re_src) / 2 in
221+
let pos = { loc_start with pos_cnum = loc_start.pos_cnum + re_offset; pos_lnum = loc_end.pos_lnum } in
222+
let parser =
223+
match mode with `Pcre -> Regexp.parse_exn ~target:`Match | `Mik -> Regexp.parse_mik_exn ~target:`Match
224+
in
225+
let re, bs, nG = extract_bindings ~parser ~pos ~ctx re_src in
226+
re, nG, bs, case.pc_rhs, case.pc_guard
227+
end
207228
in
208-
let cases, default_rhs =
209-
match List.rev (*_map rewrite_case*) cases with
210-
| { pc_lhs = { ppat_desc = Ppat_any; _ }; pc_rhs; pc_guard = None } :: cases -> cases, pc_rhs
211-
| { pc_lhs = { ppat_desc = Ppat_var var; _ }; pc_rhs; pc_guard = None } :: cases ->
212-
let rhs =
213-
[%expr
214-
let [%p ppat_var ~loc var] = _ppx_regexp_v in
215-
[%e pc_rhs]]
216-
in
217-
cases, rhs
218-
| cases ->
229+
230+
let rec separate_defaults acc = function
231+
| [] -> List.rev acc, []
232+
| ({ pc_lhs = { ppat_desc = Ppat_any; _ }; _ } as case) :: rest -> List.rev acc, case :: rest
233+
| ({ pc_lhs = { ppat_desc = Ppat_var _; _ }; _ } as case) :: rest -> List.rev acc, case :: rest
234+
| case :: rest -> separate_defaults (case :: acc) rest
235+
in
236+
237+
let cases, default_cases = separate_defaults [] cases in
238+
239+
let default_rhs =
240+
match default_cases with
241+
| [] ->
219242
let open Lexing in
220243
let pos = loc.Location.loc_start in
221244
let e0 = estring ~loc pos.pos_fname in
222245
let e1 = eint ~loc pos.pos_lnum in
223246
let e2 = eint ~loc (pos.pos_cnum - pos.pos_bol) in
224247
let e = [%expr raise (Match_failure ([%e e0], [%e e1], [%e e2]))] in
225-
cases, warn ~loc "A universal case is recommended for %pcre and %mik." e
226-
in
227-
let cases = List.rev_map aux cases in
228-
let res = pexp_array ~loc (List.map (fun (re, _, _, _) -> re) cases) in
229-
let opts_expr =
230-
let rec opts_to_expr = function
231-
| [] -> [%expr []]
232-
| `Caseless :: rest -> [%expr `Caseless :: [%e opts_to_expr rest]]
233-
| _ -> assert false
234-
in
235-
opts_to_expr opts
248+
warn ~loc "A universal case is recommended for %pcre and %mik." e
249+
| _ ->
250+
let transformed_default_cases =
251+
List.map
252+
(fun case ->
253+
match case.pc_lhs.ppat_desc with
254+
| Ppat_var var ->
255+
let new_rhs =
256+
[%expr
257+
let [%p ppat_var ~loc var] = _ppx_regexp_v in
258+
[%e case.pc_rhs]]
259+
in
260+
{ case with pc_lhs = ppat_any ~loc; pc_rhs = new_rhs }
261+
| _ -> case (* keep _ patterns as-is *))
262+
default_cases
263+
in
264+
(* build a regular match expression for default cases *)
265+
let default_match = pexp_match ~loc [%expr _ppx_regexp_v] transformed_default_cases in
266+
default_match
236267
in
237-
let comp =
238-
[%expr
239-
let a = Array.map (fun s -> Re.mark (Re.Perl.re ~opts:[%e opts_expr] s)) [%e res] in
240-
let marks = Array.map fst a in
241-
let re = Re.compile (Re.alt (Array.to_list (Array.map snd a))) in
242-
re, marks]
268+
269+
let grouped_cases = group_by_guard cases in
270+
271+
let compiled_groups =
272+
List.map
273+
begin
274+
fun (guard, group_cases) ->
275+
let processed_cases = List.rev_map aux group_cases in
276+
let res = pexp_array ~loc (List.map (fun (re, _, _, _, _) -> re) processed_cases) in
277+
278+
let opts_expr =
279+
let rec opts_to_expr = function
280+
| [] -> [%expr []]
281+
| `Caseless :: rest -> [%expr `Caseless :: [%e opts_to_expr rest]]
282+
| _ -> assert false
283+
in
284+
opts_to_expr opts
285+
in
286+
let comp =
287+
[%expr
288+
let a = Array.map (fun s -> Re.mark (Re.Perl.re ~opts:[%e opts_expr] s)) [%e res] in
289+
let marks = Array.map fst a in
290+
let re = Re.compile (Re.alt (Array.to_list (Array.map snd a))) in
291+
re, marks]
292+
in
293+
let var = fresh_var () in
294+
let re_binding = value_binding ~loc ~pat:(ppat_var ~loc { txt = var; loc }) ~expr:comp in
295+
let e_comp = pexp_ident ~loc { txt = Lident var; loc } in
296+
297+
let rec handle_cases i offG = function
298+
| [] -> [%expr assert false]
299+
| (_, nG, bs, rhs, _) :: cases ->
300+
[%expr
301+
if Re.Mark.test _g (snd [%e e_comp]).([%e eint ~loc i]) then [%e wrap_group_bindings ~loc rhs offG bs]
302+
else [%e handle_cases (i + 1) (offG + nG) cases]]
303+
in
304+
305+
let match_expr =
306+
[%expr
307+
match Re.exec_opt (fst [%e e_comp]) _ppx_regexp_v with
308+
| None -> None
309+
| Some _g -> Some [%e handle_cases 0 0 processed_cases]]
310+
in
311+
312+
guard, match_expr, re_binding
313+
end
314+
grouped_cases
243315
in
244-
let var = fresh_var () in
245-
let re_binding = value_binding ~loc ~pat:(ppat_var ~loc { txt = var; loc }) ~expr:comp in
246-
let e_comp = pexp_ident ~loc { txt = Lident var; loc } in
247316

248-
let rec handle_cases i offG = function
249-
| [] -> [%expr assert false]
250-
| (_, nG, bs, rhs) :: cases ->
317+
let rec try_groups = function
318+
| [] -> default_rhs
319+
| (None, match_expr, _) :: rest ->
320+
[%expr match [%e match_expr] with Some result -> result | None -> [%e try_groups rest]]
321+
| (Some guard_expr, match_expr, _) :: rest ->
251322
[%expr
252-
if Re.Mark.test _g (snd [%e e_comp]).([%e eint ~loc i]) then [%e wrap_group_bindings ~loc rhs offG bs]
253-
else [%e handle_cases (i + 1) (offG + nG) cases]]
323+
if [%e guard_expr] then (match [%e match_expr] with Some result -> result | None -> [%e try_groups rest])
324+
else [%e try_groups rest]]
254325
in
255-
let cases =
256-
[%expr
257-
match Re.exec_opt (fst [%e e_comp]) _ppx_regexp_v with
258-
| None -> [%e default_rhs]
259-
| Some _g -> [%e handle_cases 0 0 cases]]
260-
in
261-
cases, re_binding
326+
327+
let final_expr = try_groups compiled_groups in
328+
let all_bindings = List.map (fun (_, _, b) -> b) compiled_groups in
329+
330+
( [%expr
331+
let _ppx_regexp_v = [%e pexp_ident ~loc { txt = Lident "_ppx_regexp_v"; loc }] in
332+
[%e final_expr]],
333+
all_bindings )
262334

263335
let check_unbounded_recursion ~mode var_name content =
264336
let contains_regex pattern str =
@@ -281,7 +353,7 @@ let check_unbounded_recursion ~mode var_name content =
281353
contains_regex u content || contains_regex n_as content || contains_regex n_as_conv content
282354

283355
let transform_let ~mode ~ctx =
284-
let parser = match mode with `Pcre -> Regexp.parse_exn | `Mik -> Regexp.parse_mik_exn in
356+
let parser = match mode with `Pcre -> Regexp.parse_exn ~target:`Let | `Mik -> Regexp.parse_mik_exn ~target:`Let in
285357
List.map
286358
begin
287359
fun vb ->
@@ -323,14 +395,14 @@ let transformation ctx =
323395
let e_ext, acc = super#expression e_ext acc in
324396
let make_transformations ~mode ~opts ~loc = function
325397
| Pexp_match (e, cases) ->
326-
let cases, binding = transform_cases ~mode ~opts ~loc ~ctx cases in
398+
let cases, bindings = transform_cases ~mode ~opts ~loc ~ctx cases in
327399
( [%expr
328400
let _ppx_regexp_v = [%e e] in
329401
[%e cases]],
330-
binding :: acc )
402+
bindings @ acc )
331403
| Pexp_function cases ->
332-
let cases, binding = transform_cases ~mode ~opts ~loc ~ctx cases in
333-
[%expr fun _ppx_regexp_v -> [%e cases]], binding :: acc
404+
let cases, bindings = transform_cases ~mode ~opts ~loc ~ctx cases in
405+
[%expr fun _ppx_regexp_v -> [%e cases]], bindings @ acc
334406
| _ -> error ~loc "[%%pcre] and [%%mik] only apply to match, function and global let declarations of strings."
335407
in
336408
match e_ext.pexp_desc with

0 commit comments

Comments
 (0)