Skip to content

Commit aad34bb

Browse files
committed
changing %mik to have anchoring behavior, new extensions
- extensions ending in `_i` are case insensitive, as before - extensions with an `s` are `search` extensions, meaning, they aren't anchored (`^` is not automatically added) - fixed issue with type conversion when there was only one capture
1 parent c2e11de commit aad34bb

File tree

4 files changed

+94
-21
lines changed

4 files changed

+94
-21
lines changed

MIK.md

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ Values are also available at the guard level:
9696
let%mik num = {| digit+ |}
9797
9898
let do_something = function%mik
99-
| {|/ ... (num as n) ... /|} when n = 123 -> ...
99+
| {|/ ... (num as n) ... /|} when n = "123" -> ...
100100
| _ -> ...
101101
```
102102

@@ -107,7 +107,7 @@ It is possible to convert variables to `int` of `float` on the fly:
107107
let%mik num = {| digit+ |}
108108
109109
let do_something = function%mik
110-
| {|/ 'd' (num as n : int) ... /|} -> ... (* (n : int) available here *)
110+
| {|/ 'd' (num as n : int) ... /|} when n = 123 -> ... (* (n : int) available here *)
111111
| {|/ 'f' (num as n : float) ... /|} -> ... (* (n : float) available here *)
112112
| _ -> ...
113113
```
@@ -175,6 +175,33 @@ function%mik
175175
This match expression will compile all of the REs in the branches into one, and use marks to find which branch was executed.
176176
Efficient if you have multiple branches.
177177

178+
#### `match%miks` and `function%miks` (search, not anchored)
179+
180+
The previous extension was **anchored**, meaning, it will only match at the beginning of the string.
181+
182+
This version is not, meaning, for example:
183+
184+
```ocaml
185+
let mik_test = function%mik
186+
| {|/ (digit+ as num) /|} -> ...
187+
...
188+
| _ -> failwith "no match"
189+
190+
let () = mik_test "123" ... (* match *)
191+
let () = mik_test "test123" ... (* ERROR: no match *)
192+
193+
(* but, with %miks... *)
194+
let miks_test = function%miks
195+
| {|/ (digit+ as num) /|} -> ...
196+
...
197+
| _ -> failwith "no match"
198+
199+
let () = miks_test "123" ... (* match *)
200+
let () = miks_test "test123" ... (* match *)
201+
```
202+
203+
Similar for `%miks_i`, except it is case insensitive.
204+
178205
#### General match/function
179206

180207
```ocaml
@@ -183,11 +210,17 @@ function
183210
| {%mik|/ some regex /|} -> ...
184211
...
185212
| "another string" -> ...
213+
| {%miks|/ some regex /|} -> ... (* non-anchored *)
214+
...
215+
| "yet another string" -> ...
186216
| {%mik_i|/ another regex /|} -> ... (* case insensitive *)
217+
...
218+
| "would you guess it" -> ...
219+
| {%miks_i|/ another regex /|} -> ... (* non-anchored, case insensitive *)
187220
| _ -> ...
188221
```
189222

190223
This match expression will compile all of the REs **individually**, and test each one in sequence.
191224
Recommended if you only matching one RE. It is less efficient than the first option for more than one RE, but allows raw string matching.
192225

193-
It keeps all of the features of the previous extension, explored in [Semantics](#Semantics_and_Examples)
226+
It keeps all of the features (guards and such) of the previous extension, explored in [Semantics](#Semantics_and_Examples)

ppx_regexp/ppx_regexp.ml

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -67,26 +67,42 @@ let transformation ctx =
6767
| _ -> Util.error ~loc "[%%pcre] and [%%mik] only apply to match, function and global let declarations of strings."
6868
in
6969
match e_ext.pexp_desc with
70-
(* match%mik/match%pcre and function%mik/function%pcre*)
70+
(* match%mik/match%pcre and function%mik/function%pcre, anchored *)
7171
| Pexp_extension ({ txt = ("pcre" | "mik" | "pcre_i" | "mik_i") as ext; _ }, PStr [ { pstr_desc = Pstr_eval (e, _); _ } ]) ->
7272
let mode = if String.starts_with ~prefix:"pcre" ext then `Pcre else `Mik in
73-
let opts = if String.ends_with ~suffix:"_i" ext then [ `Caseless ] else [] in
73+
let opts =
74+
if String.ends_with ~suffix:"_i" ext then `Caseless :: `Anchored :: Util.default_opts else `Anchored :: Util.default_opts
75+
in
76+
let loc = e.pexp_loc in
77+
make_transformations ~mode ~opts ~loc e.pexp_desc
78+
(* match%miks/match%pcres and function%miks/function%pcres, non anchored (search) *)
79+
| Pexp_extension ({ txt = ("pcres" | "miks" | "pcres_i" | "miks_i") as ext; _ }, PStr [ { pstr_desc = Pstr_eval (e, _); _ } ]) ->
80+
let mode = if String.starts_with ~prefix:"pcre" ext then `Pcre else `Mik in
81+
let opts = if String.ends_with ~suffix:"_i" ext then `Caseless :: Util.default_opts else Util.default_opts in
7482
let loc = e.pexp_loc in
7583
make_transformations ~mode ~opts ~loc e.pexp_desc
7684
(* match smth with | {%mik|some regex|} -> ...*)
7785
| Pexp_match (matched_expr, cases) ->
7886
let has_ext_case =
7987
List.exists
80-
(fun case ->
81-
match case.pc_lhs.ppat_desc with Ppat_extension ({ txt = "pcre" | "mik" | "pcre_i" | "mik_i"; _ }, _) -> true | _ -> false)
88+
begin
89+
fun case ->
90+
match case.pc_lhs.ppat_desc with
91+
| Ppat_extension ({ txt = "pcre" | "pcres" | "mik" | "miks" | "pcre_i" | "pcres_i" | "mik_i" | "miks_i"; _ }, _) -> true
92+
| _ -> false
93+
end
8294
cases
8395
in
8496
if has_ext_case then Transformations.transform_mixed_match ~loc:e_ext.pexp_loc ~ctx ~matched_expr cases acc else e_ext, acc
8597
| Pexp_function cases ->
8698
let has_ext_case =
8799
List.exists
88-
(fun case ->
89-
match case.pc_lhs.ppat_desc with Ppat_extension ({ txt = "pcre" | "mik" | "pcre_i" | "mik_i"; _ }, _) -> true | _ -> false)
100+
begin
101+
fun case ->
102+
match case.pc_lhs.ppat_desc with
103+
| Ppat_extension ({ txt = "pcre" | "pcres" | "mik" | "miks" | "pcre_i" | "pcres_i" | "mik_i" | "miks_i"; _ }, _) -> true
104+
| _ -> false
105+
end
90106
cases
91107
in
92108
if has_ext_case then Transformations.transform_mixed_match ~loc:e_ext.pexp_loc ~ctx cases acc else e_ext, acc

ppx_regexp/transformations.ml

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ module Regexp = struct
2626
nG', ((res, None, Some (Pipe_all_func func), must_match) :: inner_bs) @ bs
2727
| Call _ -> Util.error ~loc "(&...) is not implemented for %%pcre and %%mik."
2828
in
29-
function { Location.txt = Capture_as (idr, _, e); _ } -> recurse true e (0, [ idr, None, None, true ]) | e -> recurse true e (0, [])
29+
function { Location.txt = Capture_as (idr, conv, e); _ } -> recurse true e (0, [ idr, None, conv, true ]) | e -> recurse true e (0, [])
3030

3131
let to_string ~ctx =
3232
let p_alt, p_seq, p_suffix, p_atom = 0, 1, 2, 3 in
@@ -129,6 +129,12 @@ let rec separate_defaults acc = function
129129
| ({ pc_lhs = { ppat_desc = Ppat_var _; _ }; _ } as case) :: rest -> acc, case :: rest
130130
| case :: rest -> separate_defaults (case :: acc) rest
131131

132+
let rec create_opts ~loc = function
133+
| [] -> [%expr []]
134+
| `Caseless :: xs -> [%expr `Caseless :: [%e create_opts ~loc xs]]
135+
| `Anchored :: xs -> [%expr `Anchored :: [%e create_opts ~loc xs]]
136+
| `Dollar_endonly :: xs -> [%expr `Dollar_endonly :: [%e create_opts ~loc xs]]
137+
132138
let extract_bindings ~(parser : ?pos:position -> string -> string Regexp_types.t) ~ctx ~pos s =
133139
let r = parser ~pos s in
134140
let nG, bs = Regexp.bindings r in
@@ -241,7 +247,7 @@ let transform_cases ~mode ~opts ~loc ~ctx cases =
241247

242248
let res = pexp_array ~loc @@ List.map (fun (re, _, _) -> re) processed_cases in
243249

244-
let opts_expr = match opts with [] -> [%expr []] | [ `Caseless ] -> [%expr [ `Caseless ]] | _ -> failwith "Unknown option" in
250+
let opts_expr = create_opts ~loc opts in
245251

246252
let comp =
247253
[%expr
@@ -319,10 +325,23 @@ let transform_mixed_match ~loc ~ctx ?matched_expr cases acc =
319325
match case.pc_lhs.ppat_desc with
320326
| Ppat_extension
321327
( { txt = ("pcre" | "mik" | "pcre_i" | "mik_i") as ext; _ },
328+
(* anchored *)
329+
PStr [ { pstr_desc = Pstr_eval ({ pexp_desc = Pexp_constant (Pconst_string (pat, str_loc, _)); _ }, _); _ } ] ) ->
330+
let pos = str_loc.loc_start in
331+
let mode = if String.starts_with ~prefix:"pcre" ext then `Pcre else `Mik in
332+
let opts =
333+
if String.ends_with ~suffix:"_i" ext then `Caseless :: `Anchored :: Util.default_opts else `Anchored :: Util.default_opts
334+
in
335+
let parser = match mode with `Pcre -> Regexp.parse_exn ~target:`Match | `Mik -> Regexp.parse_mik_exn ~target:`Match in
336+
let re, bs, nG = extract_bindings ~parser ~pos ~ctx pat in
337+
`Mik (opts, re, nG, bs, case.pc_rhs, case.pc_guard)
338+
| Ppat_extension
339+
( { txt = ("pcres" | "miks" | "pcres_i" | "miks_i") as ext; _ },
340+
(* search, non anchored *)
322341
PStr [ { pstr_desc = Pstr_eval ({ pexp_desc = Pexp_constant (Pconst_string (pat, str_loc, _)); _ }, _); _ } ] ) ->
323342
let pos = str_loc.loc_start in
324343
let mode = if String.starts_with ~prefix:"pcre" ext then `Pcre else `Mik in
325-
let opts = if String.ends_with ~suffix:"_i" ext then [ `Caseless ] else [] in
344+
let opts = if String.ends_with ~suffix:"_i" ext then `Caseless :: Util.default_opts else Util.default_opts in
326345
let parser = match mode with `Pcre -> Regexp.parse_exn ~target:`Match | `Mik -> Regexp.parse_mik_exn ~target:`Match in
327346
let re, bs, nG = extract_bindings ~parser ~pos ~ctx pat in
328347
`Mik (opts, re, nG, bs, case.pc_rhs, case.pc_guard)
@@ -344,9 +363,7 @@ let transform_mixed_match ~loc ~ctx ?matched_expr cases acc =
344363
match case with
345364
| `Mik (opts, re, _, _, _, _) ->
346365
let comp_var = Util.fresh_var () in
347-
let opts_expr =
348-
match opts with [] -> [%expr []] | [ `Caseless ] -> [%expr [ `Caseless ]] | _ -> failwith "Unknown option"
349-
in
366+
let opts_expr = create_opts ~loc opts in
350367
let comp_expr = [%expr Re.compile (Re.Perl.re ~opts:[%e opts_expr] [%e re])] in
351368
let binding = value_binding ~loc ~pat:(ppat_var ~loc { txt = comp_var; loc }) ~expr:comp_expr in
352369
Some (i, comp_var, binding)

ppx_regexp/util.ml

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
open Ppxlib
22
open Ast_builder.Default
33

4+
let default_opts = [ `Dollar_endonly ]
45
let error = Location.raise_errorf
56

67
let warn ~loc msg e =
@@ -72,6 +73,13 @@ let check_unbounded_recursion ~mode var_name content =
7273
let n_as_conv = Printf.sprintf {|\(\b%s\b as [^)]*:[^)]*\)|} var_name in
7374
contains_regex u content || contains_regex n_as content || contains_regex n_as_conv content
7475

76+
let pp_conv = function
77+
| None -> "NONE"
78+
| Some Regexp_types.Int -> "INT"
79+
| Some Regexp_types.Float -> "FLOAT"
80+
| Some (Regexp_types.Func func_name) -> Format.sprintf "FUNC_NAME: %s." func_name
81+
| Some (Regexp_types.Pipe_all_func func_name) -> Format.sprintf "PIPE_FUNC_NAME: %s" func_name
82+
7583
(* debugging *)
7684
let rec debug_re indent (ast : string Regexp_types.t) =
7785
let spaces = String.make indent ' ' in
@@ -97,14 +105,13 @@ let rec debug_re indent (ast : string Regexp_types.t) =
97105
Printf.printf "%sCapture(\n" spaces;
98106
debug_re (indent + 2) expr;
99107
Printf.printf "%s)\n" spaces
100-
| Capture_as (name, _, expr) ->
101-
Printf.printf "%sCapture_as(%s,\n" spaces name.txt;
108+
| Capture_as (name, conv, expr) ->
109+
Printf.printf "%sCapture_as(%s, %s,\n" spaces (pp_conv conv) name.txt;
102110
debug_re (indent + 2) expr;
103111
Printf.printf "%s)\n" spaces
104-
| Named_subs (name, alias, _, expr) ->
105-
Printf.printf "%sNamed_subs(%s, %s,\n" spaces name.txt (match alias with Some a -> a.txt | None -> "None");
112+
| Named_subs (name, alias, conv, expr) ->
113+
Printf.printf "%sNamed_subs(%s, %s, %s,\n" spaces name.txt (pp_conv conv) (match alias with Some a -> a.txt | None -> "None");
106114
debug_re (indent + 2) expr;
107115
Printf.printf "%s)\n" spaces
108-
| Call longident ->
109-
Printf.printf "%sCall(%s)\n" spaces (match longident.txt with Longident.Lident s -> s | _ -> "complex")
116+
| Call longident -> Printf.printf "%sCall(%s)\n" spaces (match longident.txt with Longident.Lident s -> s | _ -> "complex")
110117
| _ -> Printf.printf "%sOther\n" spaces

0 commit comments

Comments
 (0)