Skip to content

Commit 1e11d4d

Browse files
committed
remove %miksearch, anchor %mikmatch at end by default
- restore `%pcre` to default anchoring
1 parent d5ff493 commit 1e11d4d

File tree

6 files changed

+23
-76
lines changed

6 files changed

+23
-76
lines changed

MIK.md

Lines changed: 2 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -175,32 +175,7 @@ function%mikmatch
175175
This match expression will compile all of the REs in the branches into one, and use marks to find which branch was executed.
176176
Efficient if you have multiple branches.
177177

178-
#### `match%miksearch` and `function%miksearch` (search, not anchored)
179-
180-
The previous extension was **anchored**, meaning, it will only match at the beginning of the string.
181-
182-
This version is not, meaning, for example:
183-
184-
```ocaml
185-
let mik_test = function%mikmatch
186-
| {|/ (digit+ as num) /|} -> ...
187-
...
188-
| _ -> failwith "no match"
189-
190-
let () = mik_test "123" ... (* match *)
191-
let () = mik_test "test123" ... (* ERROR: no match *)
192-
193-
(* but, with %miksearch... *)
194-
let miks_test = function%miksearch
195-
| {|/ (digit+ as num) /|} -> ...
196-
...
197-
| _ -> failwith "no match"
198-
199-
let () = miks_test "123" ... (* match *)
200-
let () = miks_test "test123" ... (* match *)
201-
```
202-
203-
Similar for `%miksearch_i`, except it is case insensitive.
178+
The regexes are anchored at both the beginning and the end. For example, the first match case is compiled to `^some regex$`.
204179

205180
#### General match/function
206181

@@ -210,13 +185,8 @@ function
210185
| {%mikmatch|/ some regex /|} -> ...
211186
...
212187
| "another string" -> ...
213-
| {%miksearch|/ some regex /|} -> ... (* non-anchored *)
214-
...
215-
| "yet another string" -> ...
216-
| {%mikmatch_i|/ another regex /|} -> ... (* case insensitive *)
188+
| {%mikmatch_i|/ some regex /|} -> ...
217189
...
218-
| "would you guess it" -> ...
219-
| {%miksearch_i|/ another regex /|} -> ... (* non-anchored, case insensitive *)
220190
| _ -> ...
221191
```
222192

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ string.
6060

6161
### `%mikmatch`
6262

63-
Full [%mikmatch guide](./MIK.md)
63+
Full [%mikmatch guide](./MIK.md).
6464

6565
#### Quick Links
6666
- [Variable capture](./MIK.md#variable-capture)

common/mik_parser.mly

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,11 @@ let unclosed_error what startpos endpos =
5858
%%
5959

6060
main_match_case:
61-
| SLASH p = pattern SLASH EOF { p }
61+
| SLASH p = pattern SLASH EOF {
62+
let dollar = to_pcre_regex "$" $endpos(p) $endpos($3) in
63+
let loc = make_loc $startpos(p) $endpos($3) in
64+
simplify_seq ~loc [p; dollar]
65+
}
6266
| SLASH pattern EOF { unclosed_error "pattern (missing closing '/')" $startpos($1) $endpos }
6367
| SLASH error { syntax_error "Invalid pattern after opening slash" $startpos($2) $endpos($2) }
6468
| error { syntax_error "Expected pattern to start with '/'" $startpos($1) $endpos($1) }
@@ -137,7 +141,7 @@ basic_atom:
137141
to_pcre_regex $1 $startpos $endpos
138142
}
139143
| EMPTY_STR {
140-
to_pcre_regex "^$" $startpos $endpos
144+
to_pcre_regex "" $startpos $endpos
141145
}
142146
| UNDERSCORE {
143147
to_pcre_regex "." $startpos $endpos

ppx_regexp/ppx_regexp.ml

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ let transformation ctx =
2121
object (self)
2222
inherit [value_binding list] Ast_traverse.fold_map as super
2323

24-
(* Replace the entire method! structure_item in ast_builder.ml with this: *)
2524
method! structure_item item acc =
2625
match item.pstr_desc with
2726
(* let%mik/%pcre x = {|some regex|}*)
@@ -67,32 +66,21 @@ let transformation ctx =
6766
| _ -> Util.error ~loc "[%%pcre] and [%%mik] only apply to match, function and global let declarations of strings."
6867
in
6968
match e_ext.pexp_desc with
70-
(* match%mik/match%pcre and function%mik/function%pcre, anchored *)
69+
(* match%mikmatch/match%pcre and function%mikmatch/function%pcre; only %mikmatch is anchored, %pcre gets no extra options *)
7170
| Pexp_extension ({ txt = ("pcre" | "mikmatch" | "pcre_i" | "mikmatch_i") as ext; _ }, PStr [ { pstr_desc = Pstr_eval (e, _); _ } ])
7271
->
73-
let mode = if String.starts_with ~prefix:"pcre" ext then `Pcre else `Mik in
74-
let opts =
75-
if String.ends_with ~suffix:"_i" ext then `Caseless :: `Anchored :: Util.default_opts else `Anchored :: Util.default_opts
76-
in
77-
let loc = e.pexp_loc in
78-
make_transformations ~mode ~opts ~loc e.pexp_desc
79-
(* match%miks/match%pcres and function%miks/function%pcres, non anchored (search) *)
80-
| Pexp_extension
81-
({ txt = ("pcres" | "miksearch" | "pcres_i" | "miksearch_i") as ext; _ }, PStr [ { pstr_desc = Pstr_eval (e, _); _ } ]) ->
82-
let mode = if String.starts_with ~prefix:"pcre" ext then `Pcre else `Mik in
83-
let opts = if String.ends_with ~suffix:"_i" ext then `Caseless :: Util.default_opts else Util.default_opts in
72+
let mode, opts = if String.starts_with ~prefix:"pcre" ext then `Pcre, [] else `Mik, Util.mikmatch_default_opts in
73+
let opts = if String.ends_with ~suffix:"_i" ext then `Caseless :: opts else opts in
8474
let loc = e.pexp_loc in
8575
make_transformations ~mode ~opts ~loc e.pexp_desc
86-
(* match smth with | {%mik|some regex|} -> ...*)
76+
(* match smth with | {%mikmatch|some regex|} -> ...*)
8777
| Pexp_match (matched_expr, cases) ->
8878
let has_ext_case =
8979
List.exists
9080
begin
9181
fun case ->
9282
match case.pc_lhs.ppat_desc with
93-
| Ppat_extension
94-
({ txt = "pcre" | "pcres" | "mikmatch" | "miksearch" | "pcre_i" | "pcres_i" | "mikmatch_i" | "miksearch_i"; _ }, _) ->
95-
true
83+
| Ppat_extension ({ txt = "pcre" | "mikmatch" | "pcre_i" | "mikmatch_i"; _ }, _) -> true
9684
| _ -> false
9785
end
9886
cases
@@ -104,9 +92,7 @@ let transformation ctx =
10492
begin
10593
fun case ->
10694
match case.pc_lhs.ppat_desc with
107-
| Ppat_extension
108-
({ txt = "pcre" | "pcres" | "mikmatch" | "miksearch" | "pcre_i" | "pcres_i" | "mikmatch_i" | "miksearch_i"; _ }, _) ->
109-
true
95+
| Ppat_extension ({ txt = "pcre" | "mikmatch" | "pcre_i" | "pcres_i" | "mikmatch_i"; _ }, _) -> true
11096
| _ -> false
11197
end
11298
cases

ppx_regexp/transformations.ml

Lines changed: 7 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,6 @@ let rec create_opts ~loc = function
133133
| [] -> [%expr []]
134134
| `Caseless :: xs -> [%expr `Caseless :: [%e create_opts ~loc xs]]
135135
| `Anchored :: xs -> [%expr `Anchored :: [%e create_opts ~loc xs]]
136-
| `Dollar_endonly :: xs -> [%expr `Dollar_endonly :: [%e create_opts ~loc xs]]
137136

138137
let extract_bindings ~(parser : ?pos:position -> string -> string Regexp_types.t) ~ctx ~pos s =
139138
let r = parser ~pos s in
@@ -328,29 +327,17 @@ let transform_mixed_match ~loc ~ctx ?matched_expr cases acc =
328327
(* %mikmatch patterns are anchored; %pcre patterns get no extra options *)
329328
PStr [ { pstr_desc = Pstr_eval ({ pexp_desc = Pexp_constant (Pconst_string (pat, str_loc, _)); _ }, _); _ } ] ) ->
330329
let pos = str_loc.loc_start in
331-
let mode = if String.starts_with ~prefix:"pcre" ext then `Pcre else `Mik in
332-
let opts =
333-
if String.ends_with ~suffix:"_i" ext then `Caseless :: `Anchored :: Util.default_opts else `Anchored :: Util.default_opts
334-
in
335-
let parser = match mode with `Pcre -> Regexp.parse_exn ~target:`Match | `Mik -> Regexp.parse_mik_exn ~target:`Match in
336-
let re, bs, nG = extract_bindings ~parser ~pos ~ctx pat in
337-
`Mik (opts, re, nG, bs, case.pc_rhs, case.pc_guard)
338-
| Ppat_extension
339-
( { txt = ("pcres" | "miksearch" | "pcres_i" | "miksearch_i") as ext; _ },
340-
(* search, non anchored *)
341-
PStr [ { pstr_desc = Pstr_eval ({ pexp_desc = Pexp_constant (Pconst_string (pat, str_loc, _)); _ }, _); _ } ] ) ->
342-
let pos = str_loc.loc_start in
343-
let mode = if String.starts_with ~prefix:"pcre" ext then `Pcre else `Mik in
344-
let opts = if String.ends_with ~suffix:"_i" ext then `Caseless :: Util.default_opts else Util.default_opts in
330+
let mode, opts = if String.starts_with ~prefix:"pcre" ext then `Pcre, [] else `Mik, Util.mikmatch_default_opts in
331+
let opts = if String.ends_with ~suffix:"_i" ext then `Caseless :: opts else opts in
345332
let parser = match mode with `Pcre -> Regexp.parse_exn ~target:`Match | `Mik -> Regexp.parse_mik_exn ~target:`Match in
346333
let re, bs, nG = extract_bindings ~parser ~pos ~ctx pat in
347-
`Mik (opts, re, nG, bs, case.pc_rhs, case.pc_guard)
334+
`Ext (opts, re, nG, bs, case.pc_rhs, case.pc_guard)
348335
| _ -> `Regular case
349336
in
350337

351338
let prepared_cases = List.map aux cases in
352339

353-
let has_mik = List.exists (function `Mik _ -> true | _ -> false) prepared_cases in
340+
let has_mik = List.exists (function `Ext _ -> true | _ -> false) prepared_cases in
354341

355342
if not has_mik then begin
356343
match matched_expr with None -> pexp_function ~loc cases, acc | Some m -> pexp_match ~loc m cases, acc
@@ -361,7 +348,7 @@ let transform_mixed_match ~loc ~ctx ?matched_expr cases acc =
361348
begin
362349
fun i case ->
363350
match case with
364-
| `Mik (opts, re, _, _, _, _) ->
351+
| `Ext (opts, re, _, _, _, _) ->
365352
let comp_var = Util.fresh_var () in
366353
let opts_expr = create_opts ~loc opts in
367354
let comp_expr = [%expr Re.compile (Re.Perl.re ~opts:[%e opts_expr] [%e re])] in
@@ -385,7 +372,7 @@ let transform_mixed_match ~loc ~ctx ?matched_expr cases acc =
385372
match [%e input_var] with
386373
| [%p case.pc_lhs] when [%e Option.value case.pc_guard ~default:[%expr true]] -> [%e case.pc_rhs]
387374
| _ -> [%e build_ordered_match input_var (case_idx + 1) rest mik_comps]]
388-
| `Mik (_, _, _, bs, rhs, guard) :: rest, (idx, comp_var, _) :: rest_comps when idx = case_idx ->
375+
| `Ext (_, _, _, bs, rhs, guard) :: rest, (idx, comp_var, _) :: rest_comps when idx = case_idx ->
389376
let comp_ident = pexp_ident ~loc { txt = Lident comp_var; loc } in
390377
[%expr
391378
match Re.exec_opt [%e comp_ident] [%e input_var] with
@@ -398,7 +385,7 @@ let transform_mixed_match ~loc ~ctx ?matched_expr cases acc =
398385
let guarded_rhs = [%expr if [%e g] then [%e rhs] else [%e build_ordered_match input_var (case_idx + 1) rest rest_comps]] in
399386
wrap_group_bindings ~captured_acc:[] ~loc guarded_rhs 0 bs]
400387
| None -> [%e build_ordered_match input_var (case_idx + 1) rest rest_comps]]
401-
| `Mik _ :: rest, _ ->
388+
| `Ext _ :: rest, _ ->
402389
(* shouldn't happen if indices are correct *)
403390
build_ordered_match input_var (case_idx + 1) rest mik_comps
404391
in

ppx_regexp/util.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
open Ppxlib
22
open Ast_builder.Default
33

4-
let default_opts = [ `Dollar_endonly ]
4+
let mikmatch_default_opts = [ `Anchored ]
55
let error = Location.raise_errorf
66

77
let warn ~loc msg e =

0 commit comments

Comments
 (0)