Skip to content

Commit 909c0db

Browse files
committed
Support binding group 0 and universal patterns.
1 parent 2f8c17e commit 909c0db

File tree

3 files changed

+67
-22
lines changed

3 files changed

+67
-22
lines changed

README.md

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,19 @@ match%pcre x with
1111
```
1212
into suitable invocations of the
1313
[Re library](https://github.com/ocaml/ocaml-re). The patterns are plain
14-
strings of the form accepted by `Re_pcre`, except groups can be bound to
15-
variables using the syntax `(?<var>...)`. The type of `var` will be
16-
`string` if a match is of the groups is guaranteed given a match of the
17-
whole pattern, and `string option` if the variable is bound to or nested
18-
below an optionally matched group.
14+
strings of the form accepted by `Re_pcre`, with the following additions:
15+
16+
- `(?<var>...)` defines a group and binds whatever it matches as `var`.
17+
The type of `var` will be `string` if the match is guaranteed given that
18+
the whole pattern matches, and `string option` if the variable is bound
19+
to or nested below an optionally matched group.
20+
21+
- `?<var>` at the start of a pattern binds group 0 as `var : string`.
22+
This may not be the full string if the pattern is unanchored.
23+
24+
A variable is allowed for the universal case and is bound to the matched
25+
string. A regular alias is currently not allowed for patterns, since it is
26+
not obvious whether is should bind the full string or group 0.
1927

2028
## Example
2129

ppx_regexp.ml

Lines changed: 41 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ let extract_bindings ~loc p =
8989
(match stack with
9090
| [] -> error ~loc "Unmached end of group."
9191
| ((Some varG, iG, bs') :: stack') ->
92-
let bs = (varG, iG, true) :: bs in
92+
let bs = (varG, Some iG, true) :: bs in
9393
(bs, bs', stack')
9494
| ((None, _, bs') :: stack') ->
9595
(bs, bs', stack'))
@@ -100,30 +100,53 @@ let extract_bindings ~loc p =
100100
in
101101
parse_normal nG stack' (List.rev_append bs bs') i
102102
in
103-
let bs, nG = parse_normal 0 [] [] 0 in
104-
(Buffer.contents buf, bs, nG)
103+
let parse_first () =
104+
if l >= 4 && p.[0] = '?' && p.[1] = '<' then
105+
let j = String.index_from p 2 '>' in
106+
let varG = String.sub p 2 (j - 2) in
107+
parse_normal 0 [] [varG, None, true] (j + 1)
108+
else
109+
parse_normal 0 [] [] 0
110+
in
111+
let bs, nG = parse_first () in
112+
let re_str = Buffer.contents buf in
113+
(try ignore (Re_pcre.regexp re_str) with
114+
| Re_perl.Not_supported -> error ~loc "Unsupported regular expression."
115+
| Re_perl.Parse_error -> error ~loc "Invalid regular expression.");
116+
(Exp.constant (Const.string re_str), bs, nG)
105117

106-
let transform_cases ~loc e cases =
118+
let transform_cases ~loc cases =
107119
let aux case =
108120
if case.pc_guard <> None then
109121
error ~loc "Guards are not implemented for match%pcre." else
110122
(match case.pc_lhs with
111123
| { ppat_desc = Ppat_constant (Pconst_string (re_src,_));
112124
ppat_loc = loc; _ } ->
113-
let re_str, bs, nG = extract_bindings ~loc re_src in
114-
(try ignore (Re_pcre.regexp re_str) with
115-
| Re_perl.Not_supported -> error ~loc "Unsupported regular expression."
116-
| Re_perl.Parse_error -> error ~loc "Invalid regular expression.");
117-
(Exp.constant (Const.string re_str), nG, bs, case.pc_rhs)
125+
let re, bs, nG = extract_bindings ~loc re_src in
126+
(re, nG, bs, case.pc_rhs)
127+
(*
128+
| {ppat_desc = Ppat_alias
129+
({ ppat_desc = Ppat_constant (Pconst_string (re_src,_));
130+
ppat_loc = loc; _ },
131+
var); _} ->
132+
let re, bs, nG = extract_bindings ~loc re_src in
133+
let rhs =
134+
(* TODO: Should this be (_ppx_regexp_v or Re.Group.get _g 0? *)
135+
[%expr let [%p Pat.var var] = _ppx_regexp_v in [%e case.pc_rhs]] in
136+
(re, nG, bs, rhs)
137+
*)
118138
| {ppat_desc = Ppat_any; _} ->
119139
error ~loc "Universal wildcard must be the last pattern."
120140
| {ppat_loc = loc; _} ->
121141
error ~loc "Regular expression pattern should be a string.")
122142
in
123143
let cases, default_rhs =
124144
(match List.rev cases with
125-
| {pc_lhs = {ppat_desc = Ppat_any; _}; pc_rhs; _} :: cases ->
145+
| {pc_lhs = {ppat_desc = Ppat_any; _}; pc_rhs; pc_guard = None} :: cases ->
126146
(cases, pc_rhs)
147+
| {pc_lhs = {ppat_desc = Ppat_var var; _}; pc_rhs; pc_guard = None} ::
148+
cases ->
149+
(cases, [%expr let [%p Pat.var var] = _ppx_regexp_v in [%e pc_rhs]])
127150
| cases ->
128151
let open Lexing in
129152
let pos = loc.Location.loc_start in
@@ -148,8 +171,11 @@ let transform_cases ~loc e cases =
148171
let rec wrap_groups rhs offG = function
149172
| [] -> rhs
150173
| (varG, iG, mustG) :: bs ->
151-
let eG =
152-
[%expr Re.Group.get _g [%e Exp.constant (Const.int (offG + iG + 1))]]
174+
let eG = match iG with
175+
| None ->
176+
[%expr Re.Group.get _g 0]
177+
| Some iG ->
178+
[%expr Re.Group.get _g [%e Exp.constant (Const.int (offG + iG + 1))]]
153179
in
154180
let eG =
155181
if mustG then eG else
@@ -170,7 +196,7 @@ let transform_cases ~loc e cases =
170196
[%e handle_cases (i + 1) (offG + nG) cases]]
171197
in
172198
[%expr
173-
(match Re.exec_opt (fst [%e e_comp]) [%e e] with
199+
(match Re.exec_opt (fst [%e e_comp]) _ppx_regexp_v with
174200
| None -> [%e default_rhs]
175201
| Some _g -> [%e handle_cases 0 0 cases])]
176202

@@ -181,9 +207,9 @@ let rewrite_expr mapper e_ext =
181207
let loc = e.pexp_loc in
182208
(match e.pexp_desc with
183209
| Pexp_match (e, cases) ->
184-
transform_cases ~loc e cases
210+
[%expr let _ppx_regexp_v = [%e e] in [%e transform_cases ~loc cases]]
185211
| Pexp_function (cases) ->
186-
[%expr fun _s -> [%e transform_cases ~loc [%expr _s] cases]]
212+
[%expr fun _ppx_regexp_v -> [%e transform_cases ~loc cases]]
187213
| _ ->
188214
error ~loc "[%pcre] only applies to match an function.")
189215
| _ -> default_mapper.expr mapper e_ext)

test_ppx_regexp.ml

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,15 @@ let rec test2 s =
4444
| {|^[^{}]*\{(?<s'>.*)\}|} -> test2 s'
4545
| _ -> assert false)
4646

47+
let test3 s =
48+
(match%pcre s with
49+
| {|no(is)((e)) (?<is>is) (g(oo)d)|} -> assert (is = "is")
50+
| {|?<s'>&()[a-zA-Z0-9_-]+(;)|} ->
51+
let i, j = String.index s '&', String.rindex s ';' in
52+
assert (s' = String.sub s i (j - i + 1))
53+
| {|m(o+)re re(gular)? no(is)e, (no )*be(t+)?er|} -> ()
54+
| s' -> assert (s = s'))
55+
4756
let () =
4857
test2 "<>";
4958
test2 "<a>";
@@ -53,7 +62,9 @@ let () =
5362
test2 "a;";
5463
test2 "a;b;c;d;";
5564
test2 "<a;b>";
56-
test2 "Xx{--{a;b;c;}--}yY."
65+
test2 "Xx{--{a;b;c;}--}yY.";
66+
test3 "- + &nbsp; + -";
67+
test3 "catch-all"
5768

5869
(* It should work in a functor, and Re_pcre.regxp should be lifted to the
5970
* top-level. *)
@@ -67,6 +78,6 @@ end
6778
(* It should work as a top-level eval. *)
6879
let r = ref false
6980
;;(match%pcre "" with
70-
| "$^" -> r := true
81+
| {|^$|} -> r := true
7182
| _ -> assert false)
7283
;;assert (!r = true)

0 commit comments

Comments
 (0)