Skip to content

Commit c829e10

Browse files
committed
feature: variable substitution
1 parent 18ca331 commit c829e10

File tree

3 files changed

+98
-75
lines changed

3 files changed

+98
-75
lines changed

common/regexp.ml

Lines changed: 29 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -253,34 +253,35 @@ let parse_exn ?(pos = Lexing.dummy_pos) s =
253253
scan_seq_item j (e :: acc)
254254
| _ -> scan_seq_item (i + 1) (re_perl (i, i + 1) :: acc))
255255

256-
and scan_group i =
257-
(match get i with
258-
| '?' ->
259-
if i + 1 = l then fail (i - 1, i) "Unbalanced '('." else
260-
(match s.[i + 1] with
261-
| '&' ->
262-
let j, idr = scan_ident (i + 2) in
263-
if get j = ':' then
264-
let k, lidr = scan_longident (j + 1) in
265-
(k, Capture_as (idr, wrap_loc (j + 1, k) (Call lidr)))
266-
else
267-
let k, lidr = scan_longident_cont idr.Location.txt j in
268-
(k, Call lidr)
269-
| '<' ->
270-
let j, idr = scan_ident (i + 2) in
271-
if get j <> '>' then fail (i, i + 1) "Unbalanced '<'." else
272-
let k, e = with_loc scan_alt (j + 1) in
273-
(k, Capture_as (idr, e))
274-
| ':' ->
275-
scan_alt (i + 2)
276-
| '#' ->
277-
(try (String.index_from s (i + 2) ')', Seq []) with
278-
| Not_found -> fail (i - 1, i + 1) "Unterminated comment.")
279-
| _ ->
280-
fail (i, i + 2) "Invalid group modifier.")
281-
| '+' -> let j, e = with_loc scan_alt (i + 1) in (j, Capture e)
282-
| '*' | '{' -> fail (i, i + 1) "Invalid group modifier."
283-
| _ -> scan_alt i)
256+
and scan_group i =
257+
match get i with
258+
| '?' ->
259+
if i + 1 = l then fail (i - 1, i) "Unbalanced '('."
260+
else (
261+
match s.[i + 1] with
262+
| '&' ->
263+
let j, idr = scan_ident (i + 2) in
264+
if get j = ':' then (
265+
let k, lidr = scan_longident (j + 1) in
266+
k, Capture_as (idr, wrap_loc (j + 1, k) (Call lidr)))
267+
else (
268+
let k, lidr = scan_longident_cont idr.Location.txt j in
269+
k, Call lidr)
270+
| '<' ->
271+
let j, idr = scan_ident (i + 2) in
272+
if get j <> '>' then fail (i, i + 1) "Unbalanced '<'."
273+
else (
274+
let k, e = with_loc scan_alt (j + 1) in
275+
k, Capture_as (idr, e))
276+
| ':' -> scan_alt (i + 2)
277+
| '#' ->
278+
(try String.index_from s (i + 2) ')', Seq [] with Not_found -> fail (i - 1, i + 1) "Unterminated comment.")
279+
| _ -> fail (i, i + 2) "Invalid group modifier.")
280+
| '+' ->
281+
let j, e = with_loc scan_alt (i + 1) in
282+
j, Capture e
283+
| '*' | '{' -> fail (i, i + 1) "Invalid group modifier."
284+
| _ -> scan_alt i
284285
in
285286

286287
(* Top-Level *)

ppx_regexp.opam

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ bug-reports: "https://github.com/paurkedal/ppx_regexp/issues"
1010
depends: [
1111
"ocaml" {>= "4.02.3"}
1212
"dune" {>= "1.11"}
13-
"ppxlib" {>= "0.9.0"}
13+
"ppxlib" {>= "0.9.0" & <= "0.35.0"}
1414
"re" {>= "1.7.2"}
1515
"qcheck" {with-test}
1616
]

ppx_regexp/ppx_regexp.ml

Lines changed: 68 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -51,46 +51,42 @@ module Regexp = struct
5151
| Capture_as (idr, e) ->
5252
fun (nG, bs) ->
5353
recurse must_match e (nG + 1, (idr, Some nG, must_match) :: bs)
54-
| Call _ -> error ~loc "(&...) is not implemented for %%pcre.")
54+
| Call _ -> fun acc -> acc)
5555
in
5656
(function
5757
| {Location.txt = Capture_as (idr, e); _} ->
5858
recurse true e (0, [idr, None, true])
5959
| e ->
6060
recurse true e (0, []))
6161

62-
let to_string =
63-
let p_alt, p_seq, p_suffix, p_atom = 0, 1, 2, 3 in
64-
let delimit_if b s = if b then "(?:" ^ s ^ ")" else s in
65-
let rec recurse p (e' : _ Location.loc) =
66-
let loc = e'.Location.loc in
67-
(match e'.Location.txt with
68-
| Code s ->
69-
(* Delimiters not needed as Regexp.parse_exn only returns single
70-
* chars, csets, and escape sequences. *)
71-
s
72-
| Seq es ->
73-
delimit_if (p > p_seq)
74-
(String.concat "" (List.map (recurse p_seq) es))
75-
| Alt es ->
76-
delimit_if (p > p_alt)
77-
(String.concat "|" (List.map (recurse p_alt) es))
78-
| Opt e ->
79-
delimit_if (p > p_suffix) (recurse p_atom e ^ "?")
80-
| Repeat ({Location.txt = (i, j_opt); _}, e) ->
81-
let j_str = match j_opt with None -> "" | Some j -> string_of_int j in
82-
delimit_if (p > p_suffix)
83-
(Printf.sprintf "%s{%d,%s}" (recurse p_atom e) i j_str)
84-
| Nongreedy e -> recurse p_suffix e ^ "?"
85-
| Capture _ -> error ~loc "Unnamed capture is not allowed for %%pcre."
86-
| Capture_as (_, e) -> "(" ^ recurse p_alt e ^ ")"
87-
| Call _ -> error ~loc "(&...) is not implemented for %%pcre.")
88-
in
89-
(function
90-
| {Location.txt = Capture_as (_, e); _} ->
91-
recurse 0 e
92-
| e ->
93-
recurse 0 e)
62+
let rec to_re_expr ~loc (e : _ Location.loc) =
63+
let open Ast_builder.Default in
64+
match e.Location.txt with
65+
| Code s ->
66+
[%expr Re.Perl.re [%e estring ~loc s]]
67+
| Seq es ->
68+
let exprs = List.map (to_re_expr ~loc) es in
69+
[%expr Re.seq [%e elist ~loc exprs]]
70+
| Alt es ->
71+
let exprs = List.map (to_re_expr ~loc) es in
72+
[%expr Re.alt [%e elist ~loc exprs]]
73+
| Opt e ->
74+
[%expr Re.opt [%e to_re_expr ~loc e]]
75+
| Repeat ({Location.txt = (i, j_opt); _}, e) ->
76+
let e_i = eint ~loc i in
77+
let e_j = match j_opt with
78+
| None -> [%expr None]
79+
| Some j -> [%expr Some [%e eint ~loc j]]
80+
in
81+
[%expr Re.repn [%e to_re_expr ~loc e] [%e e_i] [%e e_j]]
82+
| Nongreedy e ->
83+
[%expr Re.non_greedy [%e to_re_expr ~loc e]]
84+
| Capture e ->
85+
[%expr Re.group [%e to_re_expr ~loc e]]
86+
| Capture_as (_, e) ->
87+
[%expr Re.group [%e to_re_expr ~loc e]]
88+
| Call lid ->
89+
pexp_ident ~loc lid
9490
end
9591

9692
let fresh_var =
@@ -116,9 +112,9 @@ let rec must_match p i =
116112
let extract_bindings ~pos s =
117113
let r = Regexp.parse_exn ~pos s in
118114
let nG, bs = Regexp.bindings r in
119-
let re_str = Regexp.to_string r in
120115
let loc = Location.none in
121-
(estring ~loc re_str, bs, nG)
116+
let re_expr = Regexp.to_re_expr ~loc r in
117+
(re_expr, bs, nG)
122118

123119
let rec wrap_group_bindings ~loc rhs offG = function
124120
| [] -> rhs
@@ -137,6 +133,19 @@ let rec wrap_group_bindings ~loc rhs offG = function
137133
let [%p ppat_var ~loc varG] = [%e eG] in
138134
[%e wrap_group_bindings ~loc rhs offG bs]]
139135

136+
let transform_let =
137+
List.map
138+
begin
139+
fun vb ->
140+
match vb.pvb_pat.ppat_desc, vb.pvb_expr.pexp_desc with
141+
| Ppat_var { txt = _; loc }, Pexp_constant (Pconst_string (value, _, _)) ->
142+
let parsed = Regexp.parse_exn value in
143+
let re_expr = Regexp.to_re_expr ~loc parsed in
144+
let expr = [%expr [%e re_expr]] in
145+
{ vb with pvb_expr = expr }
146+
| _ -> vb
147+
end
148+
140149
let transform_cases ~loc cases =
141150
let aux case =
142151
if case.pc_guard <> None then
@@ -173,7 +182,7 @@ let transform_cases ~loc cases =
173182
let cases = List.rev_map aux cases in
174183
let res = pexp_array ~loc (List.map (fun (re, _, _, _) -> re) cases) in
175184
let comp = [%expr
176-
let a = Array.map (fun s -> Re.mark (Re.Perl.re s)) [%e res] in
185+
let a = Array.map (fun re -> Re.mark re) [%e res] in
177186
let marks = Array.map fst a in
178187
let re = Re.compile (Re.alt (Array.to_list (Array.map snd a))) in
179188
(re, marks)
@@ -202,33 +211,46 @@ let transform_cases ~loc cases =
202211
(cases, re_binding)
203212

204213
let transformation = object
205-
inherit [value_binding list] Ast_traverse.fold_map as super
214+
inherit [value_binding list * value_binding list] Ast_traverse.fold_map as super
215+
216+
method! structure_item item (acc, let_acc) =
217+
match item.pstr_desc with
218+
(* let%pcre x = {|some regex|}*)
219+
| Pstr_extension (({ txt = "pcre"; loc }, PStr [ { pstr_desc = Pstr_value (Nonrecursive, vbs); _ } ]), _) ->
220+
let bindings = transform_let vbs in
221+
let dummy = {item with pstr_desc = Pstr_eval ([%expr ()], [])} in
222+
dummy, (acc, bindings @ let_acc)
223+
| _ -> super#structure_item item (acc, let_acc)
206224

207225
method! expression e_ext acc =
208-
let e_ext, acc = super#expression e_ext acc in
226+
let e_ext, (acc, let_acc) = super#expression e_ext acc in
209227
(match e_ext.pexp_desc with
210228
| Pexp_extension
211229
({txt = "pcre"; _}, PStr [{pstr_desc = Pstr_eval (e, _); _}]) ->
212230
let loc = e.pexp_loc in
213231
(match e.pexp_desc with
214232
| Pexp_match (e, cases) ->
215233
let cases, binding = transform_cases ~loc cases in
216-
([%expr let _ppx_regexp_v = [%e e] in [%e cases]], binding :: acc)
234+
([%expr let _ppx_regexp_v = [%e e] in [%e cases]], (binding :: acc, let_acc))
217235
| Pexp_function (cases) ->
218236
let cases, binding = transform_cases ~loc cases in
219-
([%expr fun _ppx_regexp_v -> [%e cases]], binding :: acc)
237+
([%expr fun _ppx_regexp_v -> [%e cases]], (binding :: acc, let_acc))
220238
| _ ->
221239
error ~loc "[%%pcre] only applies to match an function.")
222-
| _ -> (e_ext, acc))
240+
| _ -> (e_ext, (acc, let_acc)))
223241
end
224242

225243
let impl str =
226-
let str, rev_bindings = transformation#structure str [] in
244+
let str, (rev_bindings, let_bindings) = transformation#structure str ([], []) in
227245
if rev_bindings = [] then str else
228-
let re_str =
229246
let loc = Location.none in
230-
[%str open (struct [%%i pstr_value ~loc Nonrecursive rev_bindings] end)]
231-
in
232-
re_str @ str
247+
let all_bindings = List.rev let_bindings @ rev_bindings in
248+
let struct_items =
249+
List.fold_left (fun acc binding ->
250+
acc @ [%str let [%p binding.pvb_pat] = [%e binding.pvb_expr]]
251+
) [] all_bindings
252+
in
253+
let mod_expr = pmod_structure ~loc struct_items in
254+
[%str open [%m mod_expr]] @ str
233255

234256
let () = Driver.register_transformation ~impl "ppx_regexp"

0 commit comments

Comments
 (0)