Skip to content

Commit 03e1a86

Browse files
committed
feature: variable substitution
1 parent 18ca331 commit 03e1a86

File tree

4 files changed

+158
-37
lines changed

4 files changed

+158
-37
lines changed

common/regexp.ml

Lines changed: 57 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ and 'a node =
2929
| Nongreedy of 'a t
3030
| Capture of 'a t
3131
| Capture_as of string Location.loc * 'a t
32+
| Named_subs of string Location.loc * string Location.loc option * 'a t
33+
| Unnamed_subs of string Location.loc * 'a t
3234
| Call of Longident.t Location.loc
3335
(* TODO: | Case_sense of t | Case_blind of t *)
3436

@@ -253,34 +255,62 @@ let parse_exn ?(pos = Lexing.dummy_pos) s =
253255
scan_seq_item j (e :: acc)
254256
| _ -> scan_seq_item (i + 1) (re_perl (i, i + 1) :: acc))
255257

256-
and scan_group i =
257-
(match get i with
258-
| '?' ->
259-
if i + 1 = l then fail (i - 1, i) "Unbalanced '('." else
260-
(match s.[i + 1] with
261-
| '&' ->
262-
let j, idr = scan_ident (i + 2) in
263-
if get j = ':' then
264-
let k, lidr = scan_longident (j + 1) in
265-
(k, Capture_as (idr, wrap_loc (j + 1, k) (Call lidr)))
266-
else
267-
let k, lidr = scan_longident_cont idr.Location.txt j in
268-
(k, Call lidr)
269-
| '<' ->
270-
let j, idr = scan_ident (i + 2) in
271-
if get j <> '>' then fail (i, i + 1) "Unbalanced '<'." else
258+
and scan_group i =
259+
match get i with
260+
| '?' ->
261+
if i + 1 = l then fail (i - 1, i) "Unbalanced '('."
262+
else (
263+
match s.[i + 1] with
264+
| '&' ->
265+
let j, idr = scan_ident (i + 2) in
266+
if get j = ':' then (
267+
let k, lidr = scan_longident (j + 1) in
268+
k, Capture_as (idr, wrap_loc (j + 1, k) (Call lidr)))
269+
else (
270+
let k, lidr = scan_longident_cont idr.Location.txt j in
271+
k, Call lidr)
272+
| '<' ->
273+
let j, idr = scan_ident (i + 2) in
274+
if get j <> '>' then fail (i, i + 1) "Unbalanced '<'."
275+
else (
276+
let k, e = with_loc scan_alt (j + 1) in
277+
k, Capture_as (idr, e))
278+
| 'N' ->
279+
let j, idr = scan_ident (i + 3) in
280+
begin
281+
match get j with
282+
| '>' ->
272283
let k, e = with_loc scan_alt (j + 1) in
273-
(k, Capture_as (idr, e))
274-
| ':' ->
275-
scan_alt (i + 2)
276-
| '#' ->
277-
(try (String.index_from s (i + 2) ')', Seq []) with
278-
| Not_found -> fail (i - 1, i + 1) "Unterminated comment.")
279-
| _ ->
280-
fail (i, i + 2) "Invalid group modifier.")
281-
| '+' -> let j, e = with_loc scan_alt (i + 1) in (j, Capture e)
282-
| '*' | '{' -> fail (i, i + 1) "Invalid group modifier."
283-
| _ -> scan_alt i)
284+
k, Named_subs (idr, None, e)
285+
| ' ' ->
286+
let j, jdr = scan_ident (j + 1) in
287+
if jdr.txt = "as" && get j = ' ' then begin
288+
let j, kdr = scan_ident (j + 1) in
289+
if get j <> '>' then fail (j, j + 1) "Unbalanced '<'."
290+
else begin
291+
let k, e = with_loc scan_alt (j + 1) in
292+
k, Named_subs (idr, Some kdr, e)
293+
end
294+
end
295+
else fail (j - 2, j) "Substring name missing."
296+
| _ -> fail (i, i + 1) "Unbalanced '<'."
297+
end
298+
| 'U' ->
299+
let j, idr = scan_ident (i + 3) in
300+
if get j <> '>' then fail (i, i + 1) "Unbalanced '<'."
301+
else begin
302+
let k, e = with_loc scan_alt (j + 1) in
303+
k, Unnamed_subs (idr, e)
304+
end
305+
| ':' -> scan_alt (i + 2)
306+
| '#' ->
307+
(try String.index_from s (i + 2) ')', Seq [] with Not_found -> fail (i - 1, i + 1) "Unterminated comment.")
308+
| _ -> fail (i, i + 2) "Invalid group modifier.")
309+
| '+' ->
310+
let j, e = with_loc scan_alt (i + 1) in
311+
j, Capture e
312+
| '*' | '{' -> fail (i, i + 1) "Invalid group modifier."
313+
| _ -> scan_alt i
284314
in
285315

286316
(* Top-Level *)

common/regexp.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ and 'a node =
2525
| Nongreedy of 'a t
2626
| Capture of 'a t
2727
| Capture_as of string Location.loc * 'a t
28+
| Named_subs of string Location.loc * string Location.loc option * 'a t
29+
| Unnamed_subs of string Location.loc * 'a t
2830
| Call of Longident.t Location.loc
2931
(* TODO: | Case_sense of t | Case_blind of t *)
3032

ppx_regexp.opam

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ bug-reports: "https://github.com/paurkedal/ppx_regexp/issues"
1010
depends: [
1111
"ocaml" {>= "4.02.3"}
1212
"dune" {>= "1.11"}
13-
"ppxlib" {>= "0.9.0"}
13+
"ppxlib" {>= "0.9.0" & <= "0.35.0"}
1414
"re" {>= "1.7.2"}
1515
"qcheck" {with-test}
1616
]

ppx_regexp/ppx_regexp.ml

Lines changed: 98 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,27 @@ module List = struct
3333
| x :: xs -> fun acc -> fold f xs (f x acc)
3434
end
3535

36+
module Ctx = struct
37+
(* name -> parsed value) *)
38+
type t = (string, label Regexp.t) Hashtbl.t
39+
40+
let empty () = Hashtbl.create 16
41+
let find name ctx = Hashtbl.find_opt ctx name
42+
end
43+
44+
let check_unbounded_recursion var_name content =
45+
let contains_regex pattern str =
46+
let re = Re.Str.regexp pattern in
47+
try
48+
Re.Str.search_forward re str 0 |> ignore;
49+
true
50+
with Not_found -> false
51+
in
52+
let u = Printf.sprintf {|\(\?U<%s>\)|} var_name in
53+
let n = Printf.sprintf {|\(\?N<%s>\)|} var_name in
54+
let n_as = Printf.sprintf {|\(\?N<%s as [^>]*>\)|} var_name in
55+
contains_regex u content || contains_regex n content || contains_regex n_as content
56+
3657
module Regexp = struct
3758
include Regexp
3859

@@ -51,6 +72,10 @@ module Regexp = struct
5172
| Capture_as (idr, e) ->
5273
fun (nG, bs) ->
5374
recurse must_match e (nG + 1, (idr, Some nG, must_match) :: bs)
75+
| Named_subs (idr, None, e) | Named_subs (_, Some idr, e) ->
76+
fun (nG, bs) ->
77+
recurse must_match e (nG + 1, (idr, Some nG, must_match) :: bs)
78+
| Unnamed_subs (_, e) -> recurse must_match e
5479
| Call _ -> error ~loc "(&...) is not implemented for %%pcre.")
5580
in
5681
(function
@@ -59,9 +84,18 @@ module Regexp = struct
5984
| e ->
6085
recurse true e (0, []))
6186

62-
let to_string =
87+
let to_string ~ctx =
6388
let p_alt, p_seq, p_suffix, p_atom = 0, 1, 2, 3 in
6489
let delimit_if b s = if b then "(?:" ^ s ^ ")" else s in
90+
let get_parsed ~loc idr =
91+
let var_name = idr.txt in
92+
let content =
93+
match Ctx.find var_name ctx with
94+
| Some value -> value
95+
| None -> error ~loc "Variable '%s' not found. %%pcre and %%mik only support global let bindings for substitution." var_name
96+
in
97+
content
98+
in
6599
let rec recurse p (e' : _ Location.loc) =
66100
let loc = e'.Location.loc in
67101
(match e'.Location.txt with
@@ -84,6 +118,12 @@ module Regexp = struct
84118
| Nongreedy e -> recurse p_suffix e ^ "?"
85119
| Capture _ -> error ~loc "Unnamed capture is not allowed for %%pcre."
86120
| Capture_as (_, e) -> "(" ^ recurse p_alt e ^ ")"
121+
| Named_subs (idr, _, _) ->
122+
let content = get_parsed ~loc idr in
123+
"(" ^ recurse p_alt content ^ ")"
124+
| Unnamed_subs (idr, _) ->
125+
let content = get_parsed ~loc idr in
126+
recurse p_atom content
87127
| Call _ -> error ~loc "(&...) is not implemented for %%pcre.")
88128
in
89129
(function
@@ -113,10 +153,10 @@ let rec must_match p i =
113153
else
114154
true
115155

116-
let extract_bindings ~pos s =
156+
let extract_bindings ~ctx ~pos s =
117157
let r = Regexp.parse_exn ~pos s in
118158
let nG, bs = Regexp.bindings r in
119-
let re_str = Regexp.to_string r in
159+
let re_str = Regexp.to_string ~ctx r in
120160
let loc = Location.none in
121161
(estring ~loc re_str, bs, nG)
122162

@@ -137,7 +177,27 @@ let rec wrap_group_bindings ~loc rhs offG = function
137177
let [%p ppat_var ~loc varG] = [%e eG] in
138178
[%e wrap_group_bindings ~loc rhs offG bs]]
139179

140-
let transform_cases ~loc cases =
180+
let transform_let ~ctx =
181+
List.map
182+
begin
183+
fun vb ->
184+
match vb.pvb_pat.ppat_desc, vb.pvb_expr.pexp_desc with
185+
| Ppat_var { txt = var_name; loc }, Pexp_constant (Pconst_string (value, _, _)) ->
186+
if check_unbounded_recursion var_name value then error ~loc "Unbounded recursion detected!"
187+
else begin
188+
let parsed = Regexp.parse_exn value in
189+
Hashtbl.replace ctx var_name parsed;
190+
let warning_attr =
191+
attribute ~loc ~name:{ txt = "ocaml.warning"; loc }
192+
~payload:(PStr [ { pstr_desc = Pstr_eval (estring ~loc "-32", []); pstr_loc = loc } ])
193+
in
194+
{ vb with pvb_attributes = warning_attr :: vb.pvb_attributes }
195+
end
196+
| _ -> vb
197+
end
198+
199+
200+
let transform_cases ~ctx ~loc cases =
141201
let aux case =
142202
if case.pc_guard <> None then
143203
error ~loc "Guards are not implemented for match%%pcre."
@@ -148,7 +208,7 @@ let transform_cases ~loc cases =
148208
(loc_end.pos_cnum - loc_start.pos_cnum - String.length re_src) / 2
149209
in
150210
let pos = {loc_start with pos_cnum = loc_start.pos_cnum + re_offset} in
151-
let re, bs, nG = extract_bindings ~pos re_src in
211+
let re, bs, nG = extract_bindings ~ctx ~pos re_src in
152212
(re, nG, bs, case.pc_rhs)
153213
end
154214
in
@@ -201,9 +261,37 @@ let transform_cases ~loc cases =
201261
in
202262
(cases, re_binding)
203263

204-
let transformation = object
264+
let transformation ctx = object (self)
205265
inherit [value_binding list] Ast_traverse.fold_map as super
206266

267+
method! structure_item item acc =
268+
match item.pstr_desc with
269+
(* let%pcre x = {|some regex|}*)
270+
| Pstr_extension (({ txt = "pcre"; _ }, PStr [ { pstr_desc = Pstr_value (rec_flag, vbs); _ } ]), _) ->
271+
let bindings = transform_let ~ctx vbs in
272+
let new_item = { item with pstr_desc = Pstr_value (rec_flag, bindings) } in
273+
new_item, acc
274+
(* let x = expression (which might contain %pcre, like {%pcre|...|}) *)
275+
| Pstr_value (rec_flag, vbs) ->
276+
let processed_vbs, collected_bindings =
277+
List.fold_left
278+
(fun (vbs_acc, bindings_acc) vb ->
279+
match vb.pvb_expr.pexp_desc with
280+
| Pexp_extension ({ txt = "pcre"; _ }, PStr [ { pstr_desc = Pstr_eval (expr, _); _ } ])
281+
when match expr.pexp_desc with Pexp_constant (Pconst_string _) -> true | _ -> false ->
282+
let new_vb = { vb with pvb_expr = expr } in
283+
let transformed = transform_let ~ctx [ new_vb ] in
284+
List.hd transformed :: vbs_acc, bindings_acc
285+
| _ ->
286+
let new_expr, new_bindings = self#expression vb.pvb_expr bindings_acc in
287+
let new_vb = { vb with pvb_expr = new_expr } in
288+
new_vb :: vbs_acc, new_bindings)
289+
([], acc) vbs
290+
in
291+
let new_item = { item with pstr_desc = Pstr_value (rec_flag, List.rev processed_vbs) } in
292+
new_item, collected_bindings
293+
| _ -> super#structure_item item acc
294+
207295
method! expression e_ext acc =
208296
let e_ext, acc = super#expression e_ext acc in
209297
(match e_ext.pexp_desc with
@@ -212,18 +300,19 @@ let transformation = object
212300
let loc = e.pexp_loc in
213301
(match e.pexp_desc with
214302
| Pexp_match (e, cases) ->
215-
let cases, binding = transform_cases ~loc cases in
303+
let cases, binding = transform_cases ~ctx ~loc cases in
216304
([%expr let _ppx_regexp_v = [%e e] in [%e cases]], binding :: acc)
217305
| Pexp_function (cases) ->
218-
let cases, binding = transform_cases ~loc cases in
306+
let cases, binding = transform_cases ~ctx ~loc cases in
219307
([%expr fun _ppx_regexp_v -> [%e cases]], binding :: acc)
220308
| _ ->
221309
error ~loc "[%%pcre] only applies to match an function.")
222310
| _ -> (e_ext, acc))
223311
end
224312

225313
let impl str =
226-
let str, rev_bindings = transformation#structure str [] in
314+
let ctx = Ctx.empty () in
315+
let str, rev_bindings = (transformation ctx)#structure str [] in
227316
if rev_bindings = [] then str else
228317
let re_str =
229318
let loc = Location.none in

0 commit comments

Comments
 (0)