@@ -33,6 +33,27 @@ module List = struct
33
33
| x :: xs -> fun acc -> fold f xs (f x acc)
34
34
end
35
35
36
+ module Ctx = struct
37
+ (* name -> parsed value) *)
38
+ type t = (string , label Regexp .t ) Hashtbl .t
39
+
40
+ let empty () = Hashtbl. create 16
41
+ let find name ctx = Hashtbl. find_opt ctx name
42
+ end
43
+
44
+ let check_unbounded_recursion var_name content =
45
+ let contains_regex pattern str =
46
+ let re = Re.Str. regexp pattern in
47
+ try
48
+ Re.Str. search_forward re str 0 |> ignore;
49
+ true
50
+ with Not_found -> false
51
+ in
52
+ let u = Printf. sprintf {| \(\?U < % s> \)| } var_name in
53
+ let n = Printf. sprintf {| \(\?N < % s> \)| } var_name in
54
+ let n_as = Printf. sprintf {| \(\?N < % s as [^> ]*> \)| } var_name in
55
+ contains_regex u content || contains_regex n content || contains_regex n_as content
56
+
36
57
module Regexp = struct
37
58
include Regexp
38
59
@@ -51,6 +72,10 @@ module Regexp = struct
51
72
| Capture_as (idr , e ) ->
52
73
fun (nG , bs ) ->
53
74
recurse must_match e (nG + 1 , (idr, Some nG, must_match) :: bs)
75
+ | Named_subs (idr , None , e ) | Named_subs (_ , Some idr , e ) ->
76
+ fun (nG , bs ) ->
77
+ recurse must_match e (nG + 1 , (idr, Some nG, must_match) :: bs)
78
+ | Unnamed_subs (_ , e ) -> recurse must_match e
54
79
| Call _ -> error ~loc " (&...) is not implemented for %%pcre." )
55
80
in
56
81
(function
@@ -59,9 +84,18 @@ module Regexp = struct
59
84
| e ->
60
85
recurse true e (0 , [] ))
61
86
62
- let to_string =
87
+ let to_string ~ ctx =
63
88
let p_alt, p_seq, p_suffix, p_atom = 0 , 1 , 2 , 3 in
64
89
let delimit_if b s = if b then " (?:" ^ s ^ " )" else s in
90
+ let get_parsed ~loc idr =
91
+ let var_name = idr.txt in
92
+ let content =
93
+ match Ctx. find var_name ctx with
94
+ | Some value -> value
95
+ | None -> error ~loc " Variable '%s' not found. %%pcre and %%mik only support global let bindings for substitution." var_name
96
+ in
97
+ content
98
+ in
65
99
let rec recurse p (e' : _ Location.loc ) =
66
100
let loc = e'.Location. loc in
67
101
(match e'.Location. txt with
@@ -84,6 +118,12 @@ module Regexp = struct
84
118
| Nongreedy e -> recurse p_suffix e ^ " ?"
85
119
| Capture _ -> error ~loc " Unnamed capture is not allowed for %%pcre."
86
120
| Capture_as (_ , e ) -> " (" ^ recurse p_alt e ^ " )"
121
+ | Named_subs (idr , _ , _ ) ->
122
+ let content = get_parsed ~loc idr in
123
+ " (" ^ recurse p_alt content ^ " )"
124
+ | Unnamed_subs (idr , _ ) ->
125
+ let content = get_parsed ~loc idr in
126
+ recurse p_atom content
87
127
| Call _ -> error ~loc " (&...) is not implemented for %%pcre." )
88
128
in
89
129
(function
@@ -113,10 +153,10 @@ let rec must_match p i =
113
153
else
114
154
true
115
155
116
- let extract_bindings ~pos s =
156
+ let extract_bindings ~ctx ~ pos s =
117
157
let r = Regexp. parse_exn ~pos s in
118
158
let nG, bs = Regexp. bindings r in
119
- let re_str = Regexp. to_string r in
159
+ let re_str = Regexp. to_string ~ctx r in
120
160
let loc = Location. none in
121
161
(estring ~loc re_str, bs, nG)
122
162
@@ -137,7 +177,27 @@ let rec wrap_group_bindings ~loc rhs offG = function
137
177
let [% p ppat_var ~loc varG] = [% e eG] in
138
178
[% e wrap_group_bindings ~loc rhs offG bs]]
139
179
140
- let transform_cases ~loc cases =
180
+ let transform_let ~ctx =
181
+ List. map
182
+ begin
183
+ fun vb ->
184
+ match vb.pvb_pat.ppat_desc, vb.pvb_expr.pexp_desc with
185
+ | Ppat_var { txt = var_name ; loc } , Pexp_constant (Pconst_string (value , _ , _ )) ->
186
+ if check_unbounded_recursion var_name value then error ~loc " Unbounded recursion detected!"
187
+ else begin
188
+ let parsed = Regexp. parse_exn value in
189
+ Hashtbl. replace ctx var_name parsed;
190
+ let warning_attr =
191
+ attribute ~loc ~name: { txt = " ocaml.warning" ; loc }
192
+ ~payload: (PStr [ { pstr_desc = Pstr_eval (estring ~loc " -32" , [] ); pstr_loc = loc } ])
193
+ in
194
+ { vb with pvb_attributes = warning_attr :: vb .pvb_attributes }
195
+ end
196
+ | _ -> vb
197
+ end
198
+
199
+
200
+ let transform_cases ~ctx ~loc cases =
141
201
let aux case =
142
202
if case.pc_guard <> None then
143
203
error ~loc " Guards are not implemented for match%%pcre."
@@ -148,7 +208,7 @@ let transform_cases ~loc cases =
148
208
(loc_end.pos_cnum - loc_start.pos_cnum - String. length re_src) / 2
149
209
in
150
210
let pos = {loc_start with pos_cnum = loc_start.pos_cnum + re_offset} in
151
- let re, bs, nG = extract_bindings ~pos re_src in
211
+ let re, bs, nG = extract_bindings ~ctx ~ pos re_src in
152
212
(re, nG, bs, case.pc_rhs)
153
213
end
154
214
in
@@ -201,9 +261,37 @@ let transform_cases ~loc cases =
201
261
in
202
262
(cases, re_binding)
203
263
204
- let transformation = object
264
+ let transformation ctx = object ( self )
205
265
inherit [value_binding list ] Ast_traverse. fold_map as super
206
266
267
+ method! structure_item item acc =
268
+ match item.pstr_desc with
269
+ (* let % pcre x = {|some regex|} * )
270
+ | Pstr_extension (({ txt = "pcre" ; _ } , PStr [ { pstr_desc = Pstr_value (rec_flag , vbs ); _ } ]), _ ) ->
271
+ let bindings = transform_let ~ctx vbs in
272
+ let new_item = { item with pstr_desc = Pstr_value (rec_flag, bindings) } in
273
+ new_item, acc
274
+ (* let x = expression (which might contain %pcre, like {%pcre|...|}) *)
275
+ | Pstr_value (rec_flag , vbs ) ->
276
+ let processed_vbs, collected_bindings =
277
+ List. fold_left
278
+ (fun (vbs_acc , bindings_acc ) vb ->
279
+ match vb.pvb_expr.pexp_desc with
280
+ | Pexp_extension ({ txt = " pcre" ; _ }, PStr [ { pstr_desc = Pstr_eval (expr, _); _ } ])
281
+ when match expr.pexp_desc with Pexp_constant (Pconst_string _ ) -> true | _ -> false ->
282
+ let new_vb = { vb with pvb_expr = expr } in
283
+ let transformed = transform_let ~ctx [ new_vb ] in
284
+ List. hd transformed :: vbs_acc, bindings_acc
285
+ | _ ->
286
+ let new_expr, new_bindings = self#expression vb.pvb_expr bindings_acc in
287
+ let new_vb = { vb with pvb_expr = new_expr } in
288
+ new_vb :: vbs_acc, new_bindings)
289
+ ([] , acc) vbs
290
+ in
291
+ let new_item = { item with pstr_desc = Pstr_value (rec_flag, List. rev processed_vbs) } in
292
+ new_item, collected_bindings
293
+ | _ -> super#structure_item item acc
294
+
207
295
method! expression e_ext acc =
208
296
let e_ext, acc = super#expression e_ext acc in
209
297
(match e_ext.pexp_desc with
@@ -212,18 +300,19 @@ let transformation = object
212
300
let loc = e.pexp_loc in
213
301
(match e.pexp_desc with
214
302
| Pexp_match (e , cases ) ->
215
- let cases, binding = transform_cases ~loc cases in
303
+ let cases, binding = transform_cases ~ctx ~ loc cases in
216
304
([% expr let _ppx_regexp_v = [% e e] in [% e cases]], binding :: acc)
217
305
| Pexp_function (cases ) ->
218
- let cases, binding = transform_cases ~loc cases in
306
+ let cases, binding = transform_cases ~ctx ~ loc cases in
219
307
([% expr fun _ppx_regexp_v -> [% e cases]], binding :: acc)
220
308
| _ ->
221
309
error ~loc " [%%pcre] only applies to match an function." )
222
310
| _ -> (e_ext, acc))
223
311
end
224
312
225
313
let impl str =
226
- let str, rev_bindings = transformation#structure str [] in
314
+ let ctx = Ctx. empty () in
315
+ let str, rev_bindings = (transformation ctx)#structure str [] in
227
316
if rev_bindings = [] then str else
228
317
let re_str =
229
318
let loc = Location. none in
0 commit comments