Skip to content

Commit 6e798ad

Browse files
committed
%pcre extension for global let declarations of strings
3 reasons why: - 1 AST traversal now possible - possible to detect unbounded recursion for `(?U/N<var>)` patterns - possible to pre-parse each string disabled ppx_tyre completely
1 parent c40f200 commit 6e798ad

File tree

3 files changed

+57
-74
lines changed

3 files changed

+57
-74
lines changed

ppx_regexp/ppx_regexp.ml

Lines changed: 45 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,11 @@ module List = struct
3232
end
3333

3434
module Ctx = struct
35-
(* name -> (value, is_used) *)
36-
type t = (string, string * bool) Hashtbl.t
35+
(* name -> parsed value) *)
36+
type t = (string, label Regexp.t) Hashtbl.t
3737

3838
let empty () = Hashtbl.create 16
3939
let find name ctx = Hashtbl.find_opt ctx name
40-
41-
let update_used name ctx =
42-
match Hashtbl.find_opt ctx name with
43-
| Some (old_value, _) -> Hashtbl.replace ctx name (old_value, true)
44-
| None -> ()
45-
46-
let is_used name ctx = Hashtbl.find_opt ctx name |> Option.value ~default:("", false) |> snd
4740
end
4841

4942
module Regexp = struct
@@ -75,15 +68,14 @@ module Regexp = struct
7568
let delimit_if b s = if b then "(?:" ^ s ^ ")" else s in
7669
let rec recurse p (e' : _ Location.loc) =
7770
let loc = e'.Location.loc in
78-
let parse_inside idr =
71+
let get_parsed idr =
7972
let var_name = idr.txt in
8073
let content =
8174
match Ctx.find var_name ctx with
82-
| Some (value, _) -> parse_exn value
75+
| Some value -> value
8376
| None ->
8477
error ~loc "Variable '%s' not found. %%pcre only supports global let bindings for substitution." var_name
8578
in
86-
Ctx.update_used var_name ctx;
8779
content
8880
in
8981
match e'.Location.txt with
@@ -101,10 +93,10 @@ module Regexp = struct
10193
| Capture _ -> error ~loc "Unnamed capture is not allowed for %%pcre."
10294
| Capture_as (_, e) -> "(" ^ recurse p_alt e ^ ")"
10395
| Named_subs (idr, _, _) ->
104-
let content = parse_inside idr in
96+
let content = get_parsed idr in
10597
"(" ^ recurse p_alt content ^ ")"
10698
| Unnamed_subs (idr, _) ->
107-
let content = parse_inside idr in
99+
let content = get_parsed idr in
108100
recurse p_alt content
109101
| Call _ -> error ~loc "(&...) is not implemented for %%pcre."
110102
in
@@ -217,26 +209,49 @@ let transform_cases ~opts ~loc ~ctx cases =
217209
in
218210
cases, re_binding
219211

212+
let check_unbounded_recursion var_name content =
213+
let contains_regex pattern str =
214+
let re = Re.Str.regexp pattern in
215+
try
216+
Re.Str.search_forward re str 0 |> ignore;
217+
true
218+
with Not_found -> false
219+
in
220+
let u = Printf.sprintf {|(\?U<%s>)|} var_name in
221+
let n = Printf.sprintf {|(\?N<%s>)|} var_name in
222+
let n_as = Printf.sprintf {|(\?N<%s as [^>]*>)|} var_name in
223+
contains_regex u content || contains_regex n content || contains_regex n_as content
224+
225+
let transform_let ~ctx =
226+
List.map
227+
begin
228+
fun vb ->
229+
match vb.pvb_pat.ppat_desc, vb.pvb_expr.pexp_desc with
230+
| Ppat_var { txt = var_name; loc }, Pexp_constant (Pconst_string (value, _, _)) ->
231+
if check_unbounded_recursion var_name value then error ~loc "Unbounded recursion detected!"
232+
else begin
233+
let parsed = Regexp.parse_exn value in
234+
Hashtbl.replace ctx var_name parsed;
235+
let warning_attr =
236+
attribute ~loc ~name:{ txt = "ocaml.warning"; loc }
237+
~payload:(PStr [ { pstr_desc = Pstr_eval (estring ~loc "-32", []); pstr_loc = loc } ])
238+
in
239+
{ vb with pvb_attributes = warning_attr :: vb.pvb_attributes }
240+
end
241+
| _ -> vb
242+
end
243+
220244
let transformation ctx =
221245
object
222246
inherit [value_binding list] Ast_traverse.fold_map as super
223247

224248
method! structure_item item acc =
225-
begin
226-
match item.pstr_desc with
227-
| Pstr_value (_, vbs) ->
228-
List.iter
229-
begin
230-
fun vb ->
231-
match vb.pvb_pat.ppat_desc, vb.pvb_expr.pexp_desc with
232-
| Ppat_var { txt = name; _ }, Pexp_constant (Pconst_string (value, _, _)) ->
233-
Hashtbl.replace ctx name (value, false)
234-
| _ -> ()
235-
end
236-
vbs
237-
| _ -> ()
238-
end;
239-
super#structure_item item acc
249+
match item.pstr_desc with
250+
| Pstr_extension (({ txt = "pcre"; _ }, PStr [ { pstr_desc = Pstr_value (rec_flag, vbs); _ } ]), _) ->
251+
let bindings = transform_let ~ctx vbs in
252+
let new_item = { item with pstr_desc = Pstr_value (rec_flag, bindings) } in
253+
new_item, acc
254+
| _ -> super#structure_item item acc
240255

241256
method! expression e_ext acc =
242257
let e_ext, acc = super#expression e_ext acc in
@@ -248,10 +263,9 @@ let transformation ctx =
248263
[%e cases]],
249264
binding :: acc )
250265
| Pexp_function cases ->
251-
(* | Pexp_function (_, _, Pfunction_cases (cases, _, _)) -> *)
252266
let cases, binding = transform_cases ~opts ~loc ~ctx cases in
253267
[%expr fun _ppx_regexp_v -> [%e cases]], binding :: acc
254-
| _ -> error ~loc "[%%pcre] only applies to match and function."
268+
| _ -> error ~loc "[%%pcre] only applies to match, function and global let declarations of strings."
255269
in
256270
match e_ext.pexp_desc with
257271
| Pexp_extension ({ txt = "pcre"; _ }, PStr [ { pstr_desc = Pstr_eval (e, _); _ } ]) ->
@@ -263,43 +277,11 @@ let transformation ctx =
263277
| _ -> e_ext, acc
264278
end
265279

266-
let suppress_unused_inlined ctx =
267-
object
268-
inherit Ast_traverse.map as super
269-
270-
method! structure_item item =
271-
match item.pstr_desc with
272-
| Pstr_value (rec_flag, bindings) ->
273-
let bindings =
274-
List.map
275-
begin
276-
fun binding ->
277-
match binding.pvb_pat.ppat_desc, binding.pvb_expr.pexp_desc with
278-
| Ppat_var { txt = var_name; _ }, Pexp_constant (Pconst_string (_, _, _)) ->
279-
let needs_suppression = Ctx.is_used var_name ctx in
280-
if needs_suppression then begin
281-
let loc = binding.pvb_loc in
282-
let warning_attr =
283-
attribute ~loc ~name:{ txt = "ocaml.warning"; loc }
284-
~payload:(PStr [ { pstr_desc = Pstr_eval (estring ~loc "-32", []); pstr_loc = loc } ])
285-
in
286-
{ binding with pvb_attributes = warning_attr :: binding.pvb_attributes }
287-
end
288-
else binding
289-
| _ -> binding
290-
end
291-
bindings
292-
in
293-
{ item with pstr_desc = Pstr_value (rec_flag, bindings) }
294-
| _ -> super#structure_item item
295-
end
296-
297280
let impl str =
298281
let ctx = Ctx.empty () in
299282
let str, rev_bindings = (transformation ctx)#structure str [] in
300283
if rev_bindings = [] then str
301284
else (
302-
let str = (suppress_unused_inlined ctx)#structure str in
303285
let re_str =
304286
let loc = Location.none in
305287
[%str
File renamed without changes.

tests/dune

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,21 @@
1818

1919
; Tests for ppx_tyre
2020

21-
(executable
22-
(name test_ppx_tyre)
23-
(modules test_ppx_tyre)
24-
(libraries re re.perl)
25-
(preprocess (pps ppx_tyre)))
26-
(alias
27-
(name runtest)
28-
(package ppx_tyre)
29-
(deps test_ppx_tyre.exe)
30-
(action (run %{deps})))
21+
; (executable
22+
; (name test_ppx_tyre)
23+
; (modules test_ppx_tyre)
24+
; (libraries re re.perl)
25+
; (preprocess (pps ppx_tyre)))
26+
; (alias
27+
; (name runtest)
28+
; (package ppx_tyre)
29+
; (deps test_ppx_tyre.exe)
30+
; (action (run %{deps})))
3131

3232
; Combined preprocessor
3333

3434
(executable
3535
(name main)
3636
(modules Main)
37-
(libraries ppx_regexp ppx_tyre ocaml-migrate-parsetree))
37+
; (libraries ppx_regexp ppx_tyre ocaml-migrate-parsetree))
38+
(libraries ppx_regexp ocaml-migrate-parsetree))

0 commit comments

Comments
 (0)