@@ -28,6 +28,41 @@ module Regexp = struct
28
28
in
29
29
function { Location. txt = Capture_as (idr , conv , e ); _ } -> recurse true e (0 , [ idr, None , conv, true ]) | e -> recurse true e (0 , [] )
30
30
31
+ let to_re_expr ~ctx =
32
+ let rec recurse ~ctx (e' : _ Location.loc ) =
33
+ let loc = e'.Location. loc in
34
+ match e'.Location. txt with
35
+ | Code s -> [% expr Re.Perl. re [% e estring ~loc s]]
36
+ | Seq es ->
37
+ let exprs = List. map (recurse ~ctx ) es in
38
+ [% expr Re. seq [% e elist ~loc exprs]]
39
+ | Alt es ->
40
+ let exprs = List. map (recurse ~ctx ) es in
41
+ [% expr Re. alt [% e elist ~loc exprs]]
42
+ | Opt e -> [% expr Re. opt [% e recurse ~ctx e]]
43
+ | Repeat ({ Location. txt = i , j_opt ; _ } , e ) ->
44
+ let e_i = eint ~loc i in
45
+ let e_j = match j_opt with None -> [% expr None ] | Some j -> [% expr Some [% e eint ~loc j]] in
46
+ [% expr Re. repn [% e recurse ~ctx e] [% e e_i] [% e e_j]]
47
+ | Nongreedy e -> [% expr Re. non_greedy [% e recurse ~ctx e]]
48
+ | Capture _ -> Util. error ~loc " Unnamed capture is not allowed for %%pcre and %%mikmatch."
49
+ | Capture_as (_ , _ , e ) -> [% expr Re. group [% e recurse ~ctx e]]
50
+ | Named_subs (idr , _ , _ , _ ) ->
51
+ let content = get_substitution ~loc ~ctx idr in
52
+ [% expr Re. group [% e recurse ~ctx content]]
53
+ | Unnamed_subs (idr , _ ) ->
54
+ let content = get_substitution ~loc ~ctx idr in
55
+ recurse ~ctx content
56
+ | Pipe_all (_ , _ , e ) -> recurse ~ctx e
57
+ | Call _ -> Util. error ~loc " Call is not allowed for %%pcre and %%mikmatch."
58
+ and get_substitution ~loc ~ctx idr =
59
+ let var_name = idr.txt in
60
+ match Util.Ctx. find var_name ctx with
61
+ | Some value -> value
62
+ | None -> Util. error ~loc " Variable '%s' not found. %%pcre and %%mikmatch only support global let bindings for substitution." var_name
63
+ in
64
+ function { Location. txt = Capture_as (_ , _ , e ); _ } -> recurse ~ctx e | e -> recurse ~ctx e
65
+
31
66
let to_string ~ctx =
32
67
let p_alt, p_seq, p_suffix, p_atom = 0 , 1 , 2 , 3 in
33
68
let delimit_if b s = if b then " (?:" ^ s ^ " )" else s in
@@ -72,6 +107,14 @@ module Regexp = struct
72
107
function { Location. txt = Capture_as (_ , _ , e ); _ } -> recurse 0 e | e -> recurse 0 e
73
108
end
74
109
110
+ let apply_re_opts ~loc re_expr opts =
111
+ let rec apply re = function
112
+ | [] -> re
113
+ | `Caseless :: rest -> apply [% expr Re. no_case [% e re]] rest
114
+ | `Anchored :: rest -> apply [% expr Re. whole_string [% e re]] rest
115
+ in
116
+ apply re_expr opts
117
+
75
118
let rec wrap_group_bindings ~loc ~captured_acc rhs offG = function
76
119
| [] -> rhs
77
120
| [ (varG, _, Some (Regexp_types. Pipe_all_func func_name), _) ] ->
@@ -102,28 +145,6 @@ let rec wrap_group_bindings ~loc ~captured_acc rhs offG = function
102
145
let [% p pat] = [% e eG] in
103
146
[% e wrap_group_bindings ~loc ~captured_acc: (pat_ident :: captured_acc) rhs offG bs]]
104
147
105
- let guards_equal g1 g2 =
106
- match g1, g2 with
107
- | None , None -> true
108
- | Some e1 , Some e2 ->
109
- (* TODO: simplified, should use a more sophisticated AST comparison *)
110
- Pprintast. string_of_expression e1 = Pprintast. string_of_expression e2
111
- | _ -> false
112
-
113
- let group_by_guard cases =
114
- List. fold_left
115
- begin
116
- fun groups case ->
117
- let guard = case.pc_guard in
118
- let rec add_to_groups = function
119
- | [] -> [ guard, [ case ] ]
120
- | (g , cases ) :: rest -> if guards_equal g guard then (g, case :: cases) :: rest else (g, cases) :: add_to_groups rest
121
- in
122
- add_to_groups groups
123
- end
124
- [] cases
125
- |> List. map (fun (g , cases ) -> g, cases)
126
-
127
148
let rec separate_defaults acc = function
128
149
| [] -> List. rev acc, []
129
150
| ({ pc_lhs = { ppat_desc = Ppat_any ; _ } ; _ } as case ) :: rest -> acc, case :: rest
@@ -138,9 +159,8 @@ let rec create_opts ~loc = function
138
159
let extract_bindings ~(parser : ?pos:position -> string -> string Regexp_types.t ) ~ctx ~pos s =
139
160
let r = parser ~pos s in
140
161
let nG, bs = Regexp. bindings r in
141
- let re_str = Regexp. to_string ~ctx r in
142
- let loc = Location. none in
143
- estring ~loc re_str, bs, nG
162
+ let re = Regexp. to_re_expr ~ctx r in
163
+ re, bs, nG
144
164
145
165
let make_default_rhs ~loc = function
146
166
| [] ->
@@ -208,102 +228,188 @@ let transform_cases ~mode ~opts ~loc ~ctx cases =
208
228
re, nG, bs, case.pc_rhs, case.pc_guard
209
229
end
210
230
in
211
- let group_by_pattern cases =
212
- List. fold_left
213
- begin
214
- fun acc (re , nG , bs , rhs , guard ) ->
215
- let found, groups =
216
- List. fold_left
217
- begin
218
- fun (found , acc_groups ) (re' , cases ) ->
219
- if found then found, (re', cases) :: acc_groups
220
- else if re = re' then true , (re', (nG, bs, rhs, guard) :: cases) :: acc_groups
221
- else false , (re', cases) :: acc_groups
222
- end
223
- (false , [] ) acc
231
+
232
+ let group_by_guard_and_re cases =
233
+ let rec group acc current_group = function
234
+ | [] -> if current_group = [] then acc else current_group :: acc
235
+ | ((re , _ , _ , _ , guard ) as case ) :: rest ->
236
+ (match current_group with
237
+ | [] -> group acc [ case ] rest
238
+ | cases_in_group ->
239
+ let can_merge =
240
+ match guard with
241
+ | None -> List. for_all (fun (_ , _ , _ , _ , g ) -> g = None ) cases_in_group
242
+ | Some _ -> List. exists (fun (re' , _ , _ , _ , _ ) -> re = re') cases_in_group
224
243
in
225
- if found then groups else (re, [ nG, bs, rhs, guard ]) :: groups
226
- end
227
- [] cases
244
+ if can_merge then group acc (case :: current_group) rest else group ( List. rev current_group :: acc) [ case ] rest)
245
+ in
246
+ group [] [] cases
228
247
in
229
248
230
- let compute_offsets l =
231
- let result, _ =
232
- List. fold_left
233
- begin
234
- fun (acc , offG ) (re , case_group ) ->
235
- let nG =
236
- let n, _, _, _ = List. hd (List. rev case_group) in
237
- n
238
- in
239
- (re, case_group, offG) :: acc, offG + nG
240
- end
241
- ([] , 0 ) l
249
+ let compile_group group_idx group_cases =
250
+ (* 1: group cases by their regex pattern *)
251
+ let group_by_pattern cases =
252
+ let add_case_to_groups (re , nG , bs , rhs , guard ) groups =
253
+ let rec update_groups = function
254
+ | [] -> [ re, [ nG, bs, rhs, guard ] ]
255
+ | (re' , cases ) :: rest when re = re' -> (re', (nG, bs, rhs, guard) :: cases) :: rest
256
+ | group :: rest -> group :: update_groups rest
257
+ in
258
+ update_groups groups
259
+ in
260
+ List. fold_left (fun acc case -> add_case_to_groups case acc) [] cases |> List. rev
261
+ in
262
+
263
+ (* 2: calculate offsets for capture groups *)
264
+ let calculate_offsets pattern_groups =
265
+ let rec calc acc offset = function
266
+ | [] -> List. rev acc
267
+ | (re , handlers ) :: rest ->
268
+ let max_captures = handlers |> List. map (fun (n , _ , _ , _ ) -> n) |> List. fold_left max 0 in
269
+ calc ((re, handlers, offset) :: acc) (offset + max_captures) rest
270
+ in
271
+ calc [] 0 pattern_groups
242
272
in
243
- List. rev result
273
+
274
+ (* 3: create handler function for cases *)
275
+ let create_handler handler_name case_handlers offset =
276
+ let rec make_guard_chain = function
277
+ | [] -> [% expr None ]
278
+ | (_ , bs , rhs , None) :: _ ->
279
+ (* no guard - directly return result *)
280
+ let bs_rev = List. rev bs in
281
+ [% expr Some [% e wrap_group_bindings ~captured_acc: [] ~loc rhs offset bs_rev]]
282
+ | (_ , bs , rhs , Some guard_expr ) :: rest ->
283
+ (* has guard - check it and continue if fails *)
284
+ let bs_rev = List. rev bs in
285
+ let guarded = [% expr if [% e guard_expr] then Some [% e rhs] else [% e make_guard_chain rest]] in
286
+ wrap_group_bindings ~captured_acc: [] ~loc guarded offset bs_rev
287
+ in
288
+ let body = [% expr fun _g -> [% e make_guard_chain (List. rev case_handlers)]] in
289
+ handler_name, body
290
+ in
291
+
292
+ let pattern_groups = group_by_pattern group_cases in
293
+ let patterns_with_offsets = calculate_offsets pattern_groups in
294
+
295
+ let re_array = patterns_with_offsets |> List. map (fun (re , _ , _ ) -> re) |> pexp_array ~loc in
296
+
297
+ let handlers =
298
+ patterns_with_offsets
299
+ |> List. mapi (fun i (_ , case_handlers , offset ) ->
300
+ let handler_name = Printf. sprintf " _group%d_case_%d" group_idx i in
301
+ create_handler handler_name case_handlers offset)
302
+ in
303
+
304
+ re_array, handlers
244
305
in
245
306
246
- let cases, default_cases = separate_defaults [] cases in
247
- let default_rhs = make_default_rhs ~loc default_cases in
248
- let processed_cases = List. map aux cases |> group_by_pattern |> compute_offsets in
307
+ let build_group_match_expr ~ loc ~ idx ~ re_var_name ~ handlers ~ has_guards ~ is_single_pattern =
308
+ let re_var = pexp_ident ~loc { txt = Lident re_var_name; loc } in
309
+ let continue_next = [ % expr __ppx_regexp_try_next ([ % e eint ~loc idx] + 1 )] in
249
310
250
- let res = pexp_array ~loc @@ List. map (fun (re , _ , _ ) -> re) processed_cases in
311
+ let build_exec_match ~on_match =
312
+ [% expr match Re. exec_opt (fst [% e re_var]) _ppx_regexp_v with None -> [% e continue_next] | Some _g -> [% e on_match]]
313
+ in
251
314
252
- let opts_expr = create_opts ~loc opts in
315
+ if is_single_pattern then (
316
+ (* single pattern: direct handler call, no dispatcher *)
317
+ let handler_name = fst (List. hd handlers) in
318
+ let handler = pexp_ident ~loc { txt = Lident handler_name; loc } in
253
319
254
- let comp =
255
- [% expr
256
- let a = Array. map (fun s -> Re. mark (Re.Perl. re ~opts: [% e opts_expr] s)) [% e res] in
257
- let marks = Array. map fst a in
258
- let re = Re. compile (Re. alt (Array. to_list (Array. map snd a))) in
259
- re, marks]
320
+ build_exec_match ~on_match: [% expr match [% e handler] _g with Some result -> result | None -> [% e continue_next]])
321
+ else (
322
+ (* multiple patterns: use dispatcher *)
323
+ let handlers_array = handlers |> List. map (fun (name , _ ) -> pexp_ident ~loc { txt = Lident name; loc }) |> pexp_array ~loc in
324
+
325
+ let dispatch_call = [% expr __ppx_regexp_dispatch (snd [% e re_var]) [% e handlers_array] _g] in
326
+
327
+ build_exec_match
328
+ ~on_match:
329
+ (if has_guards then [% expr match [% e dispatch_call] with Some result -> result | None -> [% e continue_next]]
330
+ else [% expr match [% e dispatch_call] with Some result -> result | None -> assert false ]))
260
331
in
261
332
262
- let var = Util. fresh_var () in
263
- let re_binding = value_binding ~loc ~pat: (ppat_var ~loc { txt = var; loc }) ~expr: comp in
264
- let e_comp = pexp_ident ~loc { txt = Lident var; loc } in
333
+ let cases, default_cases = separate_defaults [] cases in
334
+ let default_rhs = make_default_rhs ~loc default_cases in
335
+ let processed_cases = List. map aux cases in
336
+ let case_groups = group_by_guard_and_re processed_cases in
265
337
266
- let case_bindings =
338
+ let compiled_groups =
267
339
List. mapi
268
- begin
269
- fun i (_ , case_group , offG ) ->
270
- let handler_name = Printf. sprintf " _case_%d" i in
271
- let handler_body =
272
- let rec mk_guard_chains = function
273
- | [] -> [% expr None ]
274
- | (_ , bs , rhs , guard ) :: rest ->
275
- let bs = List. rev bs in
276
- begin
277
- match guard with
278
- | None -> [% expr Some [% e wrap_group_bindings ~captured_acc: [] ~loc rhs offG bs]]
279
- | Some guard_expr ->
280
- let guarded = [% expr if [% e guard_expr] then Some [% e rhs] else [% e mk_guard_chains rest]] in
281
- wrap_group_bindings ~captured_acc: [] ~loc guarded offG bs
282
- end
340
+ (fun i group_cases ->
341
+ let re_var_name = Util. fresh_var () in
342
+ let re_array, handlers = compile_group i group_cases in
343
+ re_var_name, re_array, handlers)
344
+ case_groups
345
+ in
346
+
347
+ let re_bindings =
348
+ List. map
349
+ (fun (var_name , re_array , _ ) ->
350
+ let is_single = match re_array.pexp_desc with Pexp_array [ _ ] -> true | _ -> false in
351
+
352
+ let comp_expr =
353
+ if is_single then (
354
+ (* single pattern - no marks needed *)
355
+ let single_re = match re_array.pexp_desc with Pexp_array [ re ] -> re | _ -> assert false in
356
+ [% expr
357
+ let re = Re. compile [% e apply_re_opts ~loc single_re opts] in
358
+ re, [||]])
359
+ else (
360
+ (* multiple patterns - mark each one *)
361
+ let res_with_opts =
362
+ match re_array.pexp_desc with
363
+ | Pexp_array res -> res |> List. map (fun re -> [% expr Re. mark [% e apply_re_opts ~loc re opts]])
364
+ | _ -> assert false
283
365
in
284
- [% expr fun _g -> [% e mk_guard_chains (List. rev case_group)]]
285
- in
286
- value_binding ~loc ~pat: (ppat_var ~loc { txt = handler_name; loc }) ~expr: handler_body
287
- end
288
- processed_cases
366
+ [% expr
367
+ let a = [% e pexp_array ~loc res_with_opts] in
368
+ let marks = Array. map fst a in
369
+ let re = Re. compile (Re. alt (Array. to_list (Array. map snd a))) in
370
+ re, marks])
371
+ in
372
+ value_binding ~loc ~pat: (ppat_var ~loc { txt = var_name; loc }) ~expr: comp_expr)
373
+ compiled_groups
289
374
in
290
375
291
- let handlers_array =
292
- pexp_array ~loc @@ List. mapi (fun i _ -> pexp_ident ~loc { txt = Lident (Printf. sprintf " _case_%d" i); loc }) processed_cases
376
+ let handler_bindings =
377
+ compiled_groups
378
+ |> List. concat_map (fun (_ , _ , handlers ) ->
379
+ handlers |> List. map (fun (name , body ) -> value_binding ~loc ~pat: (ppat_var ~loc { txt = name; loc }) ~expr: body))
293
380
in
294
381
295
- let match_expr =
382
+ let build_match_cascade groups =
383
+ let groups_with_info =
384
+ List. mapi
385
+ (fun i (re_var_name , re_array , handlers ) ->
386
+ let group_cases = List. nth case_groups i in
387
+ let has_guards = List. exists (fun (_ , _ , _ , _ , g ) -> g <> None ) group_cases in
388
+ let is_single_pattern = match re_array.pexp_desc with Pexp_array [ _ ] -> true | _ -> false in
389
+ i, re_var_name, handlers, has_guards, is_single_pattern)
390
+ groups
391
+ in
392
+
393
+ let match_cases =
394
+ groups_with_info
395
+ |> List. map (fun (idx , re_var_name , handlers , has_guards , is_single_pattern ) ->
396
+ let match_expr = build_group_match_expr ~loc ~idx ~re_var_name ~handlers ~has_guards ~is_single_pattern in
397
+ case ~lhs: (ppat_constant ~loc (Pconst_integer (string_of_int idx, None ))) ~guard: None ~rhs: match_expr)
398
+ in
399
+
400
+ let default_case = case ~lhs: (ppat_any ~loc ) ~guard: None ~rhs: default_rhs in
401
+
296
402
[% expr
297
- match Re. exec_opt (fst [% e e_comp]) _ppx_regexp_v with
298
- | None -> [% e default_rhs]
299
- | Some _g ->
300
- [% e
301
- pexp_let ~loc Nonrecursive case_bindings
302
- [% expr
303
- let handlers = [% e handlers_array] in
304
- match __ppx_regexp_dispatch (snd [% e e_comp]) handlers _g with Some result -> result | None -> [% e default_rhs]]]]
403
+ let rec __ppx_regexp_try_next group_idx = [% e pexp_match ~loc [% expr group_idx] (match_cases @ [ default_case ])] in
404
+ __ppx_regexp_try_next 0 ]
305
405
in
306
- match_expr, re_binding
406
+
407
+ let match_cascade = build_match_cascade compiled_groups in
408
+
409
+ (* Add handler bindings if needed *)
410
+ let match_expr = if handler_bindings = [] then match_cascade else pexp_let ~loc Nonrecursive handler_bindings match_cascade in
411
+
412
+ match_expr, re_bindings
307
413
308
414
(* processes each case individually instead of combining them into one RE *)
309
415
let transform_mixed_match ~loc ~ctx ?matched_expr cases acc =
@@ -337,8 +443,8 @@ let transform_mixed_match ~loc ~ctx ?matched_expr cases acc =
337
443
match case with
338
444
| `Ext (opts , re , _ , _ , _ , _ ) ->
339
445
let comp_var = Util. fresh_var () in
340
- let opts_expr = create_opts ~loc opts in
341
- let comp_expr = [% expr Re. compile ( Re.Perl. re ~opts: [% e opts_expr] [ % e re]) ] in
446
+ let re_with_opts = apply_re_opts ~loc re opts in
447
+ let comp_expr = [% expr Re. compile [% e re_with_opts] ] in
342
448
let binding = value_binding ~loc ~pat: (ppat_var ~loc { txt = comp_var; loc }) ~expr: comp_expr in
343
449
Some (i, comp_var, binding)
344
450
| _ -> None
0 commit comments