16
16
*)
17
17
18
18
open Printf
19
+ open Regexp_types
19
20
module Loc = Location
20
21
module Q = QCheck
21
22
@@ -70,8 +71,10 @@ module Regexp = struct
70
71
| Opt e -> (match simplify e with { Loc. txt = Opt _ ; _ } as e' -> e'.Loc. txt | e' -> Opt e')
71
72
| Repeat (ij , e ) -> Repeat (ij, simplify e)
72
73
| Nongreedy e -> Nongreedy (simplify e)
74
+ | Caseless e -> Caseless (simplify e)
73
75
| Capture e -> Capture (simplify e)
74
- | Capture_as (name , e ) -> Capture_as (name, simplify e)
76
+ | Capture_as (name , conv , e ) -> Capture_as (name, conv, simplify e)
77
+ | Pipe_all (name , conv , e ) -> Pipe_all (name, conv, simplify e)
75
78
| (Code _ | Call _ ) as e -> e
76
79
77
80
let rec equal ' e1 e2 =
@@ -81,8 +84,10 @@ module Regexp = struct
81
84
| Opt e1 , Opt e2 -> equal' e1 e2
82
85
| Repeat ({ Loc. txt = ij1 ; _ } , e1 ), Repeat ({ Loc. txt = ij2 ; _ } , e2 ) -> ij1 = ij2 && equal' e1 e2
83
86
| Nongreedy e1 , Nongreedy e2 -> equal' e1 e2
87
+ | Caseless e1 , Caseless e2 -> equal' e1 e2
84
88
| Capture e1 , Capture e2 -> equal' e1 e2
85
- | Capture_as (name1 , e1 ), Capture_as (name2 , e2 ) -> name1.Loc. txt = name2.Loc. txt && equal' e1 e2
89
+ | Capture_as (name1 , conv1 , e1 ), Capture_as (name2 , conv2 , e2 ) -> name1.Loc. txt = name2.Loc. txt && conv1 = conv2 && equal' e1 e2
90
+ | Pipe_all (name1 , conv1 , e1 ), Pipe_all (name2 , conv2 , e2 ) -> name1.Loc. txt = name2.Loc. txt && conv1 = conv2 && equal' e1 e2
86
91
| Call name1 , Call name2 -> name1.Loc. txt = name2.Loc. txt
87
92
| _ , _ -> false (* We'll notice. *)
88
93
@@ -93,22 +98,25 @@ module Regexp = struct
93
98
let delimit_if b s = if b then " (" ^ s ^ " )" else s in
94
99
let rec aux p e =
95
100
match e.Loc. txt with
96
- | Code s -> delimit_if (p > p_seq) s
101
+ | Code s ->
102
+ let s = if String. contains s '/' then String. concat " \\ /" (String. split_on_char '/' s) else s in
103
+ delimit_if (p > p_seq) s
97
104
| Seq es -> delimit_if (p > p_seq) (String. concat " " (List. map (aux p_seq) es))
98
105
| Alt es -> delimit_if (p > p_alt) (String. concat " |" (List. map (aux p_alt) es))
99
106
| Opt e -> delimit_if (p > = p_suffix) (aux p_suffix e ^ " ?" )
100
107
| Repeat ({ Loc. txt = i , j_opt ; _ } , e ) ->
101
108
let j_str = match j_opt with None -> " " | Some j -> string_of_int j in
102
109
delimit_if (p > = p_suffix) (sprintf " %s{%d,%s}" (aux p_suffix e) i j_str)
103
110
| Nongreedy e -> aux (p_suffix - 1 ) e ^ " ?"
111
+ | Caseless e -> " (?i:" ^ aux (p_suffix - 1 ) e ^ " )"
104
112
| Capture e -> " (+" ^ aux p_bottom e ^ " )"
105
- | Capture_as ({ Loc. txt = name ; _ } , e ) -> " (?<" ^ name ^ " >" ^ aux p_bottom e ^ " )"
113
+ | Capture_as ({ Loc. txt = name ; _ } , _ , e ) -> " (?<" ^ name ^ " >" ^ aux p_bottom e ^ " )"
114
+ | Pipe_all (_ , _ , e ) -> aux p_bottom e
106
115
| Call { Loc. txt = idr ; _ } -> " (&" ^ String. concat " ." (Longident. flatten idr) ^ " )"
107
116
in
108
117
aux 0
109
118
110
119
let rec pp_debug ppf self =
111
- let open Regexp in
112
120
let open Format in
113
121
let open Loc in
114
122
let pp_pos ppf pos =
@@ -139,8 +147,10 @@ module Regexp = struct
139
147
let pp_option f ppf = function None -> () | Some e -> f ppf e in
140
148
fprintf ppf " (Repeat {%d,%a}%a %a)" i (pp_option Format. pp_print_int) j pp_loc loc pp_debug e
141
149
| Nongreedy e -> fprintf ppf " (Nongreedy %a)" pp_debug e
150
+ | Caseless e -> fprintf ppf " (Caseless %a)" pp_debug e
142
151
| Capture e -> fprintf ppf " (Capture %a)" pp_debug e
143
- | Capture_as (name , e ) -> fprintf ppf " (Capture_as %s%a %a)" name.txt pp_loc name.loc pp_debug e
152
+ | Capture_as (name , _ , e ) -> fprintf ppf " (Capture_as %s%a %a)" name.txt pp_loc name.loc pp_debug e
153
+ | Pipe_all (name , _ , e ) -> fprintf ppf " (Pipe_all %s%a %a)" name.txt pp_loc name.loc pp_debug e
144
154
| Call name -> fprintf ppf " (Call %s%a)" (String. concat " ." (Longident. flatten name.txt)) pp_loc name.loc);
145
155
pp_loc ppf self.loc
146
156
@@ -160,15 +170,17 @@ module Regexp = struct
160
170
| Opt e -> Re. opt (to_re e)
161
171
| Repeat ({ Loc. txt = i , j ; _ } , e ) -> Re. repn (to_re e) i j
162
172
| Nongreedy e -> Re. non_greedy (to_re e)
173
+ | Caseless e -> Re. no_case (to_re e)
163
174
| Capture e -> Re. group (to_re e)
164
- | Capture_as (_ , e ) -> Re. group (to_re e)
175
+ | Capture_as (_ , _ , e ) -> Re. group (to_re e)
176
+ | Pipe_all (_ , _ , e ) -> to_re e
165
177
| Call _ -> raise Re.Perl. Not_supported
166
178
167
179
let rec has_anon_capture e =
168
180
match e.Loc. txt with
169
181
| Code _ | Call _ -> false
170
182
| Seq es | Alt es -> List. exists has_anon_capture es
171
- | Opt e | Repeat (_ , e ) | Capture_as (_ , e ) | Nongreedy e -> has_anon_capture e
183
+ | Opt e | Repeat (_ , e ) | Capture_as (_ , _ , e ) | Pipe_all ( _ , _ , e ) | Nongreedy e | Caseless e -> has_anon_capture e
172
184
| Capture _ -> true
173
185
end
174
186
@@ -193,14 +205,13 @@ let gen_name =
193
205
194
206
let gen_regexp =
195
207
let open Q.Gen in
196
- let open Regexp in
197
208
let gen_char = map (fun c -> mknoloc (Code (String. make 1 c))) numeral in
198
209
let gen_backlash_op =
199
210
let backslash_ops = " wWsSdDbBAZzG" in
200
211
map (fun i -> mknoloc (Code (sprintf " \\ %c" backslash_ops.[i]))) (int_bound (String. length backslash_ops - 1 ))
201
212
in
202
213
let gen_quoted_op =
203
- let quotable = " !\" #$%&'()*+,-./ :=<=>?@[\\ ]^`{|}~" in
214
+ let quotable = " !\" #$%&'()*+,-.:=<=>?@[\\ ]^`{|}~" in
204
215
map (fun i -> mknoloc (Code (sprintf " \\ %c" quotable.[i]))) (int_bound (String. length quotable - 1 ))
205
216
in
206
217
map Regexp. simplify
@@ -212,7 +223,8 @@ let gen_regexp =
212
223
let gen_opt = map (fun e -> mknoloc (Opt e)) (self n) in
213
224
let gen_repeat = map2 (fun i e -> mknoloc (Repeat (mknoloc (i, None ), e))) nat (self n) in
214
225
let gen_capture = map (fun e -> mknoloc (Capture e)) (self n) in
215
- let gen_capture_as = map2 (fun a e -> mknoloc (Capture_as (mknoloc a, e))) gen_name (self n) in
226
+ let gen_capture_as = map2 (fun a e -> mknoloc (Capture_as (mknoloc a, None , e))) gen_name (self n) in
227
+ let gen_caseless = map (fun e -> mknoloc (Caseless e)) (self n) in
216
228
frequency
217
229
[
218
230
1 , gen_char;
@@ -224,12 +236,12 @@ let gen_regexp =
224
236
n, gen_repeat;
225
237
n, gen_capture;
226
238
n, gen_capture_as;
239
+ n, gen_caseless;
227
240
]
228
241
229
242
let shrink_regexp =
230
243
let open Q.Shrink in
231
244
let open Q.Iter in
232
- let open Regexp in
233
245
let rec shrink e =
234
246
match e.Loc. txt with
235
247
| Code s -> map (fun s -> mknoloc (Code s)) (string s)
@@ -238,8 +250,9 @@ let shrink_regexp =
238
250
| Opt e -> map (fun e -> mknoloc (Opt e)) (shrink e)
239
251
| Repeat ({ Loc. txt = i , j ; _ } , e ) ->
240
252
map2 (fun (i , j ) e -> mknoloc (Repeat (mknoloc (i, j), e))) (pair (int i) (option int j)) (shrink e)
253
+ | Caseless e -> map (fun e -> mknoloc (Caseless e)) (shrink e)
241
254
| Capture e -> map (fun e -> mknoloc (Capture e)) (shrink e)
242
- | Capture_as (name , e ) -> map2 (fun name e -> mknoloc (Capture_as (mknoloc name, e))) (string name.Loc. txt) (shrink e)
255
+ | Capture_as (name , conv , e ) -> map2 (fun name e -> mknoloc (Capture_as (mknoloc name, conv , e))) (string name.Loc. txt) (shrink e)
243
256
| _ -> empty
244
257
in
245
258
fun e -> map Regexp. simplify (shrink e)
@@ -248,9 +261,10 @@ let arb_regexp = Q.make ~print:Regexp.show_debug ~shrink:shrink_regexp gen_regex
248
261
249
262
let test_parse s =
250
263
let r =
251
- match Regexp. parse_exn s with
264
+ match Regexp. parse_exn ~target: `Match s with
252
265
| exception Loc. Error err -> Error err
253
- | e -> Ok (e, try Ok (Regexp. to_re e) with Re.Perl. Parse_error -> Error `Parse_error | Re.Perl. Not_supported -> Error `Not_supported )
266
+ | e , __ ->
267
+ Ok (e, try Ok (Regexp. to_re e) with Re.Perl. Parse_error -> Error `Parse_error | Re.Perl. Not_supported -> Error `Not_supported )
254
268
in
255
269
let r' = try Ok (Re.Perl. re s) with Re.Perl. Parse_error -> Error `Parse_error | Re.Perl. Not_supported -> Error `Not_supported in
256
270
match r, r' with
@@ -265,13 +279,53 @@ let test_parse s =
265
279
(* TODO: Would have been nice to compare the two Re.t here. *)
266
280
true
267
281
282
+ let test_flag_parsing =
283
+ let open Regexp in
284
+ let abc = mknoloc @@ Seq (List. map mknoloc [ Code " a" ; Code " b" ; Code " c" ]) in
285
+ let a_slash_b = mknoloc @@ Seq (List. map mknoloc [ Code " a" ; Code " \\ /" ; Code " b" ]) in
286
+ let a_slash_b_2 = mknoloc @@ Seq (List. map mknoloc [ Code " a" ; Code " /" ; Code " b" ]) in
287
+ let test_cases =
288
+ [
289
+ (* Basic flag tests *)
290
+ " /abc/" , (abc, pcre_default_flags);
291
+ " /abc/i" , (abc, { case_insensitive = true ; anchored = false });
292
+ " /abc/a" , (abc, { case_insensitive = false ; anchored = true });
293
+ " /abc/ia" , (abc, { case_insensitive = true ; anchored = true });
294
+ " /abc/ai" , (abc, { case_insensitive = true ; anchored = true });
295
+ (* Patterns with slashes inside *)
296
+ " /a\\ /b/" , (a_slash_b, pcre_default_flags);
297
+ " /a\\ /b/i" , (a_slash_b, { case_insensitive = true ; anchored = false });
298
+ (* Patterns without flag syntax *)
299
+ " abc" , (abc, pcre_default_flags);
300
+ " a/b" , (a_slash_b_2, pcre_default_flags);
301
+ (* Whitespace in flags *)
302
+ " /abc/ i" , (abc, { case_insensitive = true ; anchored = false });
303
+ " /abc/i a" , (abc, { case_insensitive = true ; anchored = true });
304
+ " /abc/ \t\n\r i \t a \n " , (abc, { case_insensitive = true ; anchored = true });
305
+ ]
306
+ in
307
+
308
+ let run_test (input , (expected_ast , expected_flags )) =
309
+ match parse_exn ~target: `Match input with
310
+ | exception Loc. Error err -> Q.Test. fail_reportf " Failed to parse %S: %a" input pp_location_error err
311
+ | ast , flags ->
312
+ if not (equal ast expected_ast) then
313
+ Q.Test. fail_reportf " Pattern mismatch for %S: expected %a, got %a" input pp_debug expected_ast pp_debug ast
314
+ else if flags <> expected_flags then
315
+ Q.Test. fail_reportf " Flags mismatch for %S: expected {i=%b; a=%b}, got {i=%b; a=%b}" input expected_flags.case_insensitive
316
+ expected_flags.anchored flags.case_insensitive flags.anchored
317
+ else true
318
+ in
319
+ List. for_all run_test test_cases
320
+
268
321
let tests =
269
322
[
270
323
Q.Test. make ~long_factor: 100 ~name: " parse ∘ to_string" arb_regexp (fun e ->
271
- match Regexp. parse_exn (Regexp. to_string e) with
324
+ match Regexp. parse_exn ~target: `Match (Regexp. to_string e) with
272
325
| exception Loc. Error err -> Q.Test. fail_reportf " %a" pp_location_error err
273
- | e' -> Regexp. equal e' e);
326
+ | e' , _ -> Regexp. equal e' e);
274
327
Q.Test. make ~long_factor: 100 ~name: " to_string ∘ parse" (Q. string_gen Q.Gen. printable) test_parse;
328
+ Q.Test. make ~name: " flag parsing" Q. unit (fun () -> test_flag_parsing);
275
329
]
276
330
277
331
let () = QCheck_runner. run_tests_main tests
0 commit comments