Skip to content

Commit 4452594

Browse files
committed
fix unit tests for regexp lib
1 parent d8759d7 commit 4452594

File tree

2 files changed

+72
-18
lines changed

2 files changed

+72
-18
lines changed

lib/regexp.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ let parse_exn ~target:_ ?(pos = Lexing.dummy_pos) s =
5858
Location.raise_errorf ~loc:Location.none "Unknown flag '%c' at position %d" c error_pos)
5959
in
6060
pattern, parse_flags 0 pcre_default_flags
61-
| _ -> Location.raise_errorf ~loc:Location.none "Unmatched opening '/'")
61+
| _ -> s, pcre_default_flags)
6262
else s, pcre_default_flags
6363
in
6464

tests/test_regexp.ml

Lines changed: 71 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
*)
1717

1818
open Printf
19+
open Regexp_types
1920
module Loc = Location
2021
module Q = QCheck
2122

@@ -70,8 +71,10 @@ module Regexp = struct
7071
| Opt e -> (match simplify e with { Loc.txt = Opt _; _ } as e' -> e'.Loc.txt | e' -> Opt e')
7172
| Repeat (ij, e) -> Repeat (ij, simplify e)
7273
| Nongreedy e -> Nongreedy (simplify e)
74+
| Caseless e -> Caseless (simplify e)
7375
| Capture e -> Capture (simplify e)
74-
| Capture_as (name, e) -> Capture_as (name, simplify e)
76+
| Capture_as (name, conv, e) -> Capture_as (name, conv, simplify e)
77+
| Pipe_all (name, conv, e) -> Pipe_all (name, conv, simplify e)
7578
| (Code _ | Call _) as e -> e
7679

7780
let rec equal' e1 e2 =
@@ -81,8 +84,10 @@ module Regexp = struct
8184
| Opt e1, Opt e2 -> equal' e1 e2
8285
| Repeat ({ Loc.txt = ij1; _ }, e1), Repeat ({ Loc.txt = ij2; _ }, e2) -> ij1 = ij2 && equal' e1 e2
8386
| Nongreedy e1, Nongreedy e2 -> equal' e1 e2
87+
| Caseless e1, Caseless e2 -> equal' e1 e2
8488
| Capture e1, Capture e2 -> equal' e1 e2
85-
| Capture_as (name1, e1), Capture_as (name2, e2) -> name1.Loc.txt = name2.Loc.txt && equal' e1 e2
89+
| Capture_as (name1, conv1, e1), Capture_as (name2, conv2, e2) -> name1.Loc.txt = name2.Loc.txt && conv1 = conv2 && equal' e1 e2
90+
| Pipe_all (name1, conv1, e1), Pipe_all (name2, conv2, e2) -> name1.Loc.txt = name2.Loc.txt && conv1 = conv2 && equal' e1 e2
8691
| Call name1, Call name2 -> name1.Loc.txt = name2.Loc.txt
8792
| _, _ -> false (* We'll notice. *)
8893

@@ -93,22 +98,25 @@ module Regexp = struct
9398
let delimit_if b s = if b then "(" ^ s ^ ")" else s in
9499
let rec aux p e =
95100
match e.Loc.txt with
96-
| Code s -> delimit_if (p > p_seq) s
101+
| Code s ->
102+
let s = if String.contains s '/' then String.concat "\\/" (String.split_on_char '/' s) else s in
103+
delimit_if (p > p_seq) s
97104
| Seq es -> delimit_if (p > p_seq) (String.concat "" (List.map (aux p_seq) es))
98105
| Alt es -> delimit_if (p > p_alt) (String.concat "|" (List.map (aux p_alt) es))
99106
| Opt e -> delimit_if (p >= p_suffix) (aux p_suffix e ^ "?")
100107
| Repeat ({ Loc.txt = i, j_opt; _ }, e) ->
101108
let j_str = match j_opt with None -> "" | Some j -> string_of_int j in
102109
delimit_if (p >= p_suffix) (sprintf "%s{%d,%s}" (aux p_suffix e) i j_str)
103110
| Nongreedy e -> aux (p_suffix - 1) e ^ "?"
111+
| Caseless e -> "(?i:" ^ aux (p_suffix - 1) e ^ ")"
104112
| Capture e -> "(+" ^ aux p_bottom e ^ ")"
105-
| Capture_as ({ Loc.txt = name; _ }, e) -> "(?<" ^ name ^ ">" ^ aux p_bottom e ^ ")"
113+
| Capture_as ({ Loc.txt = name; _ }, _, e) -> "(?<" ^ name ^ ">" ^ aux p_bottom e ^ ")"
114+
| Pipe_all (_, _, e) -> aux p_bottom e
106115
| Call { Loc.txt = idr; _ } -> "(&" ^ String.concat "." (Longident.flatten idr) ^ ")"
107116
in
108117
aux 0
109118

110119
let rec pp_debug ppf self =
111-
let open Regexp in
112120
let open Format in
113121
let open Loc in
114122
let pp_pos ppf pos =
@@ -139,8 +147,10 @@ module Regexp = struct
139147
let pp_option f ppf = function None -> () | Some e -> f ppf e in
140148
fprintf ppf "(Repeat {%d,%a}%a %a)" i (pp_option Format.pp_print_int) j pp_loc loc pp_debug e
141149
| Nongreedy e -> fprintf ppf "(Nongreedy %a)" pp_debug e
150+
| Caseless e -> fprintf ppf "(Caseless %a)" pp_debug e
142151
| Capture e -> fprintf ppf "(Capture %a)" pp_debug e
143-
| Capture_as (name, e) -> fprintf ppf "(Capture_as %s%a %a)" name.txt pp_loc name.loc pp_debug e
152+
| Capture_as (name, _, e) -> fprintf ppf "(Capture_as %s%a %a)" name.txt pp_loc name.loc pp_debug e
153+
| Pipe_all (name, _, e) -> fprintf ppf "(Pipe_all %s%a %a)" name.txt pp_loc name.loc pp_debug e
144154
| Call name -> fprintf ppf "(Call %s%a)" (String.concat "." (Longident.flatten name.txt)) pp_loc name.loc);
145155
pp_loc ppf self.loc
146156

@@ -160,15 +170,17 @@ module Regexp = struct
160170
| Opt e -> Re.opt (to_re e)
161171
| Repeat ({ Loc.txt = i, j; _ }, e) -> Re.repn (to_re e) i j
162172
| Nongreedy e -> Re.non_greedy (to_re e)
173+
| Caseless e -> Re.no_case (to_re e)
163174
| Capture e -> Re.group (to_re e)
164-
| Capture_as (_, e) -> Re.group (to_re e)
175+
| Capture_as (_, _, e) -> Re.group (to_re e)
176+
| Pipe_all (_, _, e) -> to_re e
165177
| Call _ -> raise Re.Perl.Not_supported
166178

167179
let rec has_anon_capture e =
168180
match e.Loc.txt with
169181
| Code _ | Call _ -> false
170182
| Seq es | Alt es -> List.exists has_anon_capture es
171-
| Opt e | Repeat (_, e) | Capture_as (_, e) | Nongreedy e -> has_anon_capture e
183+
| Opt e | Repeat (_, e) | Capture_as (_, _, e) | Pipe_all (_, _, e) | Nongreedy e | Caseless e -> has_anon_capture e
172184
| Capture _ -> true
173185
end
174186

@@ -193,14 +205,13 @@ let gen_name =
193205

194206
let gen_regexp =
195207
let open Q.Gen in
196-
let open Regexp in
197208
let gen_char = map (fun c -> mknoloc (Code (String.make 1 c))) numeral in
198209
let gen_backlash_op =
199210
let backslash_ops = "wWsSdDbBAZzG" in
200211
map (fun i -> mknoloc (Code (sprintf "\\%c" backslash_ops.[i]))) (int_bound (String.length backslash_ops - 1))
201212
in
202213
let gen_quoted_op =
203-
let quotable = "!\"#$%&'()*+,-./:=<=>?@[\\]^`{|}~" in
214+
let quotable = "!\"#$%&'()*+,-.:=<=>?@[\\]^`{|}~" in
204215
map (fun i -> mknoloc (Code (sprintf "\\%c" quotable.[i]))) (int_bound (String.length quotable - 1))
205216
in
206217
map Regexp.simplify
@@ -212,7 +223,8 @@ let gen_regexp =
212223
let gen_opt = map (fun e -> mknoloc (Opt e)) (self n) in
213224
let gen_repeat = map2 (fun i e -> mknoloc (Repeat (mknoloc (i, None), e))) nat (self n) in
214225
let gen_capture = map (fun e -> mknoloc (Capture e)) (self n) in
215-
let gen_capture_as = map2 (fun a e -> mknoloc (Capture_as (mknoloc a, e))) gen_name (self n) in
226+
let gen_capture_as = map2 (fun a e -> mknoloc (Capture_as (mknoloc a, None, e))) gen_name (self n) in
227+
let gen_caseless = map (fun e -> mknoloc (Caseless e)) (self n) in
216228
frequency
217229
[
218230
1, gen_char;
@@ -224,12 +236,12 @@ let gen_regexp =
224236
n, gen_repeat;
225237
n, gen_capture;
226238
n, gen_capture_as;
239+
n, gen_caseless;
227240
]
228241

229242
let shrink_regexp =
230243
let open Q.Shrink in
231244
let open Q.Iter in
232-
let open Regexp in
233245
let rec shrink e =
234246
match e.Loc.txt with
235247
| Code s -> map (fun s -> mknoloc (Code s)) (string s)
@@ -238,8 +250,9 @@ let shrink_regexp =
238250
| Opt e -> map (fun e -> mknoloc (Opt e)) (shrink e)
239251
| Repeat ({ Loc.txt = i, j; _ }, e) ->
240252
map2 (fun (i, j) e -> mknoloc (Repeat (mknoloc (i, j), e))) (pair (int i) (option int j)) (shrink e)
253+
| Caseless e -> map (fun e -> mknoloc (Caseless e)) (shrink e)
241254
| Capture e -> map (fun e -> mknoloc (Capture e)) (shrink e)
242-
| Capture_as (name, e) -> map2 (fun name e -> mknoloc (Capture_as (mknoloc name, e))) (string name.Loc.txt) (shrink e)
255+
| Capture_as (name, conv, e) -> map2 (fun name e -> mknoloc (Capture_as (mknoloc name, conv, e))) (string name.Loc.txt) (shrink e)
243256
| _ -> empty
244257
in
245258
fun e -> map Regexp.simplify (shrink e)
@@ -248,9 +261,10 @@ let arb_regexp = Q.make ~print:Regexp.show_debug ~shrink:shrink_regexp gen_regex
248261

249262
let test_parse s =
250263
let r =
251-
match Regexp.parse_exn s with
264+
match Regexp.parse_exn ~target:`Match s with
252265
| exception Loc.Error err -> Error err
253-
| e -> Ok (e, try Ok (Regexp.to_re e) with Re.Perl.Parse_error -> Error `Parse_error | Re.Perl.Not_supported -> Error `Not_supported)
266+
| e, __ ->
267+
Ok (e, try Ok (Regexp.to_re e) with Re.Perl.Parse_error -> Error `Parse_error | Re.Perl.Not_supported -> Error `Not_supported)
254268
in
255269
let r' = try Ok (Re.Perl.re s) with Re.Perl.Parse_error -> Error `Parse_error | Re.Perl.Not_supported -> Error `Not_supported in
256270
match r, r' with
@@ -265,13 +279,53 @@ let test_parse s =
265279
(* TODO: Would have been nice to compare the two Re.t here. *)
266280
true
267281

282+
let test_flag_parsing =
283+
let open Regexp in
284+
let abc = mknoloc @@ Seq (List.map mknoloc [ Code "a"; Code "b"; Code "c" ]) in
285+
let a_slash_b = mknoloc @@ Seq (List.map mknoloc [ Code "a"; Code "\\/"; Code "b" ]) in
286+
let a_slash_b_2 = mknoloc @@ Seq (List.map mknoloc [ Code "a"; Code "/"; Code "b" ]) in
287+
let test_cases =
288+
[
289+
(* Basic flag tests *)
290+
"/abc/", (abc, pcre_default_flags);
291+
"/abc/i", (abc, { case_insensitive = true; anchored = false });
292+
"/abc/a", (abc, { case_insensitive = false; anchored = true });
293+
"/abc/ia", (abc, { case_insensitive = true; anchored = true });
294+
"/abc/ai", (abc, { case_insensitive = true; anchored = true });
295+
(* Patterns with slashes inside *)
296+
"/a\\/b/", (a_slash_b, pcre_default_flags);
297+
"/a\\/b/i", (a_slash_b, { case_insensitive = true; anchored = false });
298+
(* Patterns without flag syntax *)
299+
"abc", (abc, pcre_default_flags);
300+
"a/b", (a_slash_b_2, pcre_default_flags);
301+
(* Whitespace in flags *)
302+
"/abc/ i", (abc, { case_insensitive = true; anchored = false });
303+
"/abc/i a", (abc, { case_insensitive = true; anchored = true });
304+
"/abc/ \t\n\r i \t a \n", (abc, { case_insensitive = true; anchored = true });
305+
]
306+
in
307+
308+
let run_test (input, (expected_ast, expected_flags)) =
309+
match parse_exn ~target:`Match input with
310+
| exception Loc.Error err -> Q.Test.fail_reportf "Failed to parse %S: %a" input pp_location_error err
311+
| ast, flags ->
312+
if not (equal ast expected_ast) then
313+
Q.Test.fail_reportf "Pattern mismatch for %S: expected %a, got %a" input pp_debug expected_ast pp_debug ast
314+
else if flags <> expected_flags then
315+
Q.Test.fail_reportf "Flags mismatch for %S: expected {i=%b; a=%b}, got {i=%b; a=%b}" input expected_flags.case_insensitive
316+
expected_flags.anchored flags.case_insensitive flags.anchored
317+
else true
318+
in
319+
List.for_all run_test test_cases
320+
268321
let tests =
269322
[
270323
Q.Test.make ~long_factor:100 ~name:"parse ∘ to_string" arb_regexp (fun e ->
271-
match Regexp.parse_exn (Regexp.to_string e) with
324+
match Regexp.parse_exn ~target:`Match (Regexp.to_string e) with
272325
| exception Loc.Error err -> Q.Test.fail_reportf "%a" pp_location_error err
273-
| e' -> Regexp.equal e' e);
326+
| e', _ -> Regexp.equal e' e);
274327
Q.Test.make ~long_factor:100 ~name:"to_string ∘ parse" (Q.string_gen Q.Gen.printable) test_parse;
328+
Q.Test.make ~name:"flag parsing" Q.unit (fun () -> test_flag_parsing);
275329
]
276330

277331
let () = QCheck_runner.run_tests_main tests

0 commit comments

Comments
 (0)