Skip to content

Commit ed174df

Browse files
mrmonday authored and BurntSushi committed
Add benchmarks for D's ctRegex
1 parent 49f2a3d commit ed174df

File tree

6 files changed

+133
-2
lines changed

6 files changed

+133
-2
lines changed

bench/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,9 @@ re-onig = ["onig"]
4949
re-re2 = []
5050
re-dphobos = []
5151
re-dphobos-dmd = ["re-dphobos"]
52+
re-dphobos-dmd-ct = ["re-dphobos-dmd"]
5253
re-dphobos-ldc = ["re-dphobos"]
54+
re-dphobos-ldc-ct = ["re-dphobos-ldc"]
5355
re-rust = []
5456
re-rust-bytes = []
5557
re-tcl = []

bench/build.rs

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,14 +42,19 @@ fn main() {
4242

4343
let out_dir = env::var("OUT_DIR").unwrap();
4444
let out_file = &format!("-of={}/libdphobos-dmd.a", out_dir);
45+
let is_compile_time = env::var("CARGO_FEATURE_RE_DPHOBOS_DMD_CT").is_ok();
46+
let extra_args = if is_compile_time { vec!["-version=CtRegex"] } else { vec![] };
4547

4648
let res = process::Command::new("dmd")
4749
.arg("-w")
4850
.arg("-lib")
4951
.arg("-O")
5052
.arg("-release")
5153
.arg("-inline")
54+
.arg("-Isrc/ffi")
55+
.args(extra_args)
5256
.arg("src/ffi/d_phobos.d")
57+
.arg("src/ffi/d_phobos_ct.d")
5358
.arg(out_file)
5459
.output()
5560
.expect("unable to compile dphobos-regex (dmd)");
@@ -75,13 +80,19 @@ fn main() {
7580
let out_dir = env::var("OUT_DIR").unwrap();
7681
let out_file = &format!("-of={}/libdphobos-ldc.a", out_dir);
7782

83+
let is_compile_time = env::var("CARGO_FEATURE_RE_DPHOBOS_LDC_CT").is_ok();
84+
let extra_args = if is_compile_time { vec!["-d-version=CtRegex"] } else { vec![] };
85+
7886
let res = process::Command::new("ldc")
7987
.arg("-w")
8088
.arg("-lib")
8189
.arg("-O3")
8290
.arg("-release")
8391
.arg("-mcpu=native")
92+
.arg("-Isrc/ffi")
93+
.args(extra_args)
8494
.arg("src/ffi/d_phobos.d")
95+
.arg("src/ffi/d_phobos_ct.d")
8596
.arg(out_file)
8697
.output()
8798
.expect("unable to compile dphobos-regex (ldc)");

bench/run

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22

33
usage() {
4-
echo "Usage: $(basename $0) [dphobos-dmd | dphobos-ldc | rust | rust-bytes | pcre1 | pcre2 | re2 | onig | tcl ]" >&2
4+
echo "Usage: $(basename $0) [dphobos-dmd | dphobos-ldc | dphobos-dmd-ct | dphobos-ldc-ct | rust | rust-bytes | pcre1 | pcre2 | re2 | onig | tcl ]" >&2
55
exit 1
66
}
77

@@ -23,6 +23,12 @@ case $which in
2323
dphobos-ldc)
2424
exec cargo bench --bench bench --features re-dphobos-ldc "$@"
2525
;;
26+
dphobos-dmd-ct)
27+
exec cargo bench --bench bench --features re-dphobos-dmd-ct "$@"
28+
;;
29+
dphobos-ldc-ct)
30+
exec cargo bench --bench bench --features re-dphobos-ldc-ct "$@"
31+
;;
2632
rust)
2733
exec cargo bench --bench bench --features re-rust "$@"
2834
;;

bench/src/ffi/d_phobos.d

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@ import std.typecons;
99

1010
import std.stdio;
1111

12+
import d_phobos_ct;
13+
1214
auto rustRegexToD(string regex) {
1315
auto flags = "g";
1416
if (regex.startsWith("(?i)")) {
@@ -27,7 +29,12 @@ extern(C):
2729
void* d_phobos_regex_new(string s) {
2830
auto r = cast(Regex!char*)malloc(Regex!char.sizeof);
2931

30-
*r = regex(rustRegexToD(s).expand);
32+
version(CtRegex) {
33+
auto ctR = getCtRegex();
34+
*r = ctR[s];
35+
} else {
36+
*r = regex(rustRegexToD(s).expand);
37+
}
3138

3239
return r;
3340
}

bench/src/ffi/d_phobos_ct.d

Lines changed: 103 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,103 @@
1+
module d_phobos_ct;
2+
3+
version(CtRegex):
4+
5+
static immutable PATTERNS = [
6+
r"y", // misc::literal
7+
r".y", // misc::not_literal
8+
"[abcdw]", // misc::match_class
9+
"[ac]", // misc::match_class_in_range
10+
r"\p{L}", // misc::match_class_unicode / sherlock::letters
11+
r"^zbc(d|e)", // misc::anchored_literal_long_non_match / misc::anchored_literal_short_non_match
12+
r"^.bc(d|e)", // misc::anchored_literal_short_match / misc::anchored_literal_long_match
13+
r"^.bc(d|e)*$", // misc::one_pass_short
14+
r".bc(d|e)*$", // misc::one_pass_short_not
15+
r"^abcdefghijklmnopqrstuvwxyz.*$", // misc::one_pass_long_prefix
16+
r"^.bcdefghijklmnopqrstuvwxyz.*$", // misc::one_pass_long_prefix_not
17+
r"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", // misc::long_needle1
18+
r"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbba", // misc::long_needle2
19+
r"[r-z].*bcdefghijklmnopq", // misc::reverse_suffix_no_quadratic
20+
"ABCDEFGHIJKLMNOPQRSTUVWXYZ$", // misc::easy0
21+
r"A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$", // misc::easy1
22+
r"[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$", // misc::medium
23+
r"[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$", // misc::hard
24+
r"[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ.*", // misc::reallyhard
25+
r"\w+\s+Holmes", // misc::reallyhard2
26+
// This causes compile times to go from ~40s to ~9m with dmd 2.077.1
27+
//r"a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // misc::no_exponential
28+
r">[^\n]*\n|\n", // dna::find_new_lines
29+
r"agggtaaa|tttaccct", // dna::variant1
30+
r"[cgt]gggtaaa|tttaccc[acg]", // dna::variant2
31+
r"a[act]ggtaaa|tttacc[agt]t", // dna::variant3
32+
r"ag[act]gtaaa|tttac[agt]ct", // dna::variant4
33+
r"agg[act]taaa|ttta[agt]cct", // dna::variant5
34+
r"aggg[acg]aaa|ttt[cgt]ccct", // dna::variant6
35+
r"agggt[cgt]aa|tt[acg]accct", // dna::variant7
36+
r"agggta[cgt]a|t[acg]taccct", // dna::variant8
37+
r"agggtaa[cgt]|[acg]ttaccct", // dna::variant9
38+
r"B", // dna::subst1
39+
r"D", // dna::subst2
40+
r"H", // dna::subst3
41+
r"K", // dna::subst4
42+
r"M", // dna::subst5
43+
r"N", // dna::subst6
44+
r"R", // dna::subst7
45+
r"S", // dna::subst8
46+
r"V", // dna::subst9
47+
r"W", // dna::subst10
48+
r"Y", // dna::subst11
49+
r"Sherlock", // sherlock::name_sherlock
50+
r"Holmes", // sherlock::name_holmes
51+
r"Sherlock Holmes", // sherlock::name_sherlock_holmes
52+
r"(?i)Sherlock", // sherlock::name_sherlock_nocase
53+
r"(?i)Holmes", // sherlock::name_holmes_nocase
54+
r"(?i)Sherlock Holmes", // sherlock::name_sherlock_holmes_nocase
55+
r"Sherlock\s+Holmes", // sherlock::name_whitespace
56+
r"Sherlock|Street", // sherlock::name_alt1
57+
r"Sherlock|Holmes", // sherlock::name_alt2
58+
r"Sherlock|Holmes|Watson|Irene|Adler|John|Baker", // sherlock::name_alt3
59+
r"(?i)Sherlock|Holmes|Watson|Irene|Adler|John|Baker", // sherlock::name_alt3_nocase
60+
r"Sher[a-z]+|Hol[a-z]+", // sherlock::name_alt4
61+
r"(?i)Sher[a-z]+|Hol[a-z]+", // sherlock::name_alt4_nocase
62+
r"Sherlock|Holmes|Watson", // sherlock::name_alt5
63+
r"(?i)Sherlock|Holmes|Watson", // sherlock::name_alt5_nocase
64+
r"zqj", // sherlock::no_match_uncommon
65+
r"aqj", // sherlock::no_match_common
66+
r"aei", // sherlock::no_match_really_common
67+
r"the", // sherlock::the_lower
68+
r"The", // sherlock::the_upper
69+
r"(?i)the", // sherlock::the_nocase
70+
r"the\s+\w+", // sherlock::the_whitespace
71+
r"\p{Lu}", // sherlock::letters_upper
72+
r"\p{Ll}", // sherlock::letters_lower
73+
r"\w+", // sherlock::words
74+
r"\w+\s+Holmes", // sherlock::before_holmes
75+
r"\w+\s+Holmes\s+\w+", // sherlock::before_after_holmes
76+
r"Holmes.{0,25}Watson|Watson.{0,25}Holmes", // sherlock::holmes_cochar_watson
77+
r"Holmes(?:\s*.+\s*){0,10}Watson|Watson(?:\s*.+\s*){0,10}Holmes", // sherlock::holmes_coword_watson
78+
`["'][^"']{0,30}[?!.]["']`, // sherlock::quotes
79+
r"(?m)^Sherlock Holmes|Sherlock Holmes$", // sherlock::line_boundary_sherlock_holmes
80+
r"\b\w+n\b", // sherlock::word_ending_n
81+
r"[a-q][^u-z]{13}x", // sherlock::repeated_class_negation
82+
r"[a-zA-Z]+ing", // sherlock::ing_suffix
83+
r"\s[a-zA-Z]{0,12}ing\s", // sherlock::ing_suffix_limited_space
84+
];
85+
86+
public auto getCtRegex() {
87+
import std.regex;
88+
import std.string;
89+
90+
Regex!char[string] aa;
91+
92+
static foreach (pattern; PATTERNS) {
93+
static if (pattern.startsWith("(?i)")) {
94+
aa[pattern] = ctRegex!(pattern[4..$], "gi");
95+
} else static if (pattern.startsWith("(?m)")) {
96+
aa[pattern] = ctRegex!(pattern[4..$], "gm");
97+
} else {
98+
aa[pattern] = ctRegex!(pattern, "g");
99+
}
100+
}
101+
102+
return aa;
103+
}

bench/src/misc.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,8 @@ use {Regex, Text};
1919
#[cfg(not(feature = "re-onig"))]
2020
#[cfg(not(feature = "re-pcre1"))]
2121
#[cfg(not(feature = "re-pcre2"))]
22+
#[cfg(not(feature = "re-dphobos-dmd-ct"))]
23+
#[cfg(not(feature = "re-dphobos-ldc-ct"))]
2224
bench_match!(no_exponential, {
2325
format!(
2426
"{}{}",

0 commit comments

Comments
 (0)