Skip to content

Commit 9c79065

Browse files
mrmonday authored and BurntSushi committed
Add support for benchmarking D's std.regex
This commit adds support for benchmarking the runtime version of the D programming language's std.regex using the dmd and ldc compilers. Closes #430
1 parent 557ea81 commit 9c79065

File tree

9 files changed

+270
-2
lines changed

9 files changed

+270
-2
lines changed

bench/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ re-pcre1 = ["libpcre-sys"]
4747
re-pcre2 = []
4848
re-onig = ["onig"]
4949
re-re2 = []
50+
re-dphobos-dmd = []
51+
re-dphobos-ldc = []
5052
re-rust = []
5153
re-rust-bytes = []
5254
re-tcl = []

bench/build.rs

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ extern crate cc;
1212
extern crate pkg_config;
1313

1414
use std::env;
15+
use std::process;
1516

1617
fn main() {
1718
if env::var("CARGO_FEATURE_RE_PCRE2").is_ok() {
@@ -30,4 +31,69 @@ fn main() {
3031
if env::var("CARGO_FEATURE_RE_TCL").is_ok() {
3132
pkg_config::probe_library("tcl").unwrap();
3233
}
34+
35+
// Build the D (Phobos `std.regex`) shim with dmd when the `re-dphobos-dmd`
// feature is enabled, and tell cargo how to link it.
if env::var("CARGO_FEATURE_RE_DPHOBOS_DMD").is_ok() {
    // Probe for the compiler first so a missing `dmd` fails with a clear
    // message. `status()` waits for the child to exit; a bare `spawn()`
    // would leave the process un-waited for the rest of the build script.
    process::Command::new("dmd")
        .arg("--version")
        .stdout(process::Stdio::null())
        .stderr(process::Stdio::null())
        .status()
        .expect("unable to run dmd; is it installed and on PATH?");

    let out_dir = env::var("OUT_DIR").unwrap();
    let out_file = format!("-of={}/libdphobos-dmd.a", out_dir);

    let res = process::Command::new("dmd")
        .arg("-w")
        .arg("-lib")
        .arg("-O")
        .arg("-release")
        .arg("-inline")
        .arg("src/ffi/d_phobos.d")
        .arg(&out_file)
        .output()
        .expect("unable to compile dphobos-regex (dmd)");

    // Put the compiler diagnostics in the panic message instead of on
    // stdout, which cargo parses for `cargo:` directives.
    assert!(
        res.status.success(),
        "dmd failed to compile src/ffi/d_phobos.d:\n{}",
        String::from_utf8_lossy(&res.stderr)
    );

    println!("cargo:rustc-link-search=native={}", out_dir);
    println!("cargo:rustc-link-lib=dphobos-dmd");
    println!("cargo:rustc-link-lib=phobos2");
}
66+
67+
// Build the D (Phobos `std.regex`) shim with ldc when the `re-dphobos-ldc`
// feature is enabled, and tell cargo how to link it.
if env::var("CARGO_FEATURE_RE_DPHOBOS_LDC").is_ok() {
    // Probe for the compiler first so a missing `ldc` fails with a clear
    // message. `status()` waits for the child to exit; a bare `spawn()`
    // would leave the process un-waited for the rest of the build script.
    process::Command::new("ldc")
        .arg("--version")
        .stdout(process::Stdio::null())
        .stderr(process::Stdio::null())
        .status()
        .expect("unable to run ldc; is it installed and on PATH?");

    let out_dir = env::var("OUT_DIR").unwrap();
    let out_file = format!("-of={}/libdphobos-ldc.a", out_dir);

    let res = process::Command::new("ldc")
        .arg("-w")
        .arg("-lib")
        .arg("-O3")
        .arg("-release")
        .arg("-mcpu=native")
        .arg("src/ffi/d_phobos.d")
        .arg(&out_file)
        .output()
        .expect("unable to compile dphobos-regex (ldc)");

    // Put the compiler diagnostics in the panic message instead of on
    // stdout, which cargo parses for `cargo:` directives.
    assert!(
        res.status.success(),
        "ldc failed to compile src/ffi/d_phobos.d:\n{}",
        String::from_utf8_lossy(&res.stderr)
    );

    println!("cargo:rustc-link-search=native={}", out_dir);
    println!("cargo:rustc-link-lib=dphobos-ldc");
    println!("cargo:rustc-link-lib=druntime-ldc");
    println!("cargo:rustc-link-lib=phobos2-ldc");
}
3399
}

bench/compile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@ export RUSTFLAGS="-C target-cpu=native"
55

66
exec cargo build \
77
--release \
8-
--features 're-onig re-pcre1 re-pcre2 re-re2 re-rust re-rust-bytes re-tcl' \
8+
--features 're-onig re-pcre1 re-pcre2 re-rust re-rust-bytes re-tcl re-dphobos-dmd re-dphobos-ldc' \
99
"$@"

bench/run

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22

33
usage() {
4-
echo "Usage: $(basename $0) [rust | rust-bytes | pcre1 | pcre2 | re2 | onig | tcl ]" >&2
4+
echo "Usage: $(basename $0) [dphobos-dmd | dphobos-ldc | rust | rust-bytes | pcre1 | pcre2 | re2 | onig | tcl ]" >&2
55
exit 1
66
}
77

@@ -16,6 +16,12 @@ export RUSTFLAGS="-C target-cpu=native"
1616
which="$1"
1717
shift
1818
case $which in
19+
dphobos-dmd)
20+
exec cargo bench --bench bench --features re-dphobos-dmd "$@"
21+
;;
22+
dphobos-ldc)
23+
exec cargo bench --bench bench --features re-dphobos-ldc "$@"
24+
;;
1925
rust)
2026
exec cargo bench --bench bench --features re-rust "$@"
2127
;;

bench/src/bench.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ pub use ffi::pcre1::Regex;
3737
pub use ffi::pcre2::Regex;
3838
#[cfg(feature = "re-re2")]
3939
pub use ffi::re2::Regex;
40+
#[cfg(any(feature = "re-dphobos-dmd",
41+
feature = "re-dphobos-ldc"))]
42+
pub use ffi::d_phobos::Regex;
4043
#[cfg(feature = "re-rust")]
4144
pub use regex::Regex;
4245
#[cfg(feature = "re-rust-bytes")]
@@ -89,6 +92,8 @@ macro_rules! text {
8992
feature = "re-pcre1",
9093
feature = "re-pcre2",
9194
feature = "re-re2",
95+
feature = "re-dphobos-ldc",
96+
feature = "re-dphobos-dmd",
9297
feature = "re-rust",
9398
))]
9499
macro_rules! text {
@@ -105,6 +110,8 @@ type Text = Vec<u8>;
105110
feature = "re-pcre1",
106111
feature = "re-pcre2",
107112
feature = "re-re2",
113+
feature = "re-dphobos-ldc",
114+
feature = "re-dphobos-dmd",
108115
feature = "re-rust",
109116
))]
110117
type Text = String;

bench/src/ffi/d_phobos.d

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
module d_phobos;
2+
3+
import core.memory;
4+
import core.stdc.stdlib : malloc, free;
5+
6+
import std.regex;
7+
import std.string;
8+
import std.typecons;
9+
10+
import std.stdio;
11+
12+
/**
 * Splits a Rust-style pattern into a (pattern, flags) pair suitable for
 * passing to `regex`.
 *
 * Only a single leading `(?i)` or `(?m)` group is recognised: it is stripped
 * from the pattern and turned into the `i` or `m` flag. The `g` flag is
 * always present. The returned pattern is a fresh copy (`dup`) of the input.
 */
auto rustRegexToD(string regex) {
    enum size_t prefixLen = 4; // length of "(?i)" and of "(?m)"
    string flags = "g";

    const caseInsensitive = regex.startsWith("(?i)");
    const multiLine = !caseInsensitive && regex.startsWith("(?m)");

    if (caseInsensitive || multiLine) {
        flags = caseInsensitive ? "gi" : "gm";
        regex = regex[prefixLen .. $];
    }

    return tuple(regex.dup, flags);
}
24+
25+
extern(C):
26+
27+
/**
 * Compiles the Rust-style pattern `s` and returns an opaque handle to the
 * resulting `Regex!char`. Release the handle with `d_phobos_regex_free`.
 */
void* d_phobos_regex_new(string s) {
    // Allocate on the GC heap so the slot is properly default-initialised
    // before it is assigned to.
    auto r = new Regex!char;
    *r = regex(rustRegexToD(s).expand);

    // The only reference to `r` is handed to the caller as a raw pointer,
    // stored in memory the GC does not scan. Register it as a root so that
    // neither the Regex nor the GC-allocated data it references is collected
    // while the caller still holds the handle.
    GC.addRoot(r);
    GC.setAttr(r, GC.BlkAttr.NO_MOVE);

    return r;
}

/**
 * Releases a handle obtained from `d_phobos_regex_new`. Passing null is a
 * no-op. After this call the handle must not be used again.
 */
void d_phobos_regex_free(void* r) {
    if (r is null) {
        return;
    }

    // Drop the root registered in d_phobos_regex_new; the GC reclaims the
    // block (and whatever it references) once nothing else points at it.
    GC.removeRoot(r);
    GC.clrAttr(r, GC.BlkAttr.NO_MOVE);
}
38+
39+
/**
 * Returns true when the regex behind handle `r` matches anywhere in `s`.
 *
 * `r` must point at a `Regex!char`.
 */
bool d_phobos_regex_is_match(void* r, string s) {
    auto compiled = cast(Regex!char*) r;
    auto captures = matchFirst(s, *compiled);
    return !captures.empty;
}
43+
44+
/**
 * Finds the first match of the regex behind handle `r` in `s[start .. $]`.
 *
 * Params:
 *   r           = pointer to a `Regex!char`
 *   s           = the full haystack
 *   start       = byte offset in `s` at which the search begins
 *   match_start = on success, byte offset in `s` where the match begins
 *   match_end   = on success, byte offset in `s` just past the match
 *
 * Returns: true if a match was found. On failure both out parameters are
 * left at 0 (their `out` default).
 */
bool d_phobos_regex_find_at(void* r, string s, size_t start, out size_t match_start, out size_t match_end) {
    // Array bounds checks are disabled by `-release`, so validate `start`
    // explicitly instead of relying on the slice below to trap.
    if (r is null || start > s.length) {
        return false;
    }

    auto compiled = cast(Regex!char*) r;
    auto found = matchFirst(s[start .. $], *compiled);

    if (found.empty) {
        return false;
    }

    // `pre` is everything in the searched slice before the match, so its
    // length (not its pointer, which is just the start of the slice) gives
    // the match offset relative to `start`.
    match_start = start + found.pre.length;
    match_end = match_start + found.hit.length;
    return true;
}
56+

bench/src/ffi/d_phobos.rs

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
#![allow(non_camel_case_types)]
2+
3+
use libc::{c_uchar, c_void};
4+
5+
/// Regex wraps a D regular expression
6+
pub struct Regex {
7+
re: *mut d_regex,
8+
}
9+
10+
unsafe impl Send for Regex {}
11+
12+
impl Drop for Regex {
13+
fn drop(&mut self) {
14+
unsafe {
15+
d_phobos_regex_free(self.re);
16+
rt_term();
17+
}
18+
}
19+
}
20+
21+
/// Opaque error type returned by `Regex::new`; it carries no detail.
#[derive(Debug)]
pub struct Error(());
23+
24+
impl Regex {
25+
pub fn new(pattern: &str) -> Result<Regex, Error> {
26+
unsafe {
27+
rt_init();
28+
29+
Ok(Regex { re: d_phobos_regex_new(pattern.into()) })
30+
}
31+
}
32+
33+
pub fn is_match(&self, text: &str) -> bool {
34+
unsafe {
35+
d_phobos_regex_is_match(self.re, text.into())
36+
}
37+
}
38+
39+
pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> FindMatches<'r, 't> {
40+
FindMatches {
41+
re: self,
42+
text: text,
43+
last_end: 0,
44+
last_match: None,
45+
}
46+
}
47+
48+
fn find_at(&self, text: &str, start: usize) -> Option<(usize, usize)> {
49+
let (mut s, mut e): (usize, usize) = (0, 0);
50+
let matched = unsafe {
51+
d_phobos_regex_find_at(
52+
self.re,
53+
text.into(),
54+
start,
55+
&mut s,
56+
&mut e,
57+
)
58+
};
59+
if matched {
60+
Some((s, e))
61+
} else {
62+
None
63+
}
64+
}
65+
}
66+
67+
pub struct FindMatches<'r, 't> {
68+
re: &'r Regex,
69+
text: &'t str,
70+
last_end: usize,
71+
last_match: Option<usize>,
72+
}
73+
74+
impl<'r, 't> Iterator for FindMatches<'r, 't> {
75+
type Item = (usize, usize);
76+
77+
fn next(&mut self) -> Option<(usize, usize)> {
78+
match self.re.find_at(self.text, self.last_end) {
79+
None => None,
80+
Some((s, e)) => {
81+
self.last_end = e;
82+
Some((s, e))
83+
}
84+
}
85+
}
86+
}
87+
88+
/// Opaque D-side regex object; only ever handled through raw pointers.
type d_regex = c_void;

/// Length-then-pointer pair used to pass a string slice by value to the D
/// functions declared in this module.
///
/// The value borrows the bytes it points at without tracking a lifetime, so
/// it must not outlive the `&str` it was built from.
#[repr(C)]
struct d_string {
    len: usize,
    text: *const c_uchar,
}

impl<'a> From<&'a str> for d_string {
    /// Builds a non-owning view of `s`'s UTF-8 bytes.
    fn from(s: &'a str) -> d_string {
        let bytes = s.as_bytes();
        d_string {
            text: bytes.as_ptr(),
            len: bytes.len(),
        }
    }
}
101+
102+
extern {
103+
fn rt_init() -> i32;
104+
fn rt_term() -> i32;
105+
fn d_phobos_regex_new(s: d_string) -> *mut d_regex;
106+
fn d_phobos_regex_free(r: *mut d_regex);
107+
fn d_phobos_regex_is_match(r: *mut d_regex, s: d_string) -> bool;
108+
fn d_phobos_regex_find_at(r: *mut d_regex,
109+
s: d_string,
110+
start: usize,
111+
match_start: *mut usize,
112+
match_end: *mut usize)
113+
-> bool;
114+
}
115+

bench/src/ffi/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
// of conditional compilation and such.
1313
#![allow(dead_code)]
1414

15+
#[cfg(any(feature = "re-dphobos-dmd",
16+
feature = "re-dphobos-ldc"))]
17+
pub mod d_phobos;
1518
#[cfg(feature = "re-onig")]
1619
pub mod onig;
1720
#[cfg(feature = "re-pcre1")]

bench/src/sherlock.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,14 @@ sherlock!(the_whitespace, r"the\s+\w+", 5410);
8888

8989
// How fast can we match everything? This essentially defeats any clever prefix
9090
// tricks and just executes the DFA across the entire input.
91+
#[cfg(not(feature = "re-dphobos-dmd"))]
92+
#[cfg(not(feature = "re-dphobos-ldc"))]
9193
#[cfg(not(feature = "re-pcre1"))]
9294
#[cfg(not(feature = "re-pcre2"))]
9395
#[cfg(not(feature = "re-tcl"))]
9496
sherlock!(everything_greedy, r".*", 13053);
97+
#[cfg(not(feature = "re-dphobos-dmd"))]
98+
#[cfg(not(feature = "re-dphobos-ldc"))]
9599
#[cfg(not(feature = "re-onig"))]
96100
#[cfg(not(feature = "re-pcre1"))]
97101
#[cfg(not(feature = "re-pcre2"))]
@@ -148,10 +152,19 @@ sherlock!(quotes, r#"["'][^"']{0,30}[?!.]["']"#, 767);
148152
// Finds all occurrences of Sherlock Holmes at the beginning or end of a line.
149153
// The empty assertions defeat any detection of prefix literals, so it's the
150154
// lazy DFA the entire way.
155+
#[cfg(not(any(feature = "re-dphobos-dmd",
156+
feature = "re-dphobos-ldc")))]
151157
sherlock!(
152158
line_boundary_sherlock_holmes,
153159
r"(?m)^Sherlock Holmes|Sherlock Holmes$",
154160
34);
161+
// D matches both \r\n and \n as EOL
162+
#[cfg(any(feature = "re-dphobos-dmd",
163+
feature = "re-dphobos-ldc"))]
164+
sherlock!(
165+
line_boundary_sherlock_holmes,
166+
r"(?m)^Sherlock Holmes|Sherlock Holmes$",
167+
37);
155168

156169
// All words ending in `n`. This uses Unicode word boundaries, which the DFA
157170
// can speculatively handle. Since this benchmark is on mostly ASCII text, it

0 commit comments

Comments
 (0)