Skip to content

Commit ba98072

Browse files
committed
Add criterion.rs benchmarks to html5ever
1 parent d09dd0f commit ba98072

File tree

2 files changed

+100
-0
lines changed

2 files changed

+100
-0
lines changed

html5ever/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line number · Diff line number · Diff line change
markup5ever = { version = "0.7", path = "../markup5ever" }
rustc-serialize = "0.3.15"
rustc-test = "0.3"
typed-arena = "1.3.0"
# Criterion powers the benchmarks in benches/html5ever.rs.
# NOTE(review): the section header is above this hunk and not visible here —
# confirm this lands under [dev-dependencies] rather than [dependencies]; a
# bench-only harness should not become a dependency of library consumers.
criterion = "0.2"

[build-dependencies]
quote = "0.6"
syn = { version = "0.15", features = ["extra-traits", "full", "fold"] }
proc-macro2 = "0.4"

# Criterion supplies its own main(); disable libtest's default bench harness
# so `cargo bench` runs criterion instead.
[[bench]]
name = "html5ever"
harness = false

html5ever/benches/html5ever.rs

Lines changed: 95 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,95 @@
1+
#[macro_use]
2+
extern crate criterion;
3+
extern crate html5ever;
4+
5+
use std::fs;
6+
use std::path::PathBuf;
7+
8+
use criterion::{Criterion, black_box, ParameterizedBenchmark};
9+
10+
use html5ever::tokenizer::{BufferQueue, TokenSink, Token, Tokenizer, TokenizerOpts, TokenSinkResult};
11+
use html5ever::tendril::*;
12+
13+
struct Sink;
14+
15+
impl TokenSink for Sink {
16+
type Handle = ();
17+
18+
fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
19+
// Don't use the token, but make sure we don't get
20+
// optimized out entirely.
21+
black_box(token);
22+
TokenSinkResult::Continue
23+
}
24+
}
25+
26+
impl Sink {
27+
fn run(input: Vec<StrTendril>, opts: TokenizerOpts) {
28+
let mut tok = Tokenizer::new(Sink, opts.clone());
29+
let mut buffer = BufferQueue::new();
30+
for buf in input.into_iter() {
31+
buffer.push_back(buf);
32+
let _ = tok.feed(&mut buffer);
33+
}
34+
let _ = tok.feed(&mut buffer);
35+
tok.end();
36+
}
37+
}
38+
39+
fn run_bench(c: &mut Criterion, name: &str, opts: TokenizerOpts) {
40+
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
41+
path.push("data/bench/");
42+
path.push(name);
43+
let mut file = fs::File::open(&path).ok().expect("can't open file");
44+
45+
// Read the file and treat it as an infinitely repeating sequence of characters.
46+
let mut file_input = ByteTendril::new();
47+
file.read_to_tendril(&mut file_input).ok().expect("can't read file");
48+
let file_input: StrTendril = file_input.try_reinterpret().unwrap();
49+
let size = file_input.len();
50+
let mut stream = file_input.chars().cycle();
51+
52+
// Break the input into chunks of 1024 chars (= a few kB).
53+
// This simulates reading from the network.
54+
let mut input = vec![];
55+
let mut total = 0usize;
56+
while total < size {
57+
// The by_ref() call is important, otherwise we get wrong results!
58+
// See rust-lang/rust#18045.
59+
let sz = std::cmp::min(1024, size - total);
60+
input.push(stream.by_ref().take(sz).collect::<String>().to_tendril());
61+
total += sz;
62+
}
63+
64+
let mut test_name = String::new();
65+
test_name.push_str("tokenizing");
66+
test_name.push_str(" ");
67+
test_name.push_str(name);
68+
69+
c.bench_function(&test_name, move |b| b.iter(|| {
70+
let mut tok = Tokenizer::new(Sink, opts.clone());
71+
let mut buffer = BufferQueue::new();
72+
// We are doing clone inside the bench function, this is not ideal, but possibly
73+
// necessary since our iterator consumes the underlying buffer.
74+
for buf in input.clone().into_iter() {
75+
buffer.push_back(buf);
76+
let _ = tok.feed(&mut buffer);
77+
}
78+
let _ = tok.feed(&mut buffer);
79+
tok.end();
80+
}));
81+
}
82+
83+
84+
85+
/// Benchmark entry point: tokenize each sample document from data/bench/
/// with the default tokenizer options.
fn html5ever_benchmark(c: &mut Criterion) {
    const INPUTS: &[&str] = &[
        "lipsum.html",
        "lipsum-zh.html",
        "medium-fragment.html",
        "small-fragment.html",
        "tiny-fragment.html",
        "strong.html",
    ];
    for &file in INPUTS {
        run_bench(c, file, Default::default());
    }
}
93+
94+
criterion_group!(benches, html5ever_benchmark);
95+
criterion_main!(benches);

0 commit comments

Comments (0)