Skip to content

Commit 6d0d9bb

Browse files
authored
Don't clone input buffer inside html5ever benchmark loop (#634)
* Impl additional traits for BufferQueue/TokenizerResult

  Signed-off-by: Simon Wülker <[email protected]>

* Don't clone input inside html5ever benchmark loop

  Signed-off-by: Simon Wülker <[email protected]>

---------

Signed-off-by: Simon Wülker <[email protected]>
1 parent c74a7ad commit 6d0d9bb

File tree

3 files changed

+22
-16
lines changed

3 files changed

+22
-16
lines changed

html5ever/benches/html5ever.rs

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ extern crate html5ever;
55
use std::fs;
66
use std::path::PathBuf;
77

8-
use criterion::Criterion;
8+
use criterion::{BatchSize, Criterion};
99

10-
use html5ever::tendril::*;
1110
use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer};
11+
use html5ever::{tendril::*, TokenizerResult};
1212

1313
struct Sink;
1414

@@ -51,19 +51,25 @@ fn run_bench(c: &mut Criterion, name: &str) {
5151

5252
let test_name = format!("html tokenizing {name}");
5353

54+
// Construct a buffer queue to feed to the tokenizer
55+
let buffer_queue = BufferQueue::default();
56+
for buf in input.into_iter() {
57+
buffer_queue.push_back(buf);
58+
}
59+
5460
c.bench_function(&test_name, move |b| {
55-
b.iter(|| {
56-
let tok = Tokenizer::new(Sink, Default::default());
57-
let buffer = BufferQueue::default();
58-
// We are doing clone inside the bench function, this is not ideal, but possibly
59-
// necessary since our iterator consumes the underlying buffer.
60-
for buf in input.clone().into_iter() {
61-
buffer.push_back(buf);
62-
let _ = tok.feed(&buffer);
63-
}
64-
let _ = tok.feed(&buffer);
65-
tok.end();
66-
})
61+
b.iter_batched(
62+
|| buffer_queue.clone(),
63+
|buffer_queue| {
64+
let tok = Tokenizer::new(Sink, Default::default());
65+
66+
// Tokenize the entire input, ignoring any <script> elements we find along the way
67+
while tok.feed(&buffer_queue) != TokenizerResult::Done {}
68+
69+
tok.end();
70+
},
71+
BatchSize::SmallInput,
72+
)
6773
});
6874
}
6975

markup5ever/interface/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ impl fmt::Debug for ExpandedName<'_> {
6161
}
6262

6363
#[must_use]
64-
#[derive(Debug)]
64+
#[derive(Debug, PartialEq)]
6565
pub enum TokenizerResult<Handle> {
6666
Done,
6767
Script(Handle),

markup5ever/util/buffer_queue.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ pub enum SetResult {
4747
/// Internally it uses [`VecDeque`] and has the same complexity properties.
4848
///
4949
/// [`VecDeque`]: https://doc.rust-lang.org/std/collections/struct.VecDeque.html
50-
#[derive(Debug)]
50+
#[derive(Clone, Debug)]
5151
pub struct BufferQueue {
5252
/// Buffers to process.
5353
buffers: RefCell<VecDeque<StrTendril>>,

0 commit comments

Comments
 (0)