@@ -5,10 +5,10 @@ extern crate html5ever;
5
5
use std:: fs;
6
6
use std:: path:: PathBuf ;
7
7
8
- use criterion:: Criterion ;
8
+ use criterion:: { BatchSize , Criterion } ;
9
9
10
- use html5ever:: tendril:: * ;
11
10
use html5ever:: tokenizer:: { BufferQueue , Token , TokenSink , TokenSinkResult , Tokenizer } ;
11
+ use html5ever:: { tendril:: * , TokenizerResult } ;
12
12
13
13
struct Sink ;
14
14
@@ -51,19 +51,25 @@ fn run_bench(c: &mut Criterion, name: &str) {
51
51
52
52
let test_name = format ! ( "html tokenizing {name}" ) ;
53
53
54
+ // Construct a buffer queue to feed to the tokenizer
55
+ let buffer_queue = BufferQueue :: default ( ) ;
56
+ for buf in input. into_iter ( ) {
57
+ buffer_queue. push_back ( buf) ;
58
+ }
59
+
54
60
c. bench_function ( & test_name, move |b| {
55
- b. iter ( || {
56
- let tok = Tokenizer :: new ( Sink , Default :: default ( ) ) ;
57
- let buffer = BufferQueue :: default ( ) ;
58
- // We are doing clone inside the bench function, this is not ideal, but possibly
59
- // necessary since our iterator consumes the underlying buffer.
60
- for buf in input. clone ( ) . into_iter ( ) {
61
- buffer . push_back ( buf ) ;
62
- let _ = tok . feed ( & buffer ) ;
63
- }
64
- let _ = tok . feed ( & buffer ) ;
65
- tok . end ( ) ;
66
- } )
61
+ b. iter_batched (
62
+ || buffer_queue . clone ( ) ,
63
+ |buffer_queue| {
64
+ let tok = Tokenizer :: new ( Sink , Default :: default ( ) ) ;
65
+
66
+ // Tokenize the entire input, ignoring any <script> elements we find along the way
67
+ while tok . feed ( & buffer_queue ) != TokenizerResult :: Done { }
68
+
69
+ tok . end ( ) ;
70
+ } ,
71
+ BatchSize :: SmallInput ,
72
+ )
67
73
} ) ;
68
74
}
69
75
0 commit comments