1
+ #[ macro_use]
2
+ extern crate criterion;
3
+ extern crate html5ever;
4
+
5
+ use std:: fs;
6
+ use std:: path:: PathBuf ;
7
+
8
+ use criterion:: { Criterion , black_box, ParameterizedBenchmark } ;
9
+
10
+ use html5ever:: tokenizer:: { BufferQueue , TokenSink , Token , Tokenizer , TokenizerOpts , TokenSinkResult } ;
11
+ use html5ever:: tendril:: * ;
12
+
13
/// Stateless token sink for the benchmark: it carries no data and exists only
/// so the tokenizer has a `TokenSink` to hand its tokens to.
struct Sink;
14
+
15
+ impl TokenSink for Sink {
16
+ type Handle = ( ) ;
17
+
18
+ fn process_token ( & mut self , token : Token , _line_number : u64 ) -> TokenSinkResult < ( ) > {
19
+ // Don't use the token, but make sure we don't get
20
+ // optimized out entirely.
21
+ black_box ( token) ;
22
+ TokenSinkResult :: Continue
23
+ }
24
+ }
25
+
26
+ impl Sink {
27
+ fn run ( input : Vec < StrTendril > , opts : TokenizerOpts ) {
28
+ let mut tok = Tokenizer :: new ( Sink , opts. clone ( ) ) ;
29
+ let mut buffer = BufferQueue :: new ( ) ;
30
+ for buf in input. into_iter ( ) {
31
+ buffer. push_back ( buf) ;
32
+ let _ = tok. feed ( & mut buffer) ;
33
+ }
34
+ let _ = tok. feed ( & mut buffer) ;
35
+ tok. end ( ) ;
36
+ }
37
+ }
38
+
39
+ fn run_bench ( c : & mut Criterion , name : & str , opts : TokenizerOpts ) {
40
+ let mut path = PathBuf :: from ( env ! ( "CARGO_MANIFEST_DIR" ) ) ;
41
+ path. push ( "data/bench/" ) ;
42
+ path. push ( name) ;
43
+ let mut file = fs:: File :: open ( & path) . ok ( ) . expect ( "can't open file" ) ;
44
+
45
+ // Read the file and treat it as an infinitely repeating sequence of characters.
46
+ let mut file_input = ByteTendril :: new ( ) ;
47
+ file. read_to_tendril ( & mut file_input) . ok ( ) . expect ( "can't read file" ) ;
48
+ let file_input: StrTendril = file_input. try_reinterpret ( ) . unwrap ( ) ;
49
+ let size = file_input. len ( ) ;
50
+ let mut stream = file_input. chars ( ) . cycle ( ) ;
51
+
52
+ // Break the input into chunks of 1024 chars (= a few kB).
53
+ // This simulates reading from the network.
54
+ let mut input = vec ! [ ] ;
55
+ let mut total = 0usize ;
56
+ while total < size {
57
+ // The by_ref() call is important, otherwise we get wrong results!
58
+ // See rust-lang/rust#18045.
59
+ let sz = std:: cmp:: min ( 1024 , size - total) ;
60
+ input. push ( stream. by_ref ( ) . take ( sz) . collect :: < String > ( ) . to_tendril ( ) ) ;
61
+ total += sz;
62
+ }
63
+
64
+ let mut test_name = String :: new ( ) ;
65
+ test_name. push_str ( "tokenizing" ) ;
66
+ test_name. push_str ( " " ) ;
67
+ test_name. push_str ( name) ;
68
+
69
+ c. bench_function ( & test_name, move |b| b. iter ( || {
70
+ let mut tok = Tokenizer :: new ( Sink , opts. clone ( ) ) ;
71
+ let mut buffer = BufferQueue :: new ( ) ;
72
+ // We are doing clone inside the bench function, this is not ideal, but possibly
73
+ // necessary since our iterator consumes the underlying buffer.
74
+ for buf in input. clone ( ) . into_iter ( ) {
75
+ buffer. push_back ( buf) ;
76
+ let _ = tok. feed ( & mut buffer) ;
77
+ }
78
+ let _ = tok. feed ( & mut buffer) ;
79
+ tok. end ( ) ;
80
+ } ) ) ;
81
+ }
82
+
83
+
84
+
85
+ fn html5ever_benchmark ( c : & mut Criterion ) {
86
+ run_bench ( c, "lipsum.html" , Default :: default ( ) ) ;
87
+ run_bench ( c, "lipsum-zh.html" , Default :: default ( ) ) ;
88
+ run_bench ( c, "medium-fragment.html" , Default :: default ( ) ) ;
89
+ run_bench ( c, "small-fragment.html" , Default :: default ( ) ) ;
90
+ run_bench ( c, "tiny-fragment.html" , Default :: default ( ) ) ;
91
+ run_bench ( c, "strong.html" , Default :: default ( ) ) ;
92
+ }
93
+
94
// Collect `html5ever_benchmark` into a benchmark group named `benches`, then
// hand that group to `criterion_main!`, which provides the entry point for
// this benchmark target.
criterion_group!(benches, html5ever_benchmark);
criterion_main!(benches);
0 commit comments