1
+ #[ macro_use]
2
+ extern crate criterion;
3
+ extern crate xml5ever;
4
+ extern crate markup5ever;
5
+
6
+ use std:: fs;
7
+ use std:: path:: PathBuf ;
8
+
9
+ use criterion:: { Criterion , black_box, ParameterizedBenchmark } ;
10
+
11
+ use markup5ever:: buffer_queue:: BufferQueue ;
12
+ use xml5ever:: tokenizer:: { TokenSink , Token , XmlTokenizer , XmlTokenizerOpts } ;
13
+ use xml5ever:: tendril:: * ;
14
+
15
/// Zero-sized token sink handed to the `XmlTokenizer` under benchmark.
///
/// It keeps no state; its `TokenSink` implementation only needs a type to
/// hang `process_token` on.
struct Sink;
16
+
17
+ impl TokenSink for Sink {
18
+ fn process_token ( & mut self , token : Token ) {
19
+ // Don't use the token, but make sure we don't get
20
+ // optimized out entirely.
21
+ black_box ( token) ;
22
+ }
23
+ }
24
+
25
+ impl Sink {
26
+ fn run ( input : Vec < StrTendril > , opts : XmlTokenizerOpts ) {
27
+ let mut tok = XmlTokenizer :: new ( Sink , opts. clone ( ) ) ;
28
+ let mut buffer = BufferQueue :: new ( ) ;
29
+ for buf in input. into_iter ( ) {
30
+ buffer. push_back ( buf) ;
31
+ let _ = tok. feed ( & mut buffer) ;
32
+ }
33
+ let _ = tok. feed ( & mut buffer) ;
34
+ tok. end ( ) ;
35
+ }
36
+ }
37
+
38
+ fn run_bench ( c : & mut Criterion , name : & str , opts : XmlTokenizerOpts ) {
39
+ let mut path = PathBuf :: from ( env ! ( "CARGO_MANIFEST_DIR" ) ) ;
40
+ path. push ( "data/bench/" ) ;
41
+ path. push ( name) ;
42
+ let mut file = fs:: File :: open ( & path) . ok ( ) . expect ( "can't open file" ) ;
43
+
44
+ // Read the file and treat it as an infinitely repeating sequence of characters.
45
+ let mut file_input = ByteTendril :: new ( ) ;
46
+ file. read_to_tendril ( & mut file_input) . ok ( ) . expect ( "can't read file" ) ;
47
+ let file_input: StrTendril = file_input. try_reinterpret ( ) . unwrap ( ) ;
48
+ let size = file_input. len ( ) ;
49
+ let mut stream = file_input. chars ( ) . cycle ( ) ;
50
+
51
+ // Break the input into chunks of 1024 chars (= a few kB).
52
+ // This simulates reading from the network.
53
+ let mut input = vec ! [ ] ;
54
+ let mut total = 0usize ;
55
+ while total < size {
56
+ // The by_ref() call is important, otherwise we get wrong results!
57
+ // See rust-lang/rust#18045.
58
+ let sz = std:: cmp:: min ( 1024 , size - total) ;
59
+ input. push ( stream. by_ref ( ) . take ( sz) . collect :: < String > ( ) . to_tendril ( ) ) ;
60
+ total += sz;
61
+ }
62
+
63
+ let mut test_name = String :: new ( ) ;
64
+ test_name. push_str ( "tokenizing" ) ;
65
+ test_name. push_str ( " " ) ;
66
+ test_name. push_str ( name) ;
67
+
68
+ c. bench_function ( & test_name, move |b| b. iter ( || {
69
+ let mut tok = XmlTokenizer :: new ( Sink , opts. clone ( ) ) ;
70
+ let mut buffer = BufferQueue :: new ( ) ;
71
+ // We are doing clone inside the bench function, this is not ideal, but possibly
72
+ // necessary since our iterator consumes the underlying buffer.
73
+ for buf in input. clone ( ) . into_iter ( ) {
74
+ buffer. push_back ( buf) ;
75
+ let _ = tok. feed ( & mut buffer) ;
76
+ }
77
+ let _ = tok. feed ( & mut buffer) ;
78
+ tok. end ( ) ;
79
+ } ) ) ;
80
+ }
81
+
82
+
83
+
84
+ fn xml5ever_benchmarks ( c : & mut Criterion ) {
85
+ run_bench ( c, "strong.xml" , Default :: default ( ) ) ;
86
+ }
87
+
88
+ criterion_group ! ( benches, xml5ever_benchmarks) ;
89
+ criterion_main ! ( benches) ;
0 commit comments