11use criterion:: { self , criterion_group, criterion_main, Criterion , Throughput } ;
22use quick_xml:: events:: Event ;
3- use quick_xml:: Reader ;
43use quick_xml:: Result as XmlResult ;
4+ use quick_xml:: { NsReader , Reader } ;
5+
// Benchmark documents, embedded into the binary at compile time as `&str` via `include_str!`.
// Each static holds the full text of the correspondingly named file under `../tests/documents/`.
6+ static RPM_PRIMARY : & str = include_str ! ( "../tests/documents/rpm_primary.xml" ) ;
7+ static RPM_PRIMARY2 : & str = include_str ! ( "../tests/documents/rpm_primary2.xml" ) ;
8+ static RPM_FILELISTS : & str = include_str ! ( "../tests/documents/rpm_filelists.xml" ) ;
9+ static RPM_OTHER : & str = include_str ! ( "../tests/documents/rpm_other.xml" ) ;
10+ static LIBREOFFICE_DOCUMENT : & str = include_str ! ( "../tests/documents/libreoffice_document.fodt" ) ;
11+ static DOCUMENT : & str = include_str ! ( "../tests/documents/document.xml" ) ;
12+ static TEST_WRITER_INDENT : & str = include_str ! ( "../tests/documents/test_writer_indent.xml" ) ;
13+ static SAMPLE_1 : & str = include_str ! ( "../tests/documents/sample_1.xml" ) ;
14+ static LINESCORE : & str = include_str ! ( "../tests/documents/linescore.xml" ) ;
15+ static SAMPLE_RSS : & str = include_str ! ( "../tests/documents/sample_rss.xml" ) ;
16+ static SAMPLE_NS : & str = include_str ! ( "../tests/documents/sample_ns.xml" ) ;
17+ static PLAYERS : & str = include_str ! ( "../tests/documents/players.xml" ) ;
18+
19+ static INPUTS : & [ ( & str , & str ) ] = & [
20+ // long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces
21+ ( "rpm_primary.xml" , RPM_PRIMARY ) ,
22+ // long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces
23+ ( "rpm_primary2.xml" , RPM_PRIMARY2 ) ,
24+ // long, mostly medium-length text elements, not much escaping
25+ ( "rpm_filelists.xml" , RPM_FILELISTS ) ,
26+ // long, mix of attributes and text, lots of escaping (both entity and char literal), long attributes
27+ ( "rpm_other.xml" , RPM_OTHER ) ,
28+ // long, mix of attributes and text, not much escaping, lots of non-ascii characters, lots of namespaces
29+ ( "libreoffice_document.fodt" , LIBREOFFICE_DOCUMENT ) ,
30+ // medium length, mostly empty tags, a few short attributes per element, no escaping
31+ ( "document.xml" , DOCUMENT ) ,
32+ // medium length, lots of namespaces, no escaping
33+ ( "test_writer_ident.xml" , TEST_WRITER_INDENT ) ,
34+ // short, mix of attributes and text, lots of escapes
35+ ( "sample_1.xml" , SAMPLE_1 ) ,
36+ // medium length, lots of attributes, short attributes, few escapes
37+ ( "linescore.xml" , LINESCORE ) ,
38+ // short, lots of namespaces, no escapes
39+ ( "sample_ns.xml" , SAMPLE_NS ) ,
40+ // long, few attributes, mix of attribute lengths, escapes in text content
41+ ( "sample_rss.xml" , SAMPLE_RSS ) ,
42+ // long, lots of attributes, short attributes, no text, no escapes
43+ ( "players.xml" , PLAYERS ) ,
44+ ] ;
45+
46+ // TODO: use fully normalized attribute values
47+ fn parse_document_from_str ( doc : & str ) -> XmlResult < ( ) > {
48+ let mut r = Reader :: from_str ( doc) ;
49+ loop {
50+ match criterion:: black_box ( r. read_event ( ) ?) {
51+ Event :: Start ( e) | Event :: Empty ( e) => {
52+ for attr in e. attributes ( ) {
53+ criterion:: black_box ( attr?. decode_and_unescape_value ( & r) ?) ;
54+ }
55+ }
56+ Event :: Text ( e) => {
57+ criterion:: black_box ( e. unescape ( ) ?) ;
58+ }
59+ Event :: CData ( e) => {
60+ criterion:: black_box ( e. into_inner ( ) ) ;
61+ }
62+ Event :: End ( _) => ( ) ,
63+ Event :: Eof => break ,
64+ _ => ( ) ,
65+ }
66+ }
67+ Ok ( ( ) )
68+ }
569
6- static RPM_PRIMARY : & [ u8 ] = include_bytes ! ( "../tests/documents/rpm_primary.xml" ) ;
7- static RPM_PRIMARY2 : & [ u8 ] = include_bytes ! ( "../tests/documents/rpm_primary2.xml" ) ;
8- static RPM_FILELISTS : & [ u8 ] = include_bytes ! ( "../tests/documents/rpm_filelists.xml" ) ;
9- static RPM_OTHER : & [ u8 ] = include_bytes ! ( "../tests/documents/rpm_other.xml" ) ;
10- static LIBREOFFICE_DOCUMENT : & [ u8 ] = include_bytes ! ( "../tests/documents/libreoffice_document.fodt" ) ;
11- static DOCUMENT : & [ u8 ] = include_bytes ! ( "../tests/documents/document.xml" ) ;
12- static TEST_WRITER_INDENT : & [ u8 ] = include_bytes ! ( "../tests/documents/test_writer_indent.xml" ) ;
13- static SAMPLE_1 : & [ u8 ] = include_bytes ! ( "../tests/documents/sample_1.xml" ) ;
14- static LINESCORE : & [ u8 ] = include_bytes ! ( "../tests/documents/linescore.xml" ) ;
15- static SAMPLE_RSS : & [ u8 ] = include_bytes ! ( "../tests/documents/sample_rss.xml" ) ;
16- static SAMPLE_NS : & [ u8 ] = include_bytes ! ( "../tests/documents/sample_ns.xml" ) ;
17- static PLAYERS : & [ u8 ] = include_bytes ! ( "../tests/documents/players.xml" ) ;
18-
19- // TODO: read the namespaces too
2070// TODO: use fully normalized attribute values
21- fn parse_document ( doc : & [ u8 ] ) -> XmlResult < ( ) > {
71+ fn parse_document_from_bytes ( doc : & [ u8 ] ) -> XmlResult < ( ) > {
2272 let mut r = Reader :: from_reader ( doc) ;
73+ let mut buf = Vec :: new ( ) ;
2374 loop {
24- match r . read_event ( ) ? {
75+ match criterion :: black_box ( r . read_event_into ( & mut buf ) ? ) {
2576 Event :: Start ( e) | Event :: Empty ( e) => {
2677 for attr in e. attributes ( ) {
2778 criterion:: black_box ( attr?. decode_and_unescape_value ( & r) ?) ;
@@ -37,49 +88,128 @@ fn parse_document(doc: &[u8]) -> XmlResult<()> {
3788 Event :: Eof => break ,
3889 _ => ( ) ,
3990 }
91+ buf. clear ( ) ;
4092 }
4193 Ok ( ( ) )
4294}
4395
44- pub fn bench_fully_parse_document ( c : & mut Criterion ) {
45- let mut group = c. benchmark_group ( "fully_parse_document" ) ;
46-
47- let inputs = [
48- // long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces
49- ( "rpm_primary.xml" , RPM_PRIMARY ) ,
50- // long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces
51- ( "rpm_primary2.xml" , RPM_PRIMARY2 ) ,
52- // long, mostly medium-length text elements, not much escaping
53- ( "rpm_filelists.xml" , RPM_FILELISTS ) ,
54- // long, mix of attributes and text, lots of escaping (both entity and char literal), long attributes
55- ( "rpm_other.xml" , RPM_OTHER ) ,
56- // long, mix of attributes and text, not much escaping, lots of non-ascii characters, lots of namespaces
57- ( "libreoffice_document.fodt" , LIBREOFFICE_DOCUMENT ) ,
58- // medium length, mostly empty tags, a few short attributes per element, no escaping
59- ( "document.xml" , DOCUMENT ) ,
60- // medium length, lots of namespaces, no escaping
61- ( "test_writer_ident.xml" , TEST_WRITER_INDENT ) ,
62- // short, mix of attributes and text, lots of escapes
63- ( "sample_1.xml" , SAMPLE_1 ) ,
64- // medium length, lots of attributes, short attributes, few escapes
65- ( "linescore.xml" , LINESCORE ) ,
66- // short, lots of namespaces, no escapes
67- ( "sample_ns.xml" , SAMPLE_NS ) ,
68- // long, few attributes, mix of attribute lengths, escapes in text content
69- ( "sample_rss.xml" , SAMPLE_RSS ) ,
70- // long, lots of attributes, short attributes, no text, no escapes
71- ( "players.xml" , PLAYERS ) ,
72- ] ;
73-
74- for ( id, data) in inputs. iter ( ) {
96+ // TODO: use fully normalized attribute values
97+ fn parse_document_from_str_with_namespaces ( doc : & str ) -> XmlResult < ( ) > {
98+ let mut r = NsReader :: from_str ( doc) ;
99+ loop {
100+ match criterion:: black_box ( r. read_resolved_event ( ) ?) {
101+ ( resolved_ns, Event :: Start ( e) | Event :: Empty ( e) ) => {
102+ criterion:: black_box ( resolved_ns) ;
103+ for attr in e. attributes ( ) {
104+ criterion:: black_box ( attr?. decode_and_unescape_value ( & r) ?) ;
105+ }
106+ }
107+ ( resolved_ns, Event :: Text ( e) ) => {
108+ criterion:: black_box ( e. unescape ( ) ?) ;
109+ criterion:: black_box ( resolved_ns) ;
110+ }
111+ ( resolved_ns, Event :: CData ( e) ) => {
112+ criterion:: black_box ( e. into_inner ( ) ) ;
113+ criterion:: black_box ( resolved_ns) ;
114+ }
115+ ( _, Event :: End ( _) ) => ( ) ,
116+ ( _, Event :: Eof ) => break ,
117+ _ => ( ) ,
118+ }
119+ }
120+ Ok ( ( ) )
121+ }
122+
123+ // TODO: use fully normalized attribute values
124+ fn parse_document_from_bytes_with_namespaces ( doc : & [ u8 ] ) -> XmlResult < ( ) > {
125+ let mut r = NsReader :: from_reader ( doc) ;
126+ let mut buf = Vec :: new ( ) ;
127+ loop {
128+ match criterion:: black_box ( r. read_resolved_event_into ( & mut buf) ?) {
129+ ( resolved_ns, Event :: Start ( e) | Event :: Empty ( e) ) => {
130+ criterion:: black_box ( resolved_ns) ;
131+ for attr in e. attributes ( ) {
132+ criterion:: black_box ( attr?. decode_and_unescape_value ( & r) ?) ;
133+ }
134+ }
135+ ( resolved_ns, Event :: Text ( e) ) => {
136+ criterion:: black_box ( e. unescape ( ) ?) ;
137+ criterion:: black_box ( resolved_ns) ;
138+ }
139+ ( resolved_ns, Event :: CData ( e) ) => {
140+ criterion:: black_box ( e. into_inner ( ) ) ;
141+ criterion:: black_box ( resolved_ns) ;
142+ }
143+ ( _, Event :: End ( _) ) => ( ) ,
144+ ( _, Event :: Eof ) => break ,
145+ _ => ( ) ,
146+ }
147+ buf. clear ( ) ;
148+ }
149+ Ok ( ( ) )
150+ }
151+
152+ /// Just parse - no decoding overhead
153+ pub fn bench_parse_document_nocopy ( c : & mut Criterion ) {
154+ let mut group = c. benchmark_group ( "parse_document_nocopy" ) ;
155+
156+ for ( id, data) in INPUTS . iter ( ) {
157+ group. throughput ( Throughput :: Bytes ( data. len ( ) as u64 ) ) ;
158+ group. bench_with_input ( * id, * data, |b, input| {
159+ b. iter ( || parse_document_from_str ( input) . unwrap ( ) )
160+ } ) ;
161+ }
162+
163+ group. finish ( ) ;
164+ }
165+
166+ /// Decode into a buffer, then parse
167+ pub fn bench_decode_and_parse_document ( c : & mut Criterion ) {
168+ let mut group = c. benchmark_group ( "decode_and_parse_document" ) ;
169+
170+ for ( id, data) in INPUTS . iter ( ) {
171+ group. throughput ( Throughput :: Bytes ( data. len ( ) as u64 ) ) ;
172+ group. bench_with_input ( * id, * data, |b, input| {
173+ b. iter ( || parse_document_from_bytes ( input. as_bytes ( ) ) . unwrap ( ) )
174+ } ) ;
175+ }
176+
177+ group. finish ( ) ;
178+ }
179+
180+ /// Just parse - no decoding overhead - including namespaces
181+ pub fn bench_parse_document_nocopy_with_namespaces ( c : & mut Criterion ) {
182+ let mut group = c. benchmark_group ( "parse_document_nocopy_with_namespaces" ) ;
183+
184+ for ( id, data) in INPUTS . iter ( ) {
185+ group. throughput ( Throughput :: Bytes ( data. len ( ) as u64 ) ) ;
186+ group. bench_with_input ( * id, * data, |b, input| {
187+ b. iter ( || parse_document_from_str_with_namespaces ( input) . unwrap ( ) )
188+ } ) ;
189+ }
190+
191+ group. finish ( ) ;
192+ }
193+
194+ /// Decode into a buffer, then parse - including namespaces
195+ pub fn bench_decode_and_parse_document_with_namespaces ( c : & mut Criterion ) {
196+ let mut group = c. benchmark_group ( "decode_and_parse_document_with_namespaces" ) ;
197+
198+ for ( id, data) in INPUTS . iter ( ) {
75199 group. throughput ( Throughput :: Bytes ( data. len ( ) as u64 ) ) ;
76200 group. bench_with_input ( * id, * data, |b, input| {
77- b. iter ( || parse_document ( input) . unwrap ( ) )
201+ b. iter ( || parse_document_from_bytes_with_namespaces ( input. as_bytes ( ) ) . unwrap ( ) )
78202 } ) ;
79203 }
80204
81205 group. finish ( ) ;
82206}
83207
84- criterion_group ! ( benches, bench_fully_parse_document, ) ;
// Collects the four benchmark functions into the `benches` group, then passes that group to
// `criterion_main!`.
208+ criterion_group ! (
209+ benches,
210+ bench_parse_document_nocopy,
211+ bench_decode_and_parse_document,
212+ bench_parse_document_nocopy_with_namespaces,
213+ bench_decode_and_parse_document_with_namespaces,
214+ ) ;
85215criterion_main ! ( benches) ;
0 commit comments