Skip to content

Commit c145576

Browse files
authored
Merge pull request #451 from dralley/extend-benchmarks
Extend macrobenchmark suite to cover namespaces, buffered vs. nocopy
2 parents 9ccc686 + d41a7f1 commit c145576

File tree

1 file changed

+180
-50
lines changed

1 file changed

+180
-50
lines changed

benches/macrobenches.rs

Lines changed: 180 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,78 @@
11
use criterion::{self, criterion_group, criterion_main, Criterion, Throughput};
22
use quick_xml::events::Event;
3-
use quick_xml::Reader;
43
use quick_xml::Result as XmlResult;
4+
use quick_xml::{NsReader, Reader};
5+
6+
static RPM_PRIMARY: &str = include_str!("../tests/documents/rpm_primary.xml");
7+
static RPM_PRIMARY2: &str = include_str!("../tests/documents/rpm_primary2.xml");
8+
static RPM_FILELISTS: &str = include_str!("../tests/documents/rpm_filelists.xml");
9+
static RPM_OTHER: &str = include_str!("../tests/documents/rpm_other.xml");
10+
static LIBREOFFICE_DOCUMENT: &str = include_str!("../tests/documents/libreoffice_document.fodt");
11+
static DOCUMENT: &str = include_str!("../tests/documents/document.xml");
12+
static TEST_WRITER_INDENT: &str = include_str!("../tests/documents/test_writer_indent.xml");
13+
static SAMPLE_1: &str = include_str!("../tests/documents/sample_1.xml");
14+
static LINESCORE: &str = include_str!("../tests/documents/linescore.xml");
15+
static SAMPLE_RSS: &str = include_str!("../tests/documents/sample_rss.xml");
16+
static SAMPLE_NS: &str = include_str!("../tests/documents/sample_ns.xml");
17+
static PLAYERS: &str = include_str!("../tests/documents/players.xml");
18+
19+
static INPUTS: &[(&str, &str)] = &[
20+
// long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces
21+
("rpm_primary.xml", RPM_PRIMARY),
22+
// long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces
23+
("rpm_primary2.xml", RPM_PRIMARY2),
24+
// long, mostly medium-length text elements, not much escaping
25+
("rpm_filelists.xml", RPM_FILELISTS),
26+
// long, mix of attributes and text, lots of escaping (both entity and char literal), long attributes
27+
("rpm_other.xml", RPM_OTHER),
28+
// long, mix of attributes and text, not much escaping, lots of non-ascii characters, lots of namespaces
29+
("libreoffice_document.fodt", LIBREOFFICE_DOCUMENT),
30+
// medium length, mostly empty tags, a few short attributes per element, no escaping
31+
("document.xml", DOCUMENT),
32+
// medium length, lots of namespaces, no escaping
33+
("test_writer_ident.xml", TEST_WRITER_INDENT),
34+
// short, mix of attributes and text, lots of escapes
35+
("sample_1.xml", SAMPLE_1),
36+
// medium length, lots of attributes, short attributes, few escapes
37+
("linescore.xml", LINESCORE),
38+
// short, lots of namespaces, no escapes
39+
("sample_ns.xml", SAMPLE_NS),
40+
// long, few attributes, mix of attribute lengths, escapes in text content
41+
("sample_rss.xml", SAMPLE_RSS),
42+
// long, lots of attributes, short attributes, no text, no escapes
43+
("players.xml", PLAYERS),
44+
];
45+
46+
// TODO: use fully normalized attribute values
47+
fn parse_document_from_str(doc: &str) -> XmlResult<()> {
48+
let mut r = Reader::from_str(doc);
49+
loop {
50+
match criterion::black_box(r.read_event()?) {
51+
Event::Start(e) | Event::Empty(e) => {
52+
for attr in e.attributes() {
53+
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
54+
}
55+
}
56+
Event::Text(e) => {
57+
criterion::black_box(e.unescape()?);
58+
}
59+
Event::CData(e) => {
60+
criterion::black_box(e.into_inner());
61+
}
62+
Event::End(_) => (),
63+
Event::Eof => break,
64+
_ => (),
65+
}
66+
}
67+
Ok(())
68+
}
569

6-
static RPM_PRIMARY: &[u8] = include_bytes!("../tests/documents/rpm_primary.xml");
7-
static RPM_PRIMARY2: &[u8] = include_bytes!("../tests/documents/rpm_primary2.xml");
8-
static RPM_FILELISTS: &[u8] = include_bytes!("../tests/documents/rpm_filelists.xml");
9-
static RPM_OTHER: &[u8] = include_bytes!("../tests/documents/rpm_other.xml");
10-
static LIBREOFFICE_DOCUMENT: &[u8] = include_bytes!("../tests/documents/libreoffice_document.fodt");
11-
static DOCUMENT: &[u8] = include_bytes!("../tests/documents/document.xml");
12-
static TEST_WRITER_INDENT: &[u8] = include_bytes!("../tests/documents/test_writer_indent.xml");
13-
static SAMPLE_1: &[u8] = include_bytes!("../tests/documents/sample_1.xml");
14-
static LINESCORE: &[u8] = include_bytes!("../tests/documents/linescore.xml");
15-
static SAMPLE_RSS: &[u8] = include_bytes!("../tests/documents/sample_rss.xml");
16-
static SAMPLE_NS: &[u8] = include_bytes!("../tests/documents/sample_ns.xml");
17-
static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml");
18-
19-
// TODO: read the namespaces too
2070
// TODO: use fully normalized attribute values
21-
fn parse_document(doc: &[u8]) -> XmlResult<()> {
71+
fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
2272
let mut r = Reader::from_reader(doc);
73+
let mut buf = Vec::new();
2374
loop {
24-
match r.read_event()? {
75+
match criterion::black_box(r.read_event_into(&mut buf)?) {
2576
Event::Start(e) | Event::Empty(e) => {
2677
for attr in e.attributes() {
2778
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
@@ -37,49 +88,128 @@ fn parse_document(doc: &[u8]) -> XmlResult<()> {
3788
Event::Eof => break,
3889
_ => (),
3990
}
91+
buf.clear();
4092
}
4193
Ok(())
4294
}
4395

44-
pub fn bench_fully_parse_document(c: &mut Criterion) {
45-
let mut group = c.benchmark_group("fully_parse_document");
46-
47-
let inputs = [
48-
// long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces
49-
("rpm_primary.xml", RPM_PRIMARY),
50-
// long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces
51-
("rpm_primary2.xml", RPM_PRIMARY2),
52-
// long, mostly medium-length text elements, not much escaping
53-
("rpm_filelists.xml", RPM_FILELISTS),
54-
// long, mix of attributes and text, lots of escaping (both entity and char literal), long attributes
55-
("rpm_other.xml", RPM_OTHER),
56-
// long, mix of attributes and text, not much escaping, lots of non-ascii characters, lots of namespaces
57-
("libreoffice_document.fodt", LIBREOFFICE_DOCUMENT),
58-
// medium length, mostly empty tags, a few short attributes per element, no escaping
59-
("document.xml", DOCUMENT),
60-
// medium length, lots of namespaces, no escaping
61-
("test_writer_ident.xml", TEST_WRITER_INDENT),
62-
// short, mix of attributes and text, lots of escapes
63-
("sample_1.xml", SAMPLE_1),
64-
// medium length, lots of attributes, short attributes, few escapes
65-
("linescore.xml", LINESCORE),
66-
// short, lots of namespaces, no escapes
67-
("sample_ns.xml", SAMPLE_NS),
68-
// long, few attributes, mix of attribute lengths, escapes in text content
69-
("sample_rss.xml", SAMPLE_RSS),
70-
// long, lots of attributes, short attributes, no text, no escapes
71-
("players.xml", PLAYERS),
72-
];
73-
74-
for (id, data) in inputs.iter() {
96+
// TODO: use fully normalized attribute values
97+
fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
98+
let mut r = NsReader::from_str(doc);
99+
loop {
100+
match criterion::black_box(r.read_resolved_event()?) {
101+
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
102+
criterion::black_box(resolved_ns);
103+
for attr in e.attributes() {
104+
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
105+
}
106+
}
107+
(resolved_ns, Event::Text(e)) => {
108+
criterion::black_box(e.unescape()?);
109+
criterion::black_box(resolved_ns);
110+
}
111+
(resolved_ns, Event::CData(e)) => {
112+
criterion::black_box(e.into_inner());
113+
criterion::black_box(resolved_ns);
114+
}
115+
(_, Event::End(_)) => (),
116+
(_, Event::Eof) => break,
117+
_ => (),
118+
}
119+
}
120+
Ok(())
121+
}
122+
123+
// TODO: use fully normalized attribute values
124+
fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
125+
let mut r = NsReader::from_reader(doc);
126+
let mut buf = Vec::new();
127+
loop {
128+
match criterion::black_box(r.read_resolved_event_into(&mut buf)?) {
129+
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
130+
criterion::black_box(resolved_ns);
131+
for attr in e.attributes() {
132+
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
133+
}
134+
}
135+
(resolved_ns, Event::Text(e)) => {
136+
criterion::black_box(e.unescape()?);
137+
criterion::black_box(resolved_ns);
138+
}
139+
(resolved_ns, Event::CData(e)) => {
140+
criterion::black_box(e.into_inner());
141+
criterion::black_box(resolved_ns);
142+
}
143+
(_, Event::End(_)) => (),
144+
(_, Event::Eof) => break,
145+
_ => (),
146+
}
147+
buf.clear();
148+
}
149+
Ok(())
150+
}
151+
152+
/// Just parse - no decoding overhead
153+
pub fn bench_parse_document_nocopy(c: &mut Criterion) {
154+
let mut group = c.benchmark_group("parse_document_nocopy");
155+
156+
for (id, data) in INPUTS.iter() {
157+
group.throughput(Throughput::Bytes(data.len() as u64));
158+
group.bench_with_input(*id, *data, |b, input| {
159+
b.iter(|| parse_document_from_str(input).unwrap())
160+
});
161+
}
162+
163+
group.finish();
164+
}
165+
166+
/// Decode into a buffer, then parse
167+
pub fn bench_decode_and_parse_document(c: &mut Criterion) {
168+
let mut group = c.benchmark_group("decode_and_parse_document");
169+
170+
for (id, data) in INPUTS.iter() {
171+
group.throughput(Throughput::Bytes(data.len() as u64));
172+
group.bench_with_input(*id, *data, |b, input| {
173+
b.iter(|| parse_document_from_bytes(input.as_bytes()).unwrap())
174+
});
175+
}
176+
177+
group.finish();
178+
}
179+
180+
/// Just parse - no decoding overhead - including namespaces
181+
pub fn bench_parse_document_nocopy_with_namespaces(c: &mut Criterion) {
182+
let mut group = c.benchmark_group("parse_document_nocopy_with_namespaces");
183+
184+
for (id, data) in INPUTS.iter() {
185+
group.throughput(Throughput::Bytes(data.len() as u64));
186+
group.bench_with_input(*id, *data, |b, input| {
187+
b.iter(|| parse_document_from_str_with_namespaces(input).unwrap())
188+
});
189+
}
190+
191+
group.finish();
192+
}
193+
194+
/// Decode into a buffer, then parse - including namespaces
195+
pub fn bench_decode_and_parse_document_with_namespaces(c: &mut Criterion) {
196+
let mut group = c.benchmark_group("decode_and_parse_document_with_namespaces");
197+
198+
for (id, data) in INPUTS.iter() {
75199
group.throughput(Throughput::Bytes(data.len() as u64));
76200
group.bench_with_input(*id, *data, |b, input| {
77-
b.iter(|| parse_document(input).unwrap())
201+
b.iter(|| parse_document_from_bytes_with_namespaces(input.as_bytes()).unwrap())
78202
});
79203
}
80204

81205
group.finish();
82206
}
83207

84-
criterion_group!(benches, bench_fully_parse_document,);
208+
criterion_group!(
209+
benches,
210+
bench_parse_document_nocopy,
211+
bench_decode_and_parse_document,
212+
bench_parse_document_nocopy_with_namespaces,
213+
bench_decode_and_parse_document_with_namespaces,
214+
);
85215
criterion_main!(benches);

0 commit comments

Comments
 (0)