Skip to content

Commit 5bbaa90

Browse files
dralley, Mingun
authored and committed
More correctly normalize attribute values
1 parent df7df69 commit 5bbaa90

File tree

5 files changed

+315
-14
lines changed

5 files changed

+315
-14
lines changed

Changelog.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,20 @@
1515

1616
### New Features
1717

18+
- [#379]: Improved compliance with the XML attribute value normalization process by
19+
adding `Attribute::normalized_value()` and `Attribute::normalized_value_with()`,
20+
which ought to be used in place of `Attribute::unescape_value()` and
21+
`Attribute::unescape_value_with()`
22+
1823
### Bug Fixes
1924

2025
- [#379]: Properly normalize EOL characters in `BytesText::decode`, `BytesCData::decode`
2126
and `BytesRef::decode` methods
2227

2328
### Misc Changes
2429

30+
[#379]: https://github.com/tafia/quick-xml/pull/379
31+
2532

2633
## 0.38.0 -- 2025-06-28
2734

benches/macrobenches.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,13 @@ static INPUTS: &[(&str, &str)] = &[
4444
("players.xml", PLAYERS),
4545
];
4646

47-
// TODO: use fully normalized attribute values
4847
fn parse_document_from_str(doc: &str) -> XmlResult<()> {
4948
let mut r = Reader::from_str(doc);
5049
loop {
5150
match black_box(r.read_event()?) {
5251
Event::Start(e) | Event::Empty(e) => {
5352
for attr in e.attributes() {
54-
black_box(attr?.decode_and_unescape_value(r.decoder())?);
53+
black_box(attr?.decode_and_normalize_value(r.decoder(), 128)?);
5554
}
5655
}
5756
Event::Text(e) => {

benches/microbenches.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,50 @@ fn attributes(c: &mut Criterion) {
243243
assert_eq!(count, 150);
244244
})
245245
});
246+
247+
group.finish();
248+
}
249+
250+
/// Benchmarks normalizing attribute values (NOTE(review): every case below calls `unescape` — confirm this exercises the normalization path)
251+
fn attribute_value_normalization(c: &mut Criterion) {
252+
let mut group = c.benchmark_group("attribute_value_normalization");
253+
254+
group.bench_function("noop_short", |b| {
255+
b.iter(|| {
256+
black_box(unescape("foobar")).unwrap();
257+
})
258+
});
259+
260+
group.bench_function("noop_long", |b| {
261+
b.iter(|| {
262+
black_box(unescape("just a bit of text without any entities")).unwrap();
263+
})
264+
});
265+
266+
group.bench_function("replacement_chars", |b| {
267+
b.iter(|| {
268+
black_box(unescape("just a bit\n of text without\tany entities")).unwrap();
269+
})
270+
});
271+
272+
group.bench_function("char_reference", |b| {
273+
b.iter(|| {
274+
let text = "prefix &#34;some stuff&#34;,&#x22;more stuff&#x22;";
275+
black_box(unescape(text)).unwrap();
276+
let text = "&#38;&#60;";
277+
black_box(unescape(text)).unwrap();
278+
})
279+
});
280+
281+
group.bench_function("entity_reference", |b| {
282+
b.iter(|| {
283+
let text = "age &gt; 72 &amp;&amp; age &lt; 21";
284+
black_box(unescape(text)).unwrap();
285+
let text = "&quot;what&apos;s that?&quot;";
286+
black_box(unescape(text)).unwrap();
287+
})
288+
});
289+
246290
group.finish();
247291
}
248292

@@ -355,6 +399,7 @@ criterion_group!(
355399
read_resolved_event_into,
356400
one_event,
357401
attributes,
402+
attribute_value_normalization,
358403
escaping,
359404
unescaping,
360405
);

src/errors.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ impl From<EscapeError> for Error {
229229
}
230230

231231
impl From<AttrError> for Error {
232+
/// Creates a new `Error::InvalidAttr` from the given error
232233
#[inline]
233234
fn from(error: AttrError) -> Self {
234235
Self::InvalidAttr(error)

0 commit comments

Comments
 (0)