Skip to content

Commit 3b37c0e

Browse files
authored
Merge pull request #398 from Mingun/no-empty-texts
Do not generate empty `Text` events
2 parents e701c4d + bdf9f46 commit 3b37c0e

File tree

4 files changed

+121
-16
lines changed

4 files changed

+121
-16
lines changed

Changelog.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
- [#393]: Now `event_namespace`, `attribute_namespace` and `read_event_namespaced`
3030
return `ResolveResult::Unknown` if the prefix was not registered in the namespace buffer
3131
- [#393]: Fix processing breaking after encountering an attribute with a reserved name (starting with "xmlns")
32+
- [#363]: Do not generate empty `Event::Text` events
3233

3334
### Misc Changes
3435

@@ -61,9 +62,11 @@
6162
- [#387]: Added a bunch of tests for sequences deserialization
6263
- [#393]: Added more tests for namespace resolver
6364
- [#393]: Added tests for reserved names (starting with "xml"i) -- see <https://www.w3.org/TR/xml-names11/#xmlReserved>
65+
- [#363]: Add tests for `Reader::read_event_buffered` to ensure that the proper events are generated for the corresponding inputs
6466

6567
[#8]: https://github.com/Mingun/fast-xml/pull/8
6668
[#9]: https://github.com/Mingun/fast-xml/pull/9
69+
[#363]: https://github.com/tafia/quick-xml/issues/363
6770
[#387]: https://github.com/tafia/quick-xml/pull/387
6871
[#391]: https://github.com/tafia/quick-xml/pull/391
6972
[#393]: https://github.com/tafia/quick-xml/pull/393

src/reader.rs

Lines changed: 115 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -266,9 +266,11 @@ impl<R: BufRead> Reader<R> {
266266

267267
if self.trim_text_start {
268268
self.reader.skip_whitespace(&mut self.buf_position)?;
269-
if self.reader.skip_one(b'<', &mut self.buf_position)? {
270-
return self.read_event_buffered(buf);
271-
}
269+
}
270+
271+
// If we are already at the `<` symbol, do not try to return an empty Text event
272+
if self.reader.skip_one(b'<', &mut self.buf_position)? {
273+
return self.read_event_buffered(buf);
272274
}
273275

274276
match self
@@ -2244,6 +2246,116 @@ mod test {
22442246
}
22452247
}
22462248
}
2249+
2250+
/// Ensures that no empty `Text` events are generated
2251+
mod read_event_buffered {
2252+
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
2253+
use crate::reader::Reader;
2254+
use pretty_assertions::assert_eq;
2255+
2256+
#[test]
2257+
fn declaration() {
2258+
let mut reader = Reader::from_str("<?xml ?>");
2259+
2260+
assert_eq!(
2261+
reader.read_event_buffered($buf).unwrap(),
2262+
Event::Decl(BytesDecl::from_start(BytesStart::borrowed(b"xml ", 3)))
2263+
);
2264+
}
2265+
2266+
#[test]
2267+
fn doctype() {
2268+
let mut reader = Reader::from_str("<!DOCTYPE x>");
2269+
2270+
assert_eq!(
2271+
reader.read_event_buffered($buf).unwrap(),
2272+
Event::DocType(BytesText::from_escaped(b"x".as_ref()))
2273+
);
2274+
}
2275+
2276+
#[test]
2277+
fn processing_instruction() {
2278+
let mut reader = Reader::from_str("<?xml-stylesheet?>");
2279+
2280+
assert_eq!(
2281+
reader.read_event_buffered($buf).unwrap(),
2282+
Event::PI(BytesText::from_escaped(b"xml-stylesheet".as_ref()))
2283+
);
2284+
}
2285+
2286+
#[test]
2287+
fn start() {
2288+
let mut reader = Reader::from_str("<tag>");
2289+
2290+
assert_eq!(
2291+
reader.read_event_buffered($buf).unwrap(),
2292+
Event::Start(BytesStart::borrowed_name(b"tag"))
2293+
);
2294+
}
2295+
2296+
#[test]
2297+
fn end() {
2298+
let mut reader = Reader::from_str("</tag>");
2299+
// Because we expect invalid XML, do not check that
2300+
// the end name is paired with the start name
2301+
reader.check_end_names(false);
2302+
2303+
assert_eq!(
2304+
reader.read_event_buffered($buf).unwrap(),
2305+
Event::End(BytesEnd::borrowed(b"tag"))
2306+
);
2307+
}
2308+
2309+
#[test]
2310+
fn empty() {
2311+
let mut reader = Reader::from_str("<tag/>");
2312+
2313+
assert_eq!(
2314+
reader.read_event_buffered($buf).unwrap(),
2315+
Event::Empty(BytesStart::borrowed_name(b"tag"))
2316+
);
2317+
}
2318+
2319+
#[test]
2320+
fn text() {
2321+
let mut reader = Reader::from_str("text");
2322+
2323+
assert_eq!(
2324+
reader.read_event_buffered($buf).unwrap(),
2325+
Event::Text(BytesText::from_escaped(b"text".as_ref()))
2326+
);
2327+
}
2328+
2329+
#[test]
2330+
fn cdata() {
2331+
let mut reader = Reader::from_str("<![CDATA[]]>");
2332+
2333+
assert_eq!(
2334+
reader.read_event_buffered($buf).unwrap(),
2335+
Event::CData(BytesCData::from_str(""))
2336+
);
2337+
}
2338+
2339+
#[test]
2340+
fn comment() {
2341+
let mut reader = Reader::from_str("<!---->");
2342+
2343+
assert_eq!(
2344+
reader.read_event_buffered($buf).unwrap(),
2345+
Event::Comment(BytesText::from_escaped(b"".as_ref()))
2346+
);
2347+
}
2348+
2349+
#[test]
2350+
fn eof() {
2351+
let mut reader = Reader::from_str("");
2352+
2353+
assert_eq!(
2354+
reader.read_event_buffered($buf).unwrap(),
2355+
Event::Eof
2356+
);
2357+
}
2358+
}
22472359
};
22482360
}
22492361

tests/unit_tests.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -168,10 +168,7 @@ fn test_trim_test() {
168168

169169
let mut r = Reader::from_str(txt);
170170
r.trim_text(false);
171-
next_eq!(
172-
r, Text, b"", Start, b"a", Text, b"", Start, b"b", Text, b" ", End, b"b", Text, b"", End,
173-
b"a"
174-
);
171+
next_eq!(r, Start, b"a", Start, b"b", Text, b" ", End, b"b", End, b"a");
175172
}
176173

177174
#[test]

tests/xmlrs_reader_tests.rs

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -257,9 +257,7 @@ fn issue_98_cdata_ending_with_right_bracket() {
257257
r#"<hello><![CDATA[Foo [Bar]]]></hello>"#,
258258
r#"
259259
|StartElement(hello)
260-
|Characters()
261260
|CData(Foo [Bar])
262-
|Characters()
263261
|EndElement(hello)
264262
|EndDocument
265263
"#,
@@ -306,9 +304,7 @@ fn issue_105_unexpected_double_dash() {
306304
r#"<hello><![CDATA[--]]></hello>"#,
307305
r#"
308306
|StartElement(hello)
309-
|Characters()
310307
|CData(--)
311-
|Characters()
312308
|EndElement(hello)
313309
|EndDocument
314310
"#,
@@ -359,10 +355,12 @@ fn default_namespace_applies_to_end_elem() {
359355
);
360356
}
361357

358+
#[track_caller]
362359
fn test(input: &str, output: &str, is_short: bool) {
363360
test_bytes(input.as_bytes(), output.as_bytes(), is_short);
364361
}
365362

363+
#[track_caller]
366364
fn test_bytes(input: &[u8], output: &[u8], is_short: bool) {
367365
// Normalize newlines on Windows to just \n, which is what the reader and
368366
// writer use.
@@ -380,11 +378,6 @@ fn test_bytes(input: &[u8], output: &[u8], is_short: bool) {
380378
let mut buf = Vec::new();
381379
let mut ns_buffer = Vec::new();
382380

383-
if !is_short {
384-
// discard first whitespace
385-
reader.read_event(&mut buf).unwrap();
386-
}
387-
388381
loop {
389382
buf.clear();
390383
let event = reader.read_namespaced_event(&mut buf, &mut ns_buffer);

0 commit comments

Comments
 (0)