Skip to content

Commit 9fb181a

Browse files
authored
Merge pull request #684 from Mingun/errors
Add new tests for syntax and ill-formed parser errors and fix... emm... errors
2 parents 64c4249 + 2c55638 commit 9fb181a

File tree

15 files changed

+928
-254
lines changed

15 files changed

+928
-254
lines changed

Changelog.md

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,27 +20,39 @@ configuration is serializable.
2020
- [#677]: Added methods `config()` and `config_mut()` to inspect and change the parser
2121
configuration. Previous builder methods on `Reader` / `NsReader` were replaced by
2222
direct access to fields of config using `reader.config_mut().<...>`.
23+
- [#684]: Added a method `Config::enable_all_checks` to turn on or off all
24+
well-formedness checks.
2325

2426
### Bug Fixes
2527

2628
- [#622]: Fix wrong disregarding of not closed markup, such as lone `<`.
29+
- [#684]: Fix incorrect position reported for `Error::IllFormed(DoubleHyphenInComment)`.
30+
- [#684]: Fix incorrect position reported for `Error::IllFormed(MissingDoctypeName)`.
2731

2832
### Misc Changes
2933

3034
- [#675]: Minimum supported version of serde raised to 1.0.139
3135
- [#675]: Rework the `quick_xml::Error` type to provide more accurate information:
32-
- `Error::EndEventMismatch` replaced by `IllFormedError::MismatchedEnd` in some cases
33-
- `Error::EndEventMismatch` replaced by `IllFormedError::UnmatchedEnd` in some cases
36+
- `Error::EndEventMismatch` replaced by `IllFormedError::MismatchedEndTag` in some cases
37+
- `Error::EndEventMismatch` replaced by `IllFormedError::UnmatchedEndTag` in some cases
3438
- `Error::TextNotFound` was removed because not used
3539
- `Error::UnexpectedBang` replaced by `SyntaxError`
3640
- `Error::UnexpectedEof` replaced by `SyntaxError` in some cases
3741
- `Error::UnexpectedEof` replaced by `IllFormedError` in some cases
3842
- `Error::UnexpectedToken` replaced by `IllFormedError::DoubleHyphenInComment`
43+
- `Error::XmlDeclWithoutVersion` replaced by `IllFormedError::MissingDeclVersion` (in [#684])
44+
- `Error::EmptyDocType` replaced by `IllFormedError::MissingDoctypeName` (in [#684])
45+
- [#684]: Changed positions reported for `SyntaxError`s: now they always point
46+
to the start of the markup with the error (i.e. to the `<` character).
47+
- [#684]: `<??>` is now parsed as `Event::PI` with empty content instead of raising
48+
syntax error.
49+
- [#684]: `<?xml?>` is now parsed as `Event::Decl` instead of `Event::PI`.
3950

4051
[#513]: https://github.com/tafia/quick-xml/issues/513
4152
[#622]: https://github.com/tafia/quick-xml/issues/622
4253
[#675]: https://github.com/tafia/quick-xml/pull/675
4354
[#677]: https://github.com/tafia/quick-xml/pull/677
55+
[#684]: https://github.com/tafia/quick-xml/pull/684
4456

4557

4658
## 0.31.0 -- 2023-10-22

src/de/mod.rs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2649,7 +2649,7 @@ where
26492649
/// |[`DeEvent::Start`]|`<any-tag>...</any-tag>` |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart)
26502650
/// |[`DeEvent::End`] |`</tag>` |Returns an empty slice. The reader guarantees that the tag will match the open one
26512651
/// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged, expects the `</tag>` after that
2652-
/// |[`DeEvent::Eof`] | |Emits [`InvalidXml(IllFormed(MissedEnd))`](DeError::InvalidXml)
2652+
/// |[`DeEvent::Eof`] | |Emits [`InvalidXml(IllFormed(MissingEndTag))`](DeError::InvalidXml)
26532653
///
26542654
/// [`Text`]: Event::Text
26552655
/// [`CData`]: Event::CData
@@ -3642,7 +3642,7 @@ mod tests {
36423642

36433643
match de.read_to_end(QName(b"tag")) {
36443644
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
3645-
assert_eq!(cause, IllFormedError::MissedEnd("tag".into()))
3645+
assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()))
36463646
}
36473647
x => panic!(
36483648
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
@@ -3661,7 +3661,7 @@ mod tests {
36613661

36623662
match de.read_to_end(QName(b"tag")) {
36633663
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
3664-
assert_eq!(cause, IllFormedError::MissedEnd("tag".into()))
3664+
assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()))
36653665
}
36663666
x => panic!(
36673667
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
@@ -3756,7 +3756,7 @@ mod tests {
37563756
fn read_string() {
37573757
match from_str::<String>(r#"</root>"#) {
37583758
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
3759-
assert_eq!(cause, IllFormedError::UnmatchedEnd("root".into()));
3759+
assert_eq!(cause, IllFormedError::UnmatchedEndTag("root".into()));
37603760
}
37613761
x => panic!(
37623762
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
@@ -3770,7 +3770,7 @@ mod tests {
37703770
match from_str::<String>(r#"<root></other>"#) {
37713771
Err(DeError::InvalidXml(Error::IllFormed(cause))) => assert_eq!(
37723772
cause,
3773-
IllFormedError::MismatchedEnd {
3773+
IllFormedError::MismatchedEndTag {
37743774
expected: "root".into(),
37753775
found: "other".into(),
37763776
}
@@ -4098,7 +4098,7 @@ mod tests {
40984098
assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
40994099
match de.next() {
41004100
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4101-
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag2".into()));
4101+
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag2".into()));
41024102
}
41034103
x => panic!(
41044104
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
@@ -4241,7 +4241,7 @@ mod tests {
42414241
let mut de = make_de("</tag>");
42424242
match de.next() {
42434243
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4244-
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag".into()));
4244+
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
42454245
}
42464246
x => panic!(
42474247
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
@@ -4320,7 +4320,7 @@ mod tests {
43204320
assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
43214321
match de.next() {
43224322
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4323-
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag".into()));
4323+
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
43244324
}
43254325
x => panic!(
43264326
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
@@ -4352,7 +4352,7 @@ mod tests {
43524352
assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into()));
43534353
match de.next() {
43544354
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4355-
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag".into()));
4355+
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
43564356
}
43574357
x => panic!(
43584358
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
@@ -4458,7 +4458,7 @@ mod tests {
44584458
assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
44594459
match de.next() {
44604460
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4461-
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag".into()));
4461+
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
44624462
}
44634463
x => panic!(
44644464
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
@@ -4488,7 +4488,7 @@ mod tests {
44884488
assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into()));
44894489
match de.next() {
44904490
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4491-
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag".into()));
4491+
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
44924492
}
44934493
x => panic!(
44944494
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
@@ -4538,7 +4538,7 @@ mod tests {
45384538
assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
45394539
match de.next() {
45404540
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4541-
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag".into()));
4541+
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
45424542
}
45434543
x => panic!(
45444544
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",

src/errors.rs

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -71,17 +71,36 @@ impl std::error::Error for SyntaxError {}
7171
/// [well-formed]: https://www.w3.org/TR/xml11/#dt-wellformed
7272
#[derive(Clone, Debug, PartialEq, Eq)]
7373
pub enum IllFormedError {
74+
/// A `version` attribute was not found in an XML declaration or is not the
75+
/// first attribute.
76+
///
77+
/// According to the [specification], the XML declaration (`<?xml ?>`) MUST contain
78+
/// a `version` attribute and it MUST be the first attribute. This error indicates,
79+
/// that the declaration does not contain attributes at all (if contains `None`)
80+
/// or either `version` attribute is not present or not the first attribute in
81+
/// the declaration. In the last case it contains the name of the found attribute.
82+
///
83+
/// [specification]: https://www.w3.org/TR/xml11/#sec-prolog-dtd
84+
MissingDeclVersion(Option<String>),
85+
/// A document type definition (DTD) does not contain a name of a root element.
86+
///
87+
/// According to the [specification], document type definition (`<!DOCTYPE foo>`)
88+
/// MUST contain a name which defines a document type (`foo`). If that name
89+
/// is missing, this error is returned.
90+
///
91+
/// [specification]: https://www.w3.org/TR/xml11/#NT-doctypedecl
92+
MissingDoctypeName,
7493
/// The end tag was not found during reading of a sub-tree of elements due to
7594
/// encountering an EOF from the underlying reader. This error is returned from
7695
/// [`Reader::read_to_end`].
7796
///
7897
/// [`Reader::read_to_end`]: crate::reader::Reader::read_to_end
79-
MissedEnd(String),
98+
MissingEndTag(String),
8099
/// The specified end tag was encountered without corresponding open tag at the
81100
/// same level of hierarchy
82-
UnmatchedEnd(String),
101+
UnmatchedEndTag(String),
83102
/// The specified end tag does not match the start tag at that nesting level.
84-
MismatchedEnd {
103+
MismatchedEndTag {
85104
/// Name of open tag, that is expected to be closed
86105
expected: String,
87106
/// Name of actually closed tag
@@ -103,15 +122,25 @@ pub enum IllFormedError {
103122
impl fmt::Display for IllFormedError {
104123
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
105124
match self {
106-
Self::MissedEnd(tag) => write!(
125+
Self::MissingDeclVersion(None) => {
126+
write!(f, "an XML declaration does not contain `version` attribute")
127+
}
128+
Self::MissingDeclVersion(Some(attr)) => {
129+
write!(f, "an XML declaration must start with `version` attribute, but in starts with `{}`", attr)
130+
}
131+
Self::MissingDoctypeName => write!(
132+
f,
133+
"`<!DOCTYPE>` declaration does not contain a name of a document type"
134+
),
135+
Self::MissingEndTag(tag) => write!(
107136
f,
108137
"start tag not closed: `</{}>` not found before end of input",
109138
tag,
110139
),
111-
Self::UnmatchedEnd(tag) => {
140+
Self::UnmatchedEndTag(tag) => {
112141
write!(f, "close tag `</{}>` does not match any open tag", tag)
113142
}
114-
Self::MismatchedEnd { expected, found } => write!(
143+
Self::MismatchedEndTag { expected, found } => write!(
115144
f,
116145
"expected `</{}>`, but `</{}>` was found",
117146
expected, found,
@@ -143,25 +172,6 @@ pub enum Error {
143172
///
144173
/// [`encoding`]: index.html#encoding
145174
NonDecodable(Option<Utf8Error>),
146-
/// A `version` attribute was not found in an XML declaration or is not the
147-
/// first attribute.
148-
///
149-
/// According to the [specification], the XML declaration (`<?xml ?>`) MUST contain
150-
/// a `version` attribute and it MUST be the first attribute. This error indicates,
151-
/// that the declaration does not contain attributes at all (if contains `None`)
152-
/// or either `version` attribute is not present or not the first attribute in
153-
/// the declaration. In the last case it contains the name of the found attribute.
154-
///
155-
/// [specification]: https://www.w3.org/TR/xml11/#sec-prolog-dtd
156-
XmlDeclWithoutVersion(Option<String>),
157-
/// A document type definition (DTD) does not contain a name of a root element.
158-
///
159-
/// According to the [specification], document type definition (`<!doctype foo>`)
160-
/// MUST contain a name which defines a document type. If that name is missed,
161-
/// this error is returned.
162-
///
163-
/// [specification]: https://www.w3.org/TR/xml11/#NT-doctypedecl
164-
EmptyDocType,
165175
/// Attribute parsing error
166176
InvalidAttr(AttrError),
167177
/// Escape error
@@ -189,7 +199,7 @@ pub enum Error {
189199
impl Error {
190200
pub(crate) fn missed_end(name: QName, decoder: Decoder) -> Self {
191201
match decoder.decode(name.as_ref()) {
192-
Ok(name) => IllFormedError::MissedEnd(name.into()).into(),
202+
Ok(name) => IllFormedError::MissingEndTag(name.into()).into(),
193203
Err(err) => err.into(),
194204
}
195205
}
@@ -261,16 +271,6 @@ impl fmt::Display for Error {
261271
Error::IllFormed(e) => write!(f, "ill-formed document: {}", e),
262272
Error::NonDecodable(None) => write!(f, "Malformed input, decoding impossible"),
263273
Error::NonDecodable(Some(e)) => write!(f, "Malformed UTF-8 input: {}", e),
264-
Error::XmlDeclWithoutVersion(None) => {
265-
write!(f, "an XML declaration does not contain `version` attribute")
266-
}
267-
Error::XmlDeclWithoutVersion(Some(attr)) => {
268-
write!(f, "an XML declaration must start with `version` attribute, but in starts with `{}`", attr)
269-
}
270-
Error::EmptyDocType => write!(
271-
f,
272-
"`<!DOCTYPE>` declaration does not contain a name of a document type"
273-
),
274274
Error::InvalidAttr(e) => write!(f, "error while parsing attribute: {}", e),
275275
Error::EscapeError(e) => write!(f, "{}", e),
276276
Error::UnknownPrefix(prefix) => {

src/events/mod.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ use std::ops::Deref;
4545
use std::str::from_utf8;
4646

4747
use crate::encoding::Decoder;
48-
use crate::errors::{Error, Result};
48+
use crate::errors::{Error, IllFormedError, Result};
4949
use crate::escape::{escape, partial_escape, unescape_with};
5050
use crate::name::{LocalName, QName};
5151
use crate::reader::is_whitespace;
@@ -391,12 +391,12 @@ impl<'a> BytesDecl<'a> {
391391
/// In case of multiple attributes value of the first one is returned.
392392
///
393393
/// If version is missing in the declaration, or the first thing is not a version,
394-
/// [`Error::XmlDeclWithoutVersion`] will be returned.
394+
/// [`IllFormedError::MissingDeclVersion`] will be returned.
395395
///
396396
/// # Examples
397397
///
398398
/// ```
399-
/// use quick_xml::Error;
399+
/// use quick_xml::errors::{Error, IllFormedError};
400400
/// use quick_xml::events::{BytesDecl, BytesStart};
401401
///
402402
/// // <?xml version='1.1'?>
@@ -410,21 +410,21 @@ impl<'a> BytesDecl<'a> {
410410
/// // <?xml encoding='utf-8'?>
411411
/// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
412412
/// match decl.version() {
413-
/// Err(Error::XmlDeclWithoutVersion(Some(key))) => assert_eq!(key, "encoding"),
413+
/// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
414414
/// _ => assert!(false),
415415
/// }
416416
///
417417
/// // <?xml encoding='utf-8' version='1.1'?>
418418
/// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
419419
/// match decl.version() {
420-
/// Err(Error::XmlDeclWithoutVersion(Some(key))) => assert_eq!(key, "encoding"),
420+
/// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
421421
/// _ => assert!(false),
422422
/// }
423423
///
424424
/// // <?xml?>
425425
/// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
426426
/// match decl.version() {
427-
/// Err(Error::XmlDeclWithoutVersion(None)) => {},
427+
/// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
428428
/// _ => assert!(false),
429429
/// }
430430
/// ```
@@ -437,12 +437,14 @@ impl<'a> BytesDecl<'a> {
437437
// first attribute was not "version"
438438
Some(Ok(a)) => {
439439
let found = from_utf8(a.key.as_ref())?.to_string();
440-
Err(Error::XmlDeclWithoutVersion(Some(found)))
440+
Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
441+
found,
442+
))))
441443
}
442444
// error parsing attributes
443445
Some(Err(e)) => Err(e.into()),
444446
// no attributes
445-
None => Err(Error::XmlDeclWithoutVersion(None)),
447+
None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
446448
}
447449
}
448450

src/reader/buffered_reader.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ macro_rules! impl_buffered_source {
105105
buf.push(b'!');
106106
self $(.$reader)? .consume(1);
107107

108-
let bang_type = BangType::new(self.peek_one() $(.$await)? ?)?;
108+
let bang_type = BangType::new(self.peek_one() $(.$await)? ?, position)?;
109109

110110
loop {
111111
match self $(.$reader)? .fill_buf() $(.$await)? {
@@ -139,6 +139,10 @@ macro_rules! impl_buffered_source {
139139
}
140140
}
141141

142+
// <!....EOF
143+
// ^^^^^ - `buf` does not contain `<`, but we want to report error at `<`,
144+
// so we move offset to it (+1 for `<`)
145+
*position -= 1;
142146
Err(bang_type.to_err())
143147
}
144148

@@ -182,6 +186,10 @@ macro_rules! impl_buffered_source {
182186
};
183187
}
184188

189+
// <.....EOF
190+
// ^^^^^ - `buf` does not contain `<`, but we want to report error at `<`,
191+
// so we move offset to it (+1 for `<`)
192+
*position -= 1;
185193
Err(Error::Syntax(SyntaxError::UnclosedTag))
186194
}
187195

0 commit comments

Comments
 (0)