Skip to content

Commit dd34cab

Browse files
authored
Merge pull request #660 from Mingun/fix-580
Fix `UnexpectedEof` when deserialize `xs:list`s and newtypes
2 parents ae8db96 + 1077d00 commit dd34cab

File tree

6 files changed

+298
-99
lines changed

6 files changed

+298
-99
lines changed

Changelog.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ MSRV bumped to 1.56! Crate now uses Rust 2021 edition.
2020

2121
### Bug Fixes
2222

23+
- [#660]: Fixed incorrect deserialization of `xs:list`s from empty tags (`<tag/>`
24+
or `<tag></tag>`). Previously a `DeError::UnexpectedEof` was returned in that case.
25+
- [#580]: Fixed incorrect deserialization of vectors of newtypes from sequences of tags.
26+
2327
### Misc Changes
2428

2529
- [#643]: Bumped MSRV to 1.56. In practice the previous MSRV was incorrect in many cases.
@@ -32,11 +36,13 @@ MSRV bumped to 1.56! Crate now uses Rust 2021 edition.
3236
(and newly added `ElementWriter::write_inner_content_async` of course).
3337

3438
[#545]: https://github.com/tafia/quick-xml/pull/545
39+
[#580]: https://github.com/tafia/quick-xml/issues/580
3540
[#619]: https://github.com/tafia/quick-xml/issues/619
3641
[#635]: https://github.com/tafia/quick-xml/pull/635
3742
[#643]: https://github.com/tafia/quick-xml/pull/643
3843
[#649]: https://github.com/tafia/quick-xml/pull/646
3944
[#651]: https://github.com/tafia/quick-xml/pull/651
45+
[#660]: https://github.com/tafia/quick-xml/pull/660
4046

4147

4248
## 0.30.0 -- 2023-07-23

src/de/map.rs

Lines changed: 129 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,52 @@ macro_rules! forward {
373373
/// A deserializer for a value of map or struct. That deserializer slightly
374374
/// differently processes events for a primitive types and sequences than
375375
/// a [`Deserializer`].
376+
///
377+
/// This deserializer can see two kinds of events at the start:
378+
/// - [`DeEvent::Text`]
379+
/// - [`DeEvent::Start`]
380+
///
381+
/// which represent two possible variants of items:
382+
/// ```xml
383+
/// <item>A tag item</item>
384+
/// A text item
385+
/// <yet another="tag item"/>
386+
/// ```
387+
///
388+
/// This deserializer is very similar to a [`SeqItemDeserializer`]. The only difference is
389+
/// in the `deserialize_seq` method. This deserializer will act as an iterator
390+
/// over tags / text within its parent tag, whereas the [`SeqItemDeserializer`]
391+
/// will represent sequences as an `xs:list`.
392+
///
393+
/// This deserializer processes items as follows:
394+
/// - primitives (numbers, booleans, strings, characters) are deserialized either
395+
/// from a text content, or unwrapped from a one level of a tag. So, `123` and
396+
/// `<int>123</int>` both can be deserialized into an `u32`;
397+
/// - `Option`:
398+
/// - empty text of [`DeEvent::Text`] is deserialized as `None`;
399+
/// - everything else is deserialized as `Some` using the same deserializer,
400+
/// including `<tag/>` or `<tag></tag>`;
401+
/// - units (`()`) and unit structs consume the whole text or element subtree;
402+
/// - newtype structs are deserialized by forwarding deserialization of inner type
403+
/// with the same deserializer;
404+
/// - sequences, tuples and tuple structs are deserialized by iterating within the
405+
/// parent tag and deserializing each tag or text content using [`SeqItemDeserializer`];
406+
/// - structs and maps are deserialized using new instance of [`MapAccess`];
407+
/// - enums:
408+
/// - in case of [`DeEvent::Text`] event the text content is deserialized as
409+
/// a `$text` variant. Enum content is deserialized from the text using
410+
/// [`SimpleTypeDeserializer`];
411+
/// - in case of [`DeEvent::Start`] event the tag name is deserialized as
412+
/// an enum tag, and the content inside is deserialized as an enum content.
413+
/// Depending on a variant kind deserialization is performed as:
414+
/// - unit variants: consuming text content or a subtree;
415+
/// - newtype variants: forward deserialization to the inner type using
416+
/// this deserializer;
417+
/// - tuple variants: call [`deserialize_tuple`] of this deserializer;
418+
/// - struct variants: call [`deserialize_struct`] of this deserializer.
419+
///
420+
/// [`deserialize_tuple`]: #method.deserialize_tuple
421+
/// [`deserialize_struct`]: #method.deserialize_struct
376422
struct MapValueDeserializer<'de, 'a, 'm, R, E>
377423
where
378424
R: XmlRead<'de>,
@@ -485,7 +531,6 @@ where
485531

486532
forward!(deserialize_unit);
487533

488-
forward!(deserialize_map);
489534
forward!(deserialize_struct(
490535
name: &'static str,
491536
fields: &'static [&'static str]
@@ -497,7 +542,6 @@ where
497542
));
498543

499544
forward!(deserialize_any);
500-
forward!(deserialize_ignored_any);
501545

502546
fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError>
503547
where
@@ -506,6 +550,19 @@ where
506550
deserialize_option!(self.map.de, self, visitor)
507551
}
508552

553+
/// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
554+
/// with the same deserializer.
555+
fn deserialize_newtype_struct<V>(
556+
self,
557+
_name: &'static str,
558+
visitor: V,
559+
) -> Result<V::Value, Self::Error>
560+
where
561+
V: Visitor<'de>,
562+
{
563+
visitor.visit_newtype_struct(self)
564+
}
565+
509566
/// Deserializes each `<tag>` in
510567
/// ```xml
511568
/// <any-tag>
@@ -716,7 +773,59 @@ where
716773

717774
////////////////////////////////////////////////////////////////////////////////////////////////////
718775

719-
/// A deserializer for a single item of a sequence.
776+
/// A deserializer for a single item of a mixed sequence of tags and text.
777+
///
778+
/// This deserializer can see two kinds of events at the start:
779+
/// - [`DeEvent::Text`]
780+
/// - [`DeEvent::Start`]
781+
///
782+
/// which represent two possible variants of items:
783+
/// ```xml
784+
/// <item>A tag item</item>
785+
/// A text item
786+
/// <yet another="tag item"/>
787+
/// ```
788+
///
789+
/// This deserializer is very similar to a [`MapValueDeserializer`]. The only difference is
790+
/// in the `deserialize_seq` method. This deserializer will perform deserialization
791+
/// from the textual content (the text itself in case of [`DeEvent::Text`] event
792+
/// and the text between tags in case of [`DeEvent::Start`] event), whereas
793+
/// the [`MapValueDeserializer`] will iterate over tags / text within its parent tag.
794+
///
795+
/// This deserializer processes items as follows:
796+
/// - primitives (numbers, booleans, strings, characters) are deserialized either
797+
/// from a text content, or unwrapped from a one level of a tag. So, `123` and
798+
/// `<int>123</int>` both can be deserialized into an `u32`;
799+
/// - `Option`:
800+
/// - empty text of [`DeEvent::Text`] is deserialized as `None`;
801+
/// - everything else is deserialized as `Some` using the same deserializer,
802+
/// including `<tag/>` or `<tag></tag>`;
803+
/// - units (`()`) and unit structs consume the whole text or element subtree;
804+
/// - newtype structs are deserialized as tuple structs with one element;
805+
/// - sequences, tuples and tuple structs are deserialized using [`SimpleTypeDeserializer`]
806+
/// (this is the difference):
807+
/// - in case of [`DeEvent::Text`] event the text content is passed to the deserializer directly;
808+
/// - in case of [`DeEvent::Start`] event the start and end tags are stripped,
809+
/// and text between them is passed to [`SimpleTypeDeserializer`]. If the tag
810+
/// contains something else other than text, an error is returned, but if it
811+
/// contains a text and something else (for example, `<item>text<tag/></item>`),
812+
/// then the trail is just ignored;
813+
/// - structs and maps are deserialized using new [`MapAccess`];
814+
/// - enums:
815+
/// - in case of [`DeEvent::Text`] event the text content is deserialized as
816+
/// a `$text` variant. Enum content is deserialized from the text using
817+
/// [`SimpleTypeDeserializer`];
818+
/// - in case of [`DeEvent::Start`] event the tag name is deserialized as
819+
/// an enum tag, and the content inside is deserialized as an enum content.
820+
/// Depending on a variant kind deserialization is performed as:
821+
/// - unit variants: consuming text content or a subtree;
822+
/// - newtype variants: forward deserialization to the inner type using
823+
/// this deserializer;
824+
/// - tuple variants: deserialize it as an `xs:list`;
825+
/// - struct variants: call [`deserialize_struct`] of this deserializer.
826+
///
827+
/// [`deserialize_tuple`]: #method.deserialize_tuple
828+
/// [`deserialize_struct`]: #method.deserialize_struct
720829
struct SeqItemDeserializer<'de, 'a, 'm, R, E>
721830
where
722831
R: XmlRead<'de>,
@@ -754,7 +863,6 @@ where
754863

755864
forward!(deserialize_unit);
756865

757-
forward!(deserialize_map);
758866
forward!(deserialize_struct(
759867
name: &'static str,
760868
fields: &'static [&'static str]
@@ -766,7 +874,6 @@ where
766874
));
767875

768876
forward!(deserialize_any);
769-
forward!(deserialize_ignored_any);
770877

771878
fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError>
772879
where
@@ -775,6 +882,20 @@ where
775882
deserialize_option!(self.map.de, self, visitor)
776883
}
777884

885+
/// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
886+
/// with the [`SimpleTypeDeserializer`].
887+
fn deserialize_newtype_struct<V>(
888+
mut self,
889+
_name: &'static str,
890+
visitor: V,
891+
) -> Result<V::Value, Self::Error>
892+
where
893+
V: Visitor<'de>,
894+
{
895+
let text = self.read_string()?;
896+
visitor.visit_newtype_struct(SimpleTypeDeserializer::from_text(text))
897+
}
898+
778899
/// This method deserializes a sequence inside of element that itself is a
779900
/// sequence element:
780901
///
@@ -787,34 +908,12 @@ where
787908
/// ...
788909
/// </>
789910
/// ```
790-
fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error>
911+
fn deserialize_seq<V>(mut self, visitor: V) -> Result<V::Value, Self::Error>
791912
where
792913
V: Visitor<'de>,
793914
{
794-
match self.map.de.next()? {
795-
DeEvent::Text(e) => {
796-
SimpleTypeDeserializer::from_text_content(e).deserialize_seq(visitor)
797-
}
798-
// This is a sequence element. We cannot treat it as another flatten
799-
// sequence if type will require `deserialize_seq` We instead forward
800-
// it to `xs:simpleType` implementation
801-
DeEvent::Start(e) => {
802-
let value = match self.map.de.next()? {
803-
DeEvent::Text(e) => {
804-
SimpleTypeDeserializer::from_text_content(e).deserialize_seq(visitor)
805-
}
806-
e => Err(DeError::Unsupported(
807-
format!("unsupported event {:?}", e).into(),
808-
)),
809-
};
810-
// TODO: May be assert that here we expect only matching closing tag?
811-
self.map.de.read_to_end(e.name())?;
812-
value
813-
}
814-
// SAFETY: we use that deserializer only when Start(element) or Text
815-
// event was peeked already
816-
_ => unreachable!(),
817-
}
915+
let text = self.read_string()?;
916+
SimpleTypeDeserializer::from_text(text).deserialize_seq(visitor)
818917
}
819918

820919
#[inline]

src/de/mod.rs

Lines changed: 34 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1904,18 +1904,6 @@ macro_rules! deserialize_primitives {
19041904
self.deserialize_unit(visitor)
19051905
}
19061906

1907-
/// Representation of the newtypes the same as one-element [tuple](#method.deserialize_tuple).
1908-
fn deserialize_newtype_struct<V>(
1909-
self,
1910-
_name: &'static str,
1911-
visitor: V,
1912-
) -> Result<V::Value, DeError>
1913-
where
1914-
V: Visitor<'de>,
1915-
{
1916-
self.deserialize_tuple(1, visitor)
1917-
}
1918-
19191907
/// Representation of tuples the same as [sequences](#method.deserialize_seq).
19201908
fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value, DeError>
19211909
where
@@ -1937,13 +1925,32 @@ macro_rules! deserialize_primitives {
19371925
self.deserialize_tuple(len, visitor)
19381926
}
19391927

1928+
/// Forwards deserialization to the [`deserialize_struct`](#method.deserialize_struct)
1929+
/// with empty name and fields.
1930+
#[inline]
1931+
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, DeError>
1932+
where
1933+
V: Visitor<'de>,
1934+
{
1935+
self.deserialize_struct("", &[], visitor)
1936+
}
1937+
19401938
/// Identifiers represented as [strings](#method.deserialize_str).
19411939
fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, DeError>
19421940
where
19431941
V: Visitor<'de>,
19441942
{
19451943
self.deserialize_str(visitor)
19461944
}
1945+
1946+
/// Forwards deserialization to the [`deserialize_unit`](#method.deserialize_unit).
1947+
#[inline]
1948+
fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, DeError>
1949+
where
1950+
V: Visitor<'de>,
1951+
{
1952+
self.deserialize_unit(visitor)
1953+
}
19471954
};
19481955
}
19491956

@@ -2820,30 +2827,36 @@ where
28202827
}
28212828
}
28222829

2823-
fn deserialize_enum<V>(
2830+
/// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
2831+
/// with the same deserializer.
2832+
fn deserialize_newtype_struct<V>(
28242833
self,
28252834
_name: &'static str,
2826-
_variants: &'static [&'static str],
28272835
visitor: V,
28282836
) -> Result<V::Value, DeError>
28292837
where
28302838
V: Visitor<'de>,
28312839
{
2832-
visitor.visit_enum(var::EnumAccess::new(self))
2840+
visitor.visit_newtype_struct(self)
28332841
}
28342842

2835-
fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, DeError>
2843+
fn deserialize_enum<V>(
2844+
self,
2845+
_name: &'static str,
2846+
_variants: &'static [&'static str],
2847+
visitor: V,
2848+
) -> Result<V::Value, DeError>
28362849
where
28372850
V: Visitor<'de>,
28382851
{
2839-
visitor.visit_seq(self)
2852+
visitor.visit_enum(var::EnumAccess::new(self))
28402853
}
28412854

2842-
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, DeError>
2855+
fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, DeError>
28432856
where
28442857
V: Visitor<'de>,
28452858
{
2846-
self.deserialize_struct("", &[], visitor)
2859+
visitor.visit_seq(self)
28472860
}
28482861

28492862
fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError>
@@ -2853,39 +2866,13 @@ where
28532866
deserialize_option!(self, self, visitor)
28542867
}
28552868

2856-
/// Always call `visitor.visit_unit()` because returned value ignored in any case.
2857-
///
2858-
/// This method consumes any single [event][DeEvent] except the [`Start`]
2859-
/// event, in which case all events up to and including corresponding [`End`]
2860-
/// event will be consumed.
2861-
///
2862-
/// This method returns error if current event is [`End`] or [`Eof`].
2863-
///
2864-
/// [`Start`]: DeEvent::Start
2865-
/// [`End`]: DeEvent::End
2866-
/// [`Eof`]: DeEvent::Eof
2867-
fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, DeError>
2868-
where
2869-
V: Visitor<'de>,
2870-
{
2871-
match self.next()? {
2872-
DeEvent::Start(e) => self.read_to_end(e.name())?,
2873-
DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
2874-
DeEvent::Eof => return Err(DeError::UnexpectedEof),
2875-
_ => (),
2876-
}
2877-
visitor.visit_unit()
2878-
}
2879-
28802869
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, DeError>
28812870
where
28822871
V: Visitor<'de>,
28832872
{
28842873
match self.peek()? {
2885-
DeEvent::Start(_) => self.deserialize_map(visitor),
2886-
// Redirect to deserialize_unit in order to consume an event and return an appropriate error
2887-
DeEvent::End(_) | DeEvent::Eof => self.deserialize_unit(visitor),
2888-
_ => self.deserialize_string(visitor),
2874+
DeEvent::Text(_) => self.deserialize_str(visitor),
2875+
_ => self.deserialize_map(visitor),
28892876
}
28902877
}
28912878
}

0 commit comments

Comments
 (0)