Skip to content

Commit 038bfed

Browse files
authored
Merge pull request #395 from Mingun/xs-list
Add support for XML Schema `xs:list` type
2 parents 8fa6f1e + 08171e3 commit 038bfed

File tree

7 files changed

+2124
-33
lines changed

7 files changed

+2124
-33
lines changed

Changelog.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
way to access decoding functionality is via this struct
2323
- [#191]: New event variant `StartText` emitted for bytes before the XML declaration
2424
or a start comment or a tag. For streams with BOM this event will contain a BOM
25+
- [#395]: Add support for XML Schema `xs:list`
2526

2627
### Bug Fixes
2728

@@ -104,6 +105,7 @@
104105
[#387]: https://github.com/tafia/quick-xml/pull/387
105106
[#391]: https://github.com/tafia/quick-xml/pull/391
106107
[#393]: https://github.com/tafia/quick-xml/pull/393
108+
[#395]: https://github.com/tafia/quick-xml/pull/395
107109

108110
## 0.23.0 -- 2022-05-08
109111

src/de/map.rs

Lines changed: 162 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
use crate::{
44
de::escape::EscapedDeserializer,
55
de::seq::{not_in, TagFilter},
6+
de::simple_type::SimpleTypeDeserializer,
67
de::{deserialize_bool, DeEvent, Deserializer, XmlRead, INNER_VALUE, UNFLATTEN_PREFIX},
78
errors::serialize::DeError,
89
events::attributes::IterState,
@@ -35,7 +36,10 @@ enum ValueSource {
3536
/// represented either by an ordinary text node, or by a CDATA node:
3637
///
3738
/// ```xml
38-
/// <...>text content for field value<...>
39+
/// <any-tag>
40+
/// <key>text content</key>
41+
/// <!-- ^^^^^^^^^^^^ - this will be used to deserialize map value -->
42+
/// </any-tag>
3943
/// ```
4044
/// ```xml
4145
/// <any-tag>
@@ -200,8 +204,8 @@ where
200204
) -> Result<Self, DeError> {
201205
Ok(MapAccess {
202206
de,
207+
iter: IterState::new(start.name().as_ref().len(), false),
203208
start,
204-
iter: IterState::new(0, false),
205209
source: ValueSource::Unknown,
206210
fields,
207211
has_value_field: fields.contains(&INNER_VALUE),
@@ -226,8 +230,8 @@ where
226230
) -> Result<Option<K::Value>, Self::Error> {
227231
debug_assert_eq!(self.source, ValueSource::Unknown);
228232

229-
// FIXME: There error positions counted from end of tag name - need global position
230-
let slice = self.start.attributes_raw();
233+
// FIXME: The error positions here are counted from the start of the tag name - need global position
234+
let slice = &self.start.buf;
231235
let decoder = self.de.reader.decoder();
232236

233237
if let Some(a) = self.iter.next(slice).transpose()? {
@@ -305,16 +309,12 @@ where
305309
seed: K,
306310
) -> Result<K::Value, Self::Error> {
307311
match std::mem::replace(&mut self.source, ValueSource::Unknown) {
308-
ValueSource::Attribute(value) => {
309-
let slice = self.start.attributes_raw();
310-
let decoder = self.de.reader.decoder();
311-
312-
seed.deserialize(EscapedDeserializer::new(
313-
Cow::Borrowed(&slice[value]),
314-
decoder,
315-
true,
316-
))
317-
}
312+
ValueSource::Attribute(value) => seed.deserialize(SimpleTypeDeserializer::from_part(
313+
&self.start.buf,
314+
value,
315+
true,
316+
self.de.reader.decoder(),
317+
)),
318318
// This arm processes the following XML shape:
319319
// <any-tag>
320320
// text value
@@ -323,10 +323,21 @@ where
323323
// is implicit and equals to the `INNER_VALUE` constant, and the value
324324
// is a `Text` or a `CData` event (the value deserializer will see one
325325
// of those events)
326-
ValueSource::Text => seed.deserialize(MapValueDeserializer {
327-
map: self,
328-
allow_start: false,
329-
}),
326+
// This case is checked by the "xml_schema_lists::element" tests in tests/serde-de.rs
327+
ValueSource::Text => match self.de.next()? {
328+
DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_cow(
329+
e.into_inner(),
330+
true,
331+
self.de.reader.decoder(),
332+
)),
333+
DeEvent::CData(e) => seed.deserialize(SimpleTypeDeserializer::from_cow(
334+
e.into_inner(),
335+
false,
336+
self.de.reader.decoder(),
337+
)),
338+
// SAFETY: We set `Text` only when we have seen `Text` or `CData`
339+
_ => unreachable!(),
340+
},
330341
// This arm processes the following XML shape:
331342
// <any-tag>
332343
// <any>...</any>
@@ -612,8 +623,140 @@ where
612623
DeEvent::Eof => Err(DeError::UnexpectedEof),
613624

614625
// Start(tag), Text, CData
615-
_ => seed.deserialize(&mut *self.map.de).map(Some),
626+
_ => seed
627+
.deserialize(SeqValueDeserializer { map: self.map })
628+
.map(Some),
616629
};
617630
}
618631
}
619632
}
633+
634+
////////////////////////////////////////////////////////////////////////////////////////////////////
635+
636+
/// A deserializer for a value of a sequence.
637+
struct SeqValueDeserializer<'de, 'a, 'm, R>
638+
where
639+
R: XmlRead<'de>,
640+
{
641+
/// Access to the map that created this deserializer. Gives access to the
642+
/// context, such as the list of fields, that the current map knows about.
643+
map: &'m mut MapAccess<'de, 'a, R>,
644+
}
645+
646+
impl<'de, 'a, 'm, R> SeqValueDeserializer<'de, 'a, 'm, R>
647+
where
648+
R: XmlRead<'de>,
649+
{
650+
/// Returns a text event, used inside [`deserialize_primitives!()`]
651+
#[inline]
652+
fn next_text(&mut self, unescape: bool) -> Result<BytesCData<'de>, DeError> {
653+
self.map.de.next_text_impl(unescape, true)
654+
}
655+
656+
/// Returns a decoder, used inside [`deserialize_primitives!()`]
657+
#[inline]
658+
fn decoder(&self) -> Decoder {
659+
self.map.de.reader.decoder()
660+
}
661+
}
662+
663+
impl<'de, 'a, 'm, R> de::Deserializer<'de> for SeqValueDeserializer<'de, 'a, 'm, R>
664+
where
665+
R: XmlRead<'de>,
666+
{
667+
type Error = DeError;
668+
669+
deserialize_primitives!(mut);
670+
671+
forward!(deserialize_option);
672+
forward!(deserialize_unit);
673+
forward!(deserialize_unit_struct(name: &'static str));
674+
forward!(deserialize_newtype_struct(name: &'static str));
675+
676+
forward!(deserialize_map);
677+
forward!(deserialize_struct(
678+
name: &'static str,
679+
fields: &'static [&'static str]
680+
));
681+
682+
forward!(deserialize_enum(
683+
name: &'static str,
684+
variants: &'static [&'static str]
685+
));
686+
687+
forward!(deserialize_any);
688+
forward!(deserialize_ignored_any);
689+
690+
/// Tuples are represented the same as [sequences](#method.deserialize_seq).
691+
fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value, DeError>
692+
where
693+
V: Visitor<'de>,
694+
{
695+
self.deserialize_seq(visitor)
696+
}
697+
698+
/// Named tuples are represented the same as [unnamed tuples](#method.deserialize_tuple).
699+
fn deserialize_tuple_struct<V>(
700+
self,
701+
_name: &'static str,
702+
len: usize,
703+
visitor: V,
704+
) -> Result<V::Value, DeError>
705+
where
706+
V: Visitor<'de>,
707+
{
708+
self.deserialize_tuple(len, visitor)
709+
}
710+
711+
fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error>
712+
where
713+
V: Visitor<'de>,
714+
{
715+
match self.map.de.next()? {
716+
DeEvent::Text(e) => SimpleTypeDeserializer::from_cow(
717+
// Comment to prevent auto-formatting and keep Text and Cdata similar
718+
e.into_inner(),
719+
true,
720+
self.map.de.reader.decoder(),
721+
)
722+
.deserialize_seq(visitor),
723+
DeEvent::CData(e) => SimpleTypeDeserializer::from_cow(
724+
e.into_inner(),
725+
false,
726+
self.map.de.reader.decoder(),
727+
)
728+
.deserialize_seq(visitor),
729+
// This is a sequence element. We cannot treat it as another flatten
730+
// sequence if the type requires `deserialize_seq`. We instead forward
731+
// it to `xs:simpleType` implementation
732+
DeEvent::Start(e) => {
733+
let value = match self.map.de.next()? {
734+
DeEvent::Text(e) => SimpleTypeDeserializer::from_cow(
735+
e.into_inner(),
736+
true,
737+
self.map.de.reader.decoder(),
738+
)
739+
.deserialize_seq(visitor),
740+
DeEvent::CData(e) => SimpleTypeDeserializer::from_cow(
741+
e.into_inner(),
742+
false,
743+
self.map.de.reader.decoder(),
744+
)
745+
.deserialize_seq(visitor),
746+
e => Err(DeError::Custom(format!("Unsupported event {:?}", e))),
747+
};
748+
// TODO: Maybe assert here that we expect only the matching closing tag?
749+
self.map.de.read_to_end(e.name())?;
750+
value
751+
}
752+
// SAFETY: we use that deserializer only when a Start(element), Text,
753+
// or CData event was peeked already
754+
_ => unreachable!(),
755+
}
756+
}
757+
758+
#[inline]
759+
fn is_human_readable(&self) -> bool {
760+
self.map.de.is_human_readable()
761+
}
762+
}

src/de/mod.rs

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ macro_rules! deserialize_primitives {
215215
mod escape;
216216
mod map;
217217
mod seq;
218+
mod simple_type;
218219
mod var;
219220

220221
pub use crate::errors::serialize::DeError;
@@ -331,6 +332,21 @@ where
331332

332333
// TODO: According to the https://www.w3.org/TR/xmlschema-2/#boolean,
333334
// valid boolean representations are only "true", "false", "1", and "0"
335+
fn str2bool<'de, V>(value: &str, visitor: V) -> Result<V::Value, DeError>
336+
where
337+
V: de::Visitor<'de>,
338+
{
339+
match value {
340+
"true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => {
341+
visitor.visit_bool(true)
342+
}
343+
"false" | "0" | "False" | "FALSE" | "f" | "No" | "NO" | "no" | "n" => {
344+
visitor.visit_bool(false)
345+
}
346+
_ => Err(DeError::InvalidBoolean(value.into())),
347+
}
348+
}
349+
334350
fn deserialize_bool<'de, V>(value: &[u8], decoder: Decoder, visitor: V) -> Result<V::Value, DeError>
335351
where
336352
V: Visitor<'de>,
@@ -339,15 +355,7 @@ where
339355
{
340356
let value = decoder.decode(value)?;
341357
// No need to unescape because valid boolean representations cannot be escaped
342-
match value.as_ref() {
343-
"true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => {
344-
visitor.visit_bool(true)
345-
}
346-
"false" | "0" | "False" | "FALSE" | "f" | "No" | "NO" | "no" | "n" => {
347-
visitor.visit_bool(false)
348-
}
349-
_ => Err(DeError::InvalidBoolean(value.into())),
350-
}
358+
str2bool(value.as_ref(), visitor)
351359
}
352360

353361
#[cfg(not(feature = "encoding"))]

0 commit comments

Comments
 (0)