7777//! ```xml
7878//! <...>text<![CDATA[cdata]]>text</...>
7979//! ```
80- //! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
81- //!
82- //! Merging of the text / CDATA content is tracked in the issue [#474] and
83- //! will be available in the next release.
84- //! </div>
80+ //! Mixed text / CDATA content represents one logical string, `"textcdatatext"` in this case.
8581//! </td>
8682//! <td>
8783//!
9086//! - [`Cow<str>`]
9187//! - [`u32`], [`f32`] and other numeric types
9288//! - `enum`s, like
93- //! ```ignore
94- //! // FIXME: #474, merging mixed text / CDATA
95- //! // content does not work yet
89+ //! ```
9690//! # use pretty_assertions::assert_eq;
9791//! # use serde::Deserialize;
9892//! # #[derive(Debug, PartialEq)]
149143//! ...
150144//! ]]></...>
151145//! ```
152- //! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
153- //!
154- //! Merging of the text / CDATA content is tracked in the issue [#474] and
155- //! will be available in the next release.
156- //! </div>
157146//!
158147//! [`xs:list`]: https://www.w3.org/TR/xmlschema11-2/#list-datatypes
159148//! </td>
162151//! Use any type that is deserialized using a [`deserialize_seq()`] call, for example:
163152//!
164153//! ```
165- //! // FIXME: #474, merging mixed text / CDATA
166- //! // content does not work yet
167154//! type List = Vec<u32>;
168155//! ```
169156//!
520507//! }
521508//! # assert_eq!(AnyName::One { field1: () }, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
522509//! # assert_eq!(AnyName::Two { field2: () }, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
523- //! # assert_eq!(AnyName::Text("text".into()), quick_xml::de::from_str(r#"text"#).unwrap());
524- //! # // TODO: After #474 parse mixed content
510+ //! # assert_eq!(AnyName::Text("text cdata ".into()), quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
525511//! ```
526512//! ```
527513//! # use pretty_assertions::assert_eq;
544530//! }
545531//! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
546532//! # assert_eq!(AnyName::Two(Two { field2: () }), quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
547- //! # assert_eq!(AnyName::Text, quick_xml::de::from_str(r#"text"#).unwrap());
548- //! # // TODO: After #474 parse mixed content
533+ //! # assert_eq!(AnyName::Text, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
549534//! ```
550535//! ```
551536//! # use pretty_assertions::assert_eq;
561546//! }
562547//! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
563548//! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
564- //! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"text"#).unwrap());
565- //! # // TODO: After #474 parse mixed content
549+ //! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
566550//! ```
567551//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
568552//!
643627//! # quick_xml::de::from_str(r#"<any-tag field="..."><two>...</two></any-tag>"#).unwrap(),
644628//! # );
645629//! # assert_eq!(
646- //! # AnyName { field: (), any_name: Choice::Text("text".into()) },
647- //! # // TODO: After #474 parse mixed content
648- //! # quick_xml::de::from_str(r#"<any-tag field="...">text</any-tag>"#).unwrap(),
630+ //! # AnyName { field: (), any_name: Choice::Text("text cdata ".into()) },
631+ //! # quick_xml::de::from_str(r#"<any-tag field="...">text <![CDATA[ cdata ]]></any-tag>"#).unwrap(),
649632//! # );
650633//! ```
651634//! </td>
967950//! from the full element (`<one>...</one>`), so they could use the element name
968951//! to choose the right variant:
969952//!
970- //! ```ignore
971- //! // FIXME: #474
953+ //! ```
972954//! # use pretty_assertions::assert_eq;
973955//! # use serde::Deserialize;
974956//! # type One = ();
985967//! # quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
986968//! # );
987969//! ```
988- //! ```ignore
989- //! // FIXME: #474, Custom("unknown variant `two`,
990- //! // expected `one`")
970+ //! ```
991971//! # use pretty_assertions::assert_eq;
992972//! # use serde::Deserialize;
993973//! # #[derive(Debug, PartialEq)]
1011991//! NOTE: consecutive text and CDATA nodes are merged into a single text node,
1012992//! so you cannot have two adjacent string types in your sequence.
1013993//! </div>
1014- //! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
1015- //!
1016- //! Merging of the text / CDATA content is tracked in the issue [#474] and
1017- //! will be available in the next release.
1018- //! </div>
1019994//! </td>
1020995//! </tr>
1021996//! <!-- 15 ==================================================================================== -->
10401015//! <td>
10411016//! A homogeneous sequence of elements with a fixed or dynamic size:
10421017//!
1043- //! ```ignore
1044- //! // FIXME: #474
1018+ //! ```
10451019//! # use pretty_assertions::assert_eq;
10461020//! # use serde::Deserialize;
10471021//! # #[derive(Debug, PartialEq)]
10591033//! # quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
10601034//! # );
10611035//! ```
1062- //! ```ignore
1063- //! // FIXME: #474
1036+ //! ```
10641037//! # use pretty_assertions::assert_eq;
10651038//! # use serde::Deserialize;
10661039//! # #[derive(Debug, PartialEq)]
10881061//! NOTE: consecutive text and CDATA nodes are merged into a single text node,
10891062//! so you cannot have two adjacent string types in your sequence.
10901063//! </div>
1091- //! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
1092- //!
1093- //! Merging of the text / CDATA content is tracked in the issue [#474] and
1094- //! will be available in the next release.
1095- //! </div>
10961064//! </td>
10971065//! </tr>
10981066//! <!-- 16 ==================================================================================== -->
11191087//!
11201088//! You MUST specify `#[serde(rename = "$value")]` on that field:
11211089//!
1122- //! ```ignore
1123- //! // FIXME: #474, Custom("duplicate field `$value`")
1090+ //! ```
11241091//! # use pretty_assertions::assert_eq;
11251092//! # use serde::Deserialize;
11261093//! # type One = ();
11571124//! # ).unwrap(),
11581125//! # );
11591126//! ```
1160- //! ```ignore
1161- //! // FIXME: #474, Custom("duplicate field `$value`")
1127+ //! ```
11621128//! # use pretty_assertions::assert_eq;
11631129//! # use serde::Deserialize;
11641130//! # type One = ();
12041170//! NOTE: consecutive text and CDATA nodes are merged into a single text node,
12051171//! so you cannot have two adjacent string types in your sequence.
12061172//! </div>
1207- //! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
1208- //!
1209- //! Merging of the text / CDATA content is tracked in the issue [#474] and
1210- //! will be available in the next release.
1211- //! </div>
12121173//! </td>
12131174//! </tr>
12141175//! <!-- 17 ==================================================================================== -->
12371198//!
12381199//! You MUST specify `#[serde(rename = "$value")]` on that field:
12391200//!
1240- //! ```ignore
1241- //! // FIXME: #474
1201+ //! ```
12421202//! # use pretty_assertions::assert_eq;
12431203//! # use serde::Deserialize;
12441204//! # #[derive(Debug, PartialEq)]
12821242//! # ).unwrap(),
12831243//! # );
12841244//! ```
1285- //! ```ignore
1286- //! // FIXME: #474
1245+ //! ```
12871246//! # use pretty_assertions::assert_eq;
12881247//! # use serde::Deserialize;
12891248//! # #[derive(Debug, PartialEq)]
13321291//! NOTE: consecutive text and CDATA nodes are merged into a single text node,
13331292//! so you cannot have two adjacent string types in your sequence.
13341293//! </div>
1335- //! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
1336- //!
1337- //! Merging of the text / CDATA content is tracked in the issue [#474] and
1338- //! will be available in the next release.
1339- //! </div>
13401294//! </td>
13411295//! </tr>
13421296//! </tbody>
17201674//!
17211675//! [specification]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition
17221676//! [`deserialize_with`]: https://serde.rs/field-attrs.html#deserialize_with
1723- //! [#474]: https://github.com/tafia/quick-xml/issues/474
17241677//! [#497]: https://github.com/tafia/quick-xml/issues/497
17251678
17261679// Macros should be defined before the modules that use them
@@ -2004,6 +1957,53 @@ impl<'i, R: XmlRead<'i>> XmlReader<'i, R> {
20041957 )
20051958 }
20061959
1960+ /// Read all consequent [`Text`] and [`CData`] events until non-text event
1961+ /// occurs. Content of all events would be appended to `result` and returned
1962+ /// as [`DeEvent::Text`].
1963+ ///
1964+ /// [`Text`]: PayloadEvent::Text
1965+ /// [`CData`]: PayloadEvent::CData
1966+ fn drain_text ( & mut self , mut result : Cow < ' i , str > ) -> Result < DeEvent < ' i > , DeError > {
1967+ loop {
1968+ match self . lookahead {
1969+ Ok ( PayloadEvent :: Text ( _) | PayloadEvent :: CData ( _) ) => {
1970+ let text = self . next_text ( ) ?;
1971+
1972+ let mut s = result. into_owned ( ) ;
1973+ s += & text;
1974+ result = Cow :: Owned ( s) ;
1975+ }
1976+ _ => break ,
1977+ }
1978+ }
1979+ Ok ( DeEvent :: Text ( result) )
1980+ }
1981+
1982+ /// Read one text event, panics if current event is not a text event
1983+ ///
1984+ /// |Event |XML |Handling
1985+ /// |-----------------------|---------------------------|----------------------------------------
1986+ /// |[`PayloadEvent::Start`]|`<tag>...</tag>` |Possible panic (unreachable)
1987+ /// |[`PayloadEvent::End`] |`</any-tag>` |Possible panic (unreachable)
1988+ /// |[`PayloadEvent::Text`] |`text content` |Unescapes `text content` and returns it
1989+ /// |[`PayloadEvent::CData`]|`<![CDATA[cdata content]]>`|Returns `cdata content` unchanged
1990+ /// |[`PayloadEvent::Eof`] | |Possible panic (unreachable)
1991+ #[ inline( always) ]
1992+ fn next_text ( & mut self ) -> Result < Cow < ' i , str > , DeError > {
1993+ match self . next_impl ( ) ? {
1994+ PayloadEvent :: Text ( mut e) => {
1995+ if self . need_trim_end ( ) {
1996+ e. inplace_trim_end ( ) ;
1997+ }
1998+ Ok ( e. unescape ( ) ?)
1999+ }
2000+ PayloadEvent :: CData ( e) => Ok ( e. decode ( ) ?) ,
2001+
2002+ // SAFETY: this method is called only when we peeked Text or CData
2003+ _ => unreachable ! ( "Only `Text` and `CData` events can come here" ) ,
2004+ }
2005+ }
2006+
20072007 /// Return an input-borrowing event.
20082008 fn next ( & mut self ) -> Result < DeEvent < ' i > , DeError > {
20092009 loop {
@@ -2014,9 +2014,9 @@ impl<'i, R: XmlRead<'i>> XmlReader<'i, R> {
20142014 if self . need_trim_end ( ) && e. inplace_trim_end ( ) {
20152015 continue ;
20162016 }
2017- Ok ( DeEvent :: Text ( e. unescape ( ) ?) )
2017+ self . drain_text ( e. unescape ( ) ?)
20182018 }
2019- PayloadEvent :: CData ( e) => Ok ( DeEvent :: Text ( e. decode ( ) ?) ) ,
2019+ PayloadEvent :: CData ( e) => self . drain_text ( e. decode ( ) ?) ,
20202020 PayloadEvent :: Eof => Ok ( DeEvent :: Eof ) ,
20212021 } ;
20222022 }
@@ -2386,11 +2386,12 @@ where
23862386 self . read_string_impl ( true )
23872387 }
23882388
2389- /// Consumes a one XML element or an XML tree, returns associated text or
2389+ /// Consumes consecutive [`Text`] and [`CData`] events (both are referred to below as _text_)
2390+ /// and merges them into one string. If there are no such events, returns
23902391 /// an empty string.
23912392 ///
2392- /// If `allow_start` is `false`, then only one event is consumed. If that
2393- /// event is [`DeEvent::Start`], then [`DeError::UnexpectedStart`] is returned.
2393+ /// If `allow_start` is `false`, then only text events are consumed; for other
2394+ /// events an error is returned (see table below).
23942395 ///
23952396 /// If `allow_start` is `true`, then the first [`DeEvent::Text`] event is returned
23962397 /// and all other content is skipped until the corresponding end tag is consumed.
@@ -2415,6 +2416,9 @@ where
24152416 /// |[`DeEvent::End`] |`</any-tag>` |Emits [`UnexpectedEnd("any-tag")`](DeError::UnexpectedEnd)
24162417 /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged, consumes events up to `</tag>`
24172418 /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
2419+ ///
2420+ /// [`Text`]: Event::Text
2421+ /// [`CData`]: Event::CData
24182422 fn read_string_impl ( & mut self , allow_start : bool ) -> Result < Cow < ' de , str > , DeError > {
24192423 match self . next ( ) ? {
24202424 DeEvent :: Text ( e) => Ok ( e) ,
@@ -3003,7 +3007,7 @@ mod tests {
30033007 ]
30043008 ) ;
30053009
3006- // Drop all events thet represents <target> tree. Now unconsumed XML looks like:
3010+ // Drop all events that represent the <target> tree. Now unconsumed XML looks like:
30073011 //
30083012 // <skip>
30093013 // text
0 commit comments