Skip to content

Commit c383849

Browse files
authored
Merge pull request #685 from Mingun/quoting-level
Add ability to set desired level of escaping special characters in XML
2 parents 9fb181a + d9de2d8 commit c383849

File tree

6 files changed

+194
-9
lines changed

6 files changed

+194
-9
lines changed

Changelog.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,11 @@ configuration is serializable.
2020
- [#677]: Added methods `config()` and `config_mut()` to inspect and change the parser
2121
configuration. Previous builder methods on `Reader` / `NsReader` were replaced by
2222
direct access to fields of config using `reader.config_mut().<...>`.
23-
- #[#684]: Added a method `Config::enable_all_checks` to turn on or off all
23+
- [#684]: Added a method `Config::enable_all_checks` to turn on or off all
2424
well-formedness checks.
25+
- [#362]: Added `escape::minimal_escape()` which escapes only `&` and `<`.
26+
- [#362]: Added `BytesCData::minimal_escape()` which escapes only `&` and `<`.
27+
- [#362]: Added `Serializer::set_quote_level()` which allows setting the desired level of escaping.
2528

2629
### Bug Fixes
2730

@@ -47,7 +50,9 @@ configuration is serializable.
4750
- [#684]: Now `<??>` is parsed as `Event::PI` with empty content instead of raising
4851
syntax error.
4952
- [#684]: Now `<?xml?>` is parsed as `Event::Decl` instead of `Event::PI`.
53+
- [#362]: Now the default quote level is `QuoteLevel::Partial` when using the serde serializer.
5054

55+
[#362]: https://github.com/tafia/quick-xml/issues/362
5156
[#513]: https://github.com/tafia/quick-xml/issues/513
5257
[#622]: https://github.com/tafia/quick-xml/issues/622
5358
[#675]: https://github.com/tafia/quick-xml/pull/675

src/escapei.rs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,16 @@ impl std::error::Error for EscapeError {}
7171
/// | `&` | `&amp;`
7272
/// | `'` | `&apos;`
7373
/// | `"` | `&quot;`
74+
///
75+
/// This function performs following replacements:
76+
///
77+
/// | Character | Replacement
78+
/// |-----------|------------
79+
/// | `<` | `&lt;`
80+
/// | `>` | `&gt;`
81+
/// | `&` | `&amp;`
82+
/// | `'` | `&apos;`
83+
/// | `"` | `&quot;`
7484
pub fn escape(raw: &str) -> Cow<str> {
7585
_escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&' | b'\'' | b'\"'))
7686
}
@@ -88,10 +98,35 @@ pub fn escape(raw: &str) -> Cow<str> {
8898
/// | `<` | `&lt;`
8999
/// | `>` | `&gt;`
90100
/// | `&` | `&amp;`
101+
///
102+
/// This function performs following replacements:
103+
///
104+
/// | Character | Replacement
105+
/// |-----------|------------
106+
/// | `<` | `&lt;`
107+
/// | `>` | `&gt;`
108+
/// | `&` | `&amp;`
91109
pub fn partial_escape(raw: &str) -> Cow<str> {
92110
_escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&'))
93111
}
94112

113+
/// The XML standard [requires] that only `<` and `&` be escaped in text content or
114+
/// attribute value. All other characters are not required to be escaped, although
115+
/// for compatibility with SGML they also should be escaped. Practically, escaping
116+
/// only those characters is enough.
117+
///
118+
/// This function performs following replacements:
119+
///
120+
/// | Character | Replacement
121+
/// |-----------|------------
122+
/// | `<` | `&lt;`
123+
/// | `&` | `&amp;`
124+
///
125+
/// [requires]: https://www.w3.org/TR/xml11/#syntax
126+
pub fn minimal_escape(raw: &str) -> Cow<str> {
127+
_escape(raw, |ch| matches!(ch, b'<' | b'&'))
128+
}
129+
95130
/// Escapes an `&str` and replaces a subset of xml special characters (`<`, `>`,
96131
/// `&`, `'`, `"`) with their corresponding xml escaped value.
97132
pub(crate) fn _escape<F: Fn(u8) -> bool>(raw: &str, escape_chars: F) -> Cow<str> {
@@ -1788,6 +1823,7 @@ fn test_escape() {
17881823
assert_eq!(unchanged, Cow::Borrowed("test"));
17891824
assert!(matches!(unchanged, Cow::Borrowed(_)));
17901825

1826+
assert_eq!(escape("<&\"'>"), "&lt;&amp;&quot;&apos;&gt;");
17911827
assert_eq!(escape("<test>"), "&lt;test&gt;");
17921828
assert_eq!(escape("\"a\"bc"), "&quot;a&quot;bc");
17931829
assert_eq!(escape("\"a\"b&c"), "&quot;a&quot;b&amp;c");
@@ -1806,6 +1842,7 @@ fn test_partial_escape() {
18061842
assert_eq!(unchanged, Cow::Borrowed("test"));
18071843
assert!(matches!(unchanged, Cow::Borrowed(_)));
18081844

1845+
assert_eq!(partial_escape("<&\"'>"), "&lt;&amp;\"'&gt;");
18091846
assert_eq!(partial_escape("<test>"), "&lt;test&gt;");
18101847
assert_eq!(partial_escape("\"a\"bc"), "\"a\"bc");
18111848
assert_eq!(partial_escape("\"a\"b&c"), "\"a\"b&amp;c");
@@ -1814,3 +1851,16 @@ fn test_partial_escape() {
18141851
"prefix_\"a\"b&amp;&lt;&gt;c"
18151852
);
18161853
}
1854+
1855+
#[test]
1856+
fn test_minimal_escape() {
1857+
assert_eq!(minimal_escape("test"), Cow::Borrowed("test"));
1858+
assert_eq!(minimal_escape("<&\"'>"), "&lt;&amp;\"'>");
1859+
assert_eq!(minimal_escape("<test>"), "&lt;test>");
1860+
assert_eq!(minimal_escape("\"a\"bc"), "\"a\"bc");
1861+
assert_eq!(minimal_escape("\"a\"b&c"), "\"a\"b&amp;c");
1862+
assert_eq!(
1863+
minimal_escape("prefix_\"a\"b&<>c"),
1864+
"prefix_\"a\"b&amp;&lt;>c"
1865+
);
1866+
}

src/events/mod.rs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ use std::str::from_utf8;
4646

4747
use crate::encoding::Decoder;
4848
use crate::errors::{Error, IllFormedError, Result};
49-
use crate::escape::{escape, partial_escape, unescape_with};
49+
use crate::escape::{escape, minimal_escape, partial_escape, unescape_with};
5050
use crate::name::{LocalName, QName};
5151
use crate::reader::is_whitespace;
5252
use crate::utils::write_cow_string;
@@ -913,6 +913,30 @@ impl<'a> BytesCData<'a> {
913913
))
914914
}
915915

916+
/// Converts this CDATA content to an escaped version, that can be written
917+
/// as usual text in XML. This method escapes only those characters that
918+
/// must be escaped according to the [specification].
919+
///
920+
/// This function performs following replacements:
921+
///
922+
/// | Character | Replacement
923+
/// |-----------|------------
924+
/// | `<` | `&lt;`
925+
/// | `&` | `&amp;`
926+
///
927+
/// [specification]: https://www.w3.org/TR/xml11/#syntax
928+
pub fn minimal_escape(self) -> Result<BytesText<'a>> {
929+
let decoded = self.decode()?;
930+
Ok(BytesText::wrap(
931+
match minimal_escape(&decoded) {
932+
// Because the result is borrowed, no replacements were done and we can use the original content
933+
Cow::Borrowed(_) => self.content,
934+
Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
935+
},
936+
Decoder::utf8(),
937+
))
938+
}
939+
916940
/// Gets content of this text buffer in the specified encoding
917941
pub(crate) fn decode(&self) -> Result<Cow<'a, str>> {
918942
Ok(match &self.content {

src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,9 @@ pub mod errors;
5858
mod escapei;
5959
pub mod escape {
6060
//! Manage xml character escapes
61-
pub use crate::escapei::{escape, partial_escape, unescape, unescape_with, EscapeError};
61+
pub use crate::escapei::{
62+
escape, minimal_escape, partial_escape, unescape, unescape_with, EscapeError,
63+
};
6264
}
6365
pub mod events;
6466
pub mod name;

src/se/mod.rs

Lines changed: 106 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,7 @@ impl<'w, 'r, W: Write> Serializer<'w, 'r, W> {
461461
Self {
462462
ser: ContentSerializer {
463463
writer,
464-
level: QuoteLevel::Full,
464+
level: QuoteLevel::Partial,
465465
indent: Indent::None,
466466
write_indent: false,
467467
expand_empty_elements: false,
@@ -526,7 +526,7 @@ impl<'w, 'r, W: Write> Serializer<'w, 'r, W> {
526526
Ok(Self {
527527
ser: ContentSerializer {
528528
writer,
529-
level: QuoteLevel::Full,
529+
level: QuoteLevel::Partial,
530530
indent: Indent::None,
531531
write_indent: false,
532532
expand_empty_elements: false,
@@ -574,6 +574,14 @@ impl<'w, 'r, W: Write> Serializer<'w, 'r, W> {
574574
self
575575
}
576576

577+
/// Set the level of quoting used when writing texts
578+
///
579+
/// Default: [`QuoteLevel::Partial`]
580+
pub fn set_quote_level(&mut self, level: QuoteLevel) -> &mut Self {
581+
self.ser.level = level;
582+
self
583+
}
584+
577585
/// Set the indent object for a serializer
578586
pub(crate) fn set_indent(&mut self, indent: Indent<'r>) -> &mut Self {
579587
self.ser.indent = indent;
@@ -779,3 +787,99 @@ impl<'w, 'r, W: Write> ser::Serializer for Serializer<'w, 'r, W> {
779787
}
780788
}
781789
}
790+
791+
#[cfg(test)]
792+
mod quote_level {
793+
use super::*;
794+
use pretty_assertions::assert_eq;
795+
use serde::Serialize;
796+
797+
#[derive(Debug, PartialEq, Serialize)]
798+
struct Element(&'static str);
799+
800+
#[derive(Debug, PartialEq, Serialize)]
801+
struct Example {
802+
#[serde(rename = "@attribute")]
803+
attribute: &'static str,
804+
element: Element,
805+
}
806+
807+
#[test]
808+
fn default_() {
809+
let example = Example {
810+
attribute: "special chars: &, <, >, \", '",
811+
element: Element("special chars: &, <, >, \", '"),
812+
};
813+
814+
let mut buffer = String::new();
815+
let ser = Serializer::new(&mut buffer);
816+
817+
example.serialize(ser).unwrap();
818+
assert_eq!(
819+
buffer,
820+
"<Example attribute=\"special chars: &amp;, &lt;, &gt;, &quot;, '\">\
821+
<element>special chars: &amp;, &lt;, &gt;, \", '</element>\
822+
</Example>"
823+
);
824+
}
825+
826+
#[test]
827+
fn minimal() {
828+
let example = Example {
829+
attribute: "special chars: &, <, >, \", '",
830+
element: Element("special chars: &, <, >, \", '"),
831+
};
832+
833+
let mut buffer = String::new();
834+
let mut ser = Serializer::new(&mut buffer);
835+
ser.set_quote_level(QuoteLevel::Minimal);
836+
837+
example.serialize(ser).unwrap();
838+
assert_eq!(
839+
buffer,
840+
"<Example attribute=\"special chars: &amp;, &lt;, >, &quot;, '\">\
841+
<element>special chars: &amp;, &lt;, >, \", '</element>\
842+
</Example>"
843+
);
844+
}
845+
846+
#[test]
847+
fn partial() {
848+
let example = Example {
849+
attribute: "special chars: &, <, >, \", '",
850+
element: Element("special chars: &, <, >, \", '"),
851+
};
852+
853+
let mut buffer = String::new();
854+
let mut ser = Serializer::new(&mut buffer);
855+
ser.set_quote_level(QuoteLevel::Partial);
856+
857+
example.serialize(ser).unwrap();
858+
assert_eq!(
859+
buffer,
860+
"<Example attribute=\"special chars: &amp;, &lt;, &gt;, &quot;, '\">\
861+
<element>special chars: &amp;, &lt;, &gt;, \", '</element>\
862+
</Example>"
863+
);
864+
}
865+
866+
#[test]
867+
fn full() {
868+
let example = Example {
869+
attribute: "special chars: &, <, >, \", '",
870+
element: Element("special chars: &, <, >, \", '"),
871+
};
872+
873+
let mut buffer = String::new();
874+
let mut ser = Serializer::new(&mut buffer);
875+
ser.set_quote_level(QuoteLevel::Full);
876+
877+
example.serialize(ser).unwrap();
878+
assert_eq!(
879+
buffer,
880+
"<Example attribute=\"special chars: &amp;, &lt;, &gt;, &quot;, &apos;\">\
881+
<element>special chars: &amp;, &lt;, &gt;, &quot;, &apos;</element>\
882+
</Example>"
883+
);
884+
}
885+
}

tests/serde-se.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1807,13 +1807,13 @@ mod with_root {
18071807
serialize_as!(char_lt: '<' => "<root>&lt;</root>");
18081808
serialize_as!(char_gt: '>' => "<root>&gt;</root>");
18091809
serialize_as!(char_amp: '&' => "<root>&amp;</root>");
1810-
serialize_as!(char_apos: '\'' => "<root>&apos;</root>");
1811-
serialize_as!(char_quot: '"' => "<root>&quot;</root>");
1810+
serialize_as!(char_apos: '\'' => "<root>'</root>");
1811+
serialize_as!(char_quot: '"' => "<root>\"</root>");
18121812
// FIXME: Probably we should trim only for specified types when deserialize
18131813
serialize_as_only!(char_space: ' ' => "<root> </root>");
18141814

18151815
serialize_as!(str_non_escaped: "non-escaped string"; &str => "<root>non-escaped string</root>");
1816-
serialize_as!(str_escaped: "<\"escaped & string'>"; String => "<root>&lt;&quot;escaped &amp; string&apos;&gt;</root>");
1816+
serialize_as!(str_escaped: "<\"escaped & string'>"; String => "<root>&lt;\"escaped &amp; string'&gt;</root>");
18171817

18181818
err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet"));
18191819

@@ -1839,7 +1839,7 @@ mod with_root {
18391839
serialize_as!(tuple:
18401840
// Use to_string() to get owned type that is required for deserialization
18411841
("<\"&'>".to_string(), "with\t\r\n spaces", 3usize)
1842-
=> "<root>&lt;&quot;&amp;&apos;&gt;</root>\
1842+
=> "<root>&lt;\"&amp;'&gt;</root>\
18431843
<root>with\t\r\n spaces</root>\
18441844
<root>3</root>");
18451845
serialize_as!(tuple_struct:

0 commit comments

Comments
 (0)