Skip to content

Commit 7924fdb

Browse files
committed
Add new minimal_escape function to escape only strictly necessary characters
1 parent ab06d5e commit 7924fdb

File tree

4 files changed

+80
-2
lines changed

4 files changed

+80
-2
lines changed

Changelog.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ configuration is serializable.
2222
direct access to fields of config using `reader.config_mut().<...>`.
2323
- [#684]: Added a method `Config::enable_all_checks` to turn on or off all
2424
well-formedness checks.
25+
- [#362]: Added `escape::minimal_escape()` which escapes only `&` and `<`.
26+
- [#362]: Added `BytesCData::minimal_escape()` which escapes only `&` and `<`.
2527

2628
### Bug Fixes
2729

src/escapei.rs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,16 @@ impl std::error::Error for EscapeError {}
7171
/// | `&` | `&amp;`
7272
/// | `'` | `&apos;`
7373
/// | `"` | `&quot;`
74+
///
75+
/// This function performs the following replacements:
76+
///
77+
/// | Character | Replacement
78+
/// |-----------|------------
79+
/// | `<` | `&lt;`
80+
/// | `>` | `&gt;`
81+
/// | `&` | `&amp;`
82+
/// | `'` | `&apos;`
83+
/// | `"` | `&quot;`
7484
pub fn escape(raw: &str) -> Cow<str> {
7585
_escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&' | b'\'' | b'\"'))
7686
}
@@ -88,10 +98,35 @@ pub fn escape(raw: &str) -> Cow<str> {
8898
/// | `<` | `&lt;`
8999
/// | `>` | `&gt;`
90100
/// | `&` | `&amp;`
101+
///
102+
/// This function performs the following replacements:
103+
///
104+
/// | Character | Replacement
105+
/// |-----------|------------
106+
/// | `<` | `&lt;`
107+
/// | `>` | `&gt;`
108+
/// | `&` | `&amp;`
91109
pub fn partial_escape(raw: &str) -> Cow<str> {
92110
_escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&'))
93111
}
94112

113+
/// The XML standard [requires] that only `<` and `&` be escaped in text content or
114+
/// attribute values. No other characters need to be escaped, although for
115+
/// compatibility with SGML they should be escaped as well. In practice, escaping
116+
/// only those two characters is enough.
117+
///
118+
/// This function performs the following replacements:
119+
///
120+
/// | Character | Replacement
121+
/// |-----------|------------
122+
/// | `<` | `&lt;`
123+
/// | `&` | `&amp;`
124+
///
125+
/// [requires]: https://www.w3.org/TR/xml11/#syntax
126+
pub fn minimal_escape(raw: &str) -> Cow<str> {
127+
_escape(raw, |ch| matches!(ch, b'<' | b'&'))
128+
}
129+
95130
/// Escapes an `&str` and replaces a subset of xml special characters (`<`, `>`,
96131
/// `&`, `'`, `"`) with their corresponding xml escaped value.
97132
pub(crate) fn _escape<F: Fn(u8) -> bool>(raw: &str, escape_chars: F) -> Cow<str> {
@@ -1788,6 +1823,7 @@ fn test_escape() {
17881823
assert_eq!(unchanged, Cow::Borrowed("test"));
17891824
assert!(matches!(unchanged, Cow::Borrowed(_)));
17901825

1826+
assert_eq!(escape("<&\"'>"), "&lt;&amp;&quot;&apos;&gt;");
17911827
assert_eq!(escape("<test>"), "&lt;test&gt;");
17921828
assert_eq!(escape("\"a\"bc"), "&quot;a&quot;bc");
17931829
assert_eq!(escape("\"a\"b&c"), "&quot;a&quot;b&amp;c");
@@ -1806,6 +1842,7 @@ fn test_partial_escape() {
18061842
assert_eq!(unchanged, Cow::Borrowed("test"));
18071843
assert!(matches!(unchanged, Cow::Borrowed(_)));
18081844

1845+
assert_eq!(partial_escape("<&\"'>"), "&lt;&amp;\"'&gt;");
18091846
assert_eq!(partial_escape("<test>"), "&lt;test&gt;");
18101847
assert_eq!(partial_escape("\"a\"bc"), "\"a\"bc");
18111848
assert_eq!(partial_escape("\"a\"b&c"), "\"a\"b&amp;c");
@@ -1814,3 +1851,16 @@ fn test_partial_escape() {
18141851
"prefix_\"a\"b&amp;&lt;&gt;c"
18151852
);
18161853
}
1854+
1855+
#[test]
1856+
fn test_minimal_escape() {
1857+
assert_eq!(minimal_escape("test"), Cow::Borrowed("test"));
1858+
assert_eq!(minimal_escape("<&\"'>"), "&lt;&amp;\"'>");
1859+
assert_eq!(minimal_escape("<test>"), "&lt;test>");
1860+
assert_eq!(minimal_escape("\"a\"bc"), "\"a\"bc");
1861+
assert_eq!(minimal_escape("\"a\"b&c"), "\"a\"b&amp;c");
1862+
assert_eq!(
1863+
minimal_escape("prefix_\"a\"b&<>c"),
1864+
"prefix_\"a\"b&amp;&lt;>c"
1865+
);
1866+
}

src/events/mod.rs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ use std::str::from_utf8;
4646

4747
use crate::encoding::Decoder;
4848
use crate::errors::{Error, IllFormedError, Result};
49-
use crate::escape::{escape, partial_escape, unescape_with};
49+
use crate::escape::{escape, minimal_escape, partial_escape, unescape_with};
5050
use crate::name::{LocalName, QName};
5151
use crate::reader::is_whitespace;
5252
use crate::utils::write_cow_string;
@@ -913,6 +913,30 @@ impl<'a> BytesCData<'a> {
913913
))
914914
}
915915

916+
/// Converts this CDATA content to an escaped version that can be written
917+
/// as ordinary text in XML. This method escapes only those characters that
918+
/// must be escaped according to the [specification].
919+
///
920+
/// This function performs the following replacements:
921+
///
922+
/// | Character | Replacement
923+
/// |-----------|------------
924+
/// | `<` | `&lt;`
925+
/// | `&` | `&amp;`
926+
///
927+
/// [specification]: https://www.w3.org/TR/xml11/#syntax
928+
pub fn minimal_escape(self) -> Result<BytesText<'a>> {
929+
let decoded = self.decode()?;
930+
Ok(BytesText::wrap(
931+
match minimal_escape(&decoded) {
932+
// Because the result is borrowed, no replacements were made and we can use the original content
933+
Cow::Borrowed(_) => self.content,
934+
Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
935+
},
936+
Decoder::utf8(),
937+
))
938+
}
939+
916940
/// Gets content of this text buffer in the specified encoding
917941
pub(crate) fn decode(&self) -> Result<Cow<'a, str>> {
918942
Ok(match &self.content {

src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,9 @@ pub mod errors;
5858
mod escapei;
5959
pub mod escape {
6060
//! Manage xml character escapes
61-
pub use crate::escapei::{escape, partial_escape, unescape, unescape_with, EscapeError};
61+
pub use crate::escapei::{
62+
escape, minimal_escape, partial_escape, unescape, unescape_with, EscapeError,
63+
};
6264
}
6365
pub mod events;
6466
pub mod name;

0 commit comments

Comments
 (0)