@@ -184,16 +184,48 @@ pub(crate) fn _escape<F: Fn(u8) -> bool>(raw: &str, escape_chars: F) -> Cow<str>
184184/// [`escape-html`]: ../index.html#escape-html
185185/// [HTML5 escapes]: https://dev.w3.org/html5/html-author/charref
186186pub fn unescape ( raw : & str ) -> Result < Cow < str > , EscapeError > {
187- unescape_with ( raw, |_| None )
187+ unescape_with ( raw, resolve_predefined_entity )
188188}
189189
190190/// Unescapes an `&str`, replacing all XML escaped characters (`&...;`) with
191191/// their corresponding values, using a resolver function for custom entities.
192192///
193193/// If feature [`escape-html`] is enabled, then recognizes all [HTML5 escapes].
194194///
195+ /// Named entities, including the predefined ones, are resolved only by `resolve_entity`,
196+ /// which lets you override the default behavior, as required in some XML dialects.
197+ ///
198+ /// Character references (`&#hh;`) cannot be overridden; they are resolved before
199+ /// `resolve_entity` is called.
200+ ///
201+ /// Note that entities are not resolved recursively. To satisfy the
202+ /// XML [requirements], you should unescape nested entities yourself.
203+ ///
204+ /// # Example
205+ ///
206+ /// ```
207+ /// use quick_xml::escape::resolve_xml_entity;
208+ /// # use quick_xml::escape::unescape_with;
209+ /// # use pretty_assertions::assert_eq;
210+ /// let override_named_entities = |entity: &str| match entity {
211+ /// // Override standard entities
212+ /// "lt" => Some("FOO"),
213+ /// "gt" => Some("BAR"),
214+ /// // Resolve custom entities
215+ /// "baz" => Some("<"),
216+ /// // Delegate other entities to the default implementation
217+ /// _ => resolve_xml_entity(entity),
218+ /// };
219+ ///
220+ /// assert_eq!(
221+ /// unescape_with("&amp;&lt;test&gt;&baz;", override_named_entities).unwrap(),
222+ /// "&FOOtestBAR<"
223+ /// );
224+ /// ```
225+ ///
195226/// [`escape-html`]: ../index.html#escape-html
196227/// [HTML5 escapes]: https://dev.w3.org/html5/html-author/charref
228+ /// [requirements]: https://www.w3.org/TR/xml11/#intern-replacement
197229pub fn unescape_with < ' input , ' entity , F > (
198230 raw : & ' input str ,
199231 mut resolve_entity : F ,
@@ -221,8 +253,6 @@ where
221253 if let Some ( entity) = pat. strip_prefix ( '#' ) {
222254 let codepoint = parse_number ( entity, start..end) ?;
223255 unescaped. push_str ( codepoint. encode_utf8 ( & mut [ 0u8 ; 4 ] ) ) ;
224- } else if let Some ( value) = named_entity ( pat) {
225- unescaped. push_str ( value) ;
226256 } else if let Some ( value) = resolve_entity ( pat) {
227257 unescaped. push_str ( value) ;
228258 } else {
@@ -248,10 +278,45 @@ where
248278 }
249279}
250280
251- #[ cfg( not( feature = "escape-html" ) ) ]
252- fn named_entity ( name : & str ) -> Option < & str > {
281+ /// Resolves predefined XML entities or all HTML5 entities depending on the feature
282+ /// [`escape-html`](https://docs.rs/quick-xml/latest/quick_xml/#escape-html).
283+ ///
284+ /// Behaves like [`resolve_xml_entity`] if feature is not enabled and as
285+ /// [`resolve_html5_entity`] if enabled.
286+ #[ inline]
287+ pub fn resolve_predefined_entity ( entity : & str ) -> Option < & ' static str > {
288+ #[ cfg( not( feature = "escape-html" ) ) ]
289+ {
290+ resolve_xml_entity ( entity)
291+ }
292+
293+ #[ cfg( feature = "escape-html" ) ]
294+ {
295+ resolve_html5_entity ( entity)
296+ }
297+ }
298+
299+ /// Resolves predefined XML entities. If specified entity is not a predefined XML
300+ /// entity, `None` is returned.
301+ ///
302+ /// The complete list of predefined entities is defined in the [specification].
303+ ///
304+ /// ```
305+ /// # use quick_xml::escape::resolve_xml_entity;
306+ /// # use pretty_assertions::assert_eq;
307+ /// assert_eq!(resolve_xml_entity("lt"), Some("<"));
308+ /// assert_eq!(resolve_xml_entity("gt"), Some(">"));
309+ /// assert_eq!(resolve_xml_entity("amp"), Some("&"));
310+ /// assert_eq!(resolve_xml_entity("apos"), Some("'"));
311+ /// assert_eq!(resolve_xml_entity("quot"), Some("\""));
312+ ///
313+ /// assert_eq!(resolve_xml_entity("foo"), None);
314+ /// ```
315+ ///
316+ /// [specification]: https://www.w3.org/TR/xml11/#sec-predefined-ent
317+ pub fn resolve_xml_entity ( entity : & str ) -> Option < & ' static str > {
253318 // matching over strings is not allowed in const functions
254- let s = match name . as_bytes ( ) {
319+ let s = match entity . as_bytes ( ) {
255320 b"lt" => "<" ,
256321 b"gt" => ">" ,
257322 b"amp" => "&" ,
@@ -261,12 +326,13 @@ fn named_entity(name: &str) -> Option<&str> {
261326 } ;
262327 Some ( s)
263328}
264- #[ cfg( feature = "escape-html" ) ]
265- fn named_entity ( name : & str ) -> Option < & str > {
329+
330+ /// Resolves all HTML5 entities. For the complete list, see <https://dev.w3.org/html5/html-author/charref>.
331+ pub fn resolve_html5_entity ( entity : & str ) -> Option < & ' static str > {
266332 // imported from https://dev.w3.org/html5/html-author/charref
267333 // matching over strings is not allowed in const functions
268334 // TODO: automate keeping this table up to date using https://html.spec.whatwg.org/entities.json
269- let s = match name . as_bytes ( ) {
335+ let s = match entity . as_bytes ( ) {
270336 b"Tab" => "\u{09} " ,
271337 b"NewLine" => "\u{0A} " ,
272338 b"excl" => "\u{21} " ,
@@ -1804,10 +1870,7 @@ fn test_unescape_with() {
18041870 assert_eq ! ( unchanged, Cow :: Borrowed ( "test" ) ) ;
18051871 assert ! ( matches!( unchanged, Cow :: Borrowed ( _) ) ) ;
18061872
1807- assert_eq ! (
1808- unescape_with( "<test>" , custom_entities) . unwrap( ) ,
1809- "<test>"
1810- ) ;
1873+ assert ! ( unescape_with( "<" , custom_entities) . is_err( ) ) ;
18111874 assert_eq ! ( unescape_with( "0" , custom_entities) . unwrap( ) , "0" ) ;
18121875 assert_eq ! ( unescape_with( "0" , custom_entities) . unwrap( ) , "0" ) ;
18131876 assert_eq ! ( unescape_with( "&foo;" , custom_entities) . unwrap( ) , "BAR" ) ;
0 commit comments