@@ -577,8 +577,24 @@ impl<'a> BytesText<'a> {
577577
578578 /// Decodes the content of the event.
579579 ///
580- /// This will allocate if the value contains any escape sequences or in
581- /// non-UTF-8 encoding.
580+ /// This will allocate if the value contains any escape sequences, is in a non-UTF-8
581+ /// encoding, or if [EOL normalization] is required.
582+ ///
583+ /// Note: although you may use this library to parse HTML, you cannot use this
584+ /// method to get HTML content, because it returns a normalized value: the following
585+ /// sequences are translated into a single `\n` (U+000A) character:
586+ ///
587+ /// - `\r\n`
588+ /// - `\r\x85`
589+ /// - `\r`
590+ /// - `\x85`
591+ /// - `\u{2028}`
592+ ///
593+ /// The text in HTML normally is not normalized in any way; normalization is
594+ /// performed only in limited contexts and [only for] `\r\n` and `\r`.
595+ ///
596+ /// [EOL normalization]: https://www.w3.org/TR/xml11/#sec-line-ends
597+ /// [only for]: https://html.spec.whatwg.org/#normalize-newlines
582598 pub fn decode ( & self ) -> Result < Cow < ' a , str > , EncodingError > {
583599 self . decoder . decode_cow ( & self . content )
584600 }
@@ -827,6 +843,25 @@ impl<'a> BytesCData<'a> {
827843 /// When this event produced by the XML reader, it uses the encoding information
828844 /// associated with that reader to interpret the raw bytes contained within this
829845 /// CDATA event.
846+ ///
847+ /// This will allocate if the value is in a non-UTF-8 encoding, or if
848+ /// [EOL normalization] is required.
849+ ///
850+ /// Note: although you may use this library to parse HTML, you cannot use this
851+ /// method to get HTML content, because it returns a normalized value: the following
852+ /// sequences are translated into a single `\n` (U+000A) character:
853+ ///
854+ /// - `\r\n`
855+ /// - `\r\x85`
856+ /// - `\r`
857+ /// - `\x85`
858+ /// - `\u{2028}`
859+ ///
860+ /// The text in HTML normally is not normalized in any way; normalization is
861+ /// performed only in limited contexts and [only for] `\r\n` and `\r`.
862+ ///
863+ /// [EOL normalization]: https://www.w3.org/TR/xml11/#sec-line-ends
864+ /// [only for]: https://html.spec.whatwg.org/#normalize-newlines
830865 pub fn decode ( & self ) -> Result < Cow < ' a , str > , EncodingError > {
831866 Ok ( self . decoder . decode_cow ( & self . content ) ?)
832867 }
@@ -1437,8 +1472,24 @@ impl<'a> BytesRef<'a> {
14371472
14381473 /// Decodes the content of the event.
14391474 ///
1440- /// This will allocate if the value contains any escape sequences or in
1441- /// non-UTF-8 encoding.
1475+ /// This will allocate if the value is in a non-UTF-8 encoding, or if
1476+ /// [EOL normalization] is required.
1477+ ///
1478+ /// Note: although you may use this library to parse HTML, you cannot use this
1479+ /// method to get HTML content, because it returns a normalized value: the following
1480+ /// sequences are translated into a single `\n` (U+000A) character:
1481+ ///
1482+ /// - `\r\n`
1483+ /// - `\r\x85`
1484+ /// - `\r`
1485+ /// - `\x85`
1486+ /// - `\u{2028}`
1487+ ///
1488+ /// The text in HTML normally is not normalized in any way; normalization is
1489+ /// performed only in limited contexts and [only for] `\r\n` and `\r`.
1490+ ///
1491+ /// [EOL normalization]: https://www.w3.org/TR/xml11/#sec-line-ends
1492+ /// [only for]: https://html.spec.whatwg.org/#normalize-newlines
14421493 pub fn decode ( & self ) -> Result < Cow < ' a , str > , EncodingError > {
14431494 self . decoder . decode_cow ( & self . content )
14441495 }
0 commit comments