Skip to content

Commit 86a73d5

Browse files
committed
Added constructors that include native utf-8 validation
The goal is to adopt this functionality into the standard constructors, but backwards compatibility is tricky - this gives more room to experiment first. New constructors: Reader::from_reader_validating(), Reader::from_file_validating(), NsReader::from_reader_validating(), and NsReader::from_file_validating() (all available only when the "encoding" feature is not enabled).
1 parent 5f32a25 commit 86a73d5

4 files changed

Lines changed: 95 additions & 3 deletions

File tree

Changelog.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,13 @@
3232
- `Attribute::decode_and_unescape_value_with()`
3333

3434
Deprecated functions now behave the same as the newly added ones.
35+
- [#947]: Add new constructors to `Reader` and `NsReader` that perform automatic streaming UTF-8
36+
validation on the underlying input. Validation failures are raised as errors when the `Reader`
37+
is used. These APIs are currently considered "experimental" and are only available when the `encoding` feature is not enabled.
38+
- `Reader::from_reader_validating()`
39+
- `Reader::from_file_validating()`
40+
- `NsReader::from_reader_validating()`
41+
- `NsReader::from_file_validating()`
3542

3643
### Bug Fixes
3744

@@ -48,6 +55,7 @@
4855
properly normalized EOLs. To get the previous behavior use `.read_text().decode()?`.
4956
- [#947]: Bumped MSRV from 1.59 (Feb 2022) to 1.79 (June 2024)
5057

58+
[#947]: https://github.com/tafia/quick-xml/pull/947
5159
[#371]: https://github.com/tafia/quick-xml/issues/371
5260
[#914]: https://github.com/tafia/quick-xml/pull/914
5361
[#938]: https://github.com/tafia/quick-xml/pull/938

src/reader/buffered_reader.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use std::fs::File;
55
use std::io::{self, BufRead, BufReader};
66
use std::path::Path;
77

8-
use crate::encoding;
8+
use crate::encoding::{self, Utf8BytesReader};
99
use crate::errors::{Error, Result};
1010
use crate::events::{BytesText, Event};
1111
use crate::name::QName;
@@ -578,6 +578,18 @@ impl Reader<BufReader<File>> {
578578
}
579579
}
580580

581+
impl Reader<Utf8BytesReader<File>> {
582+
/// Creates an XML reader from a file path, returning an error if the file cannot be opened.
583+
///
584+
/// The reader will validate that all bytes read from the file are valid UTF-8.
585+
/// If invalid UTF-8 is encountered, an error will be returned when reading events. This API is experimental and only available when the `encoding` feature is disabled.
586+
#[cfg(not(feature = "encoding"))]
587+
pub fn from_file_validating<P: AsRef<Path>>(path: P) -> Result<Self> {
588+
let file = File::open(path)?;
589+
Ok(Self::from_reader_validating(file))
590+
}
591+
}
592+
581593
#[cfg(test)]
582594
mod test {
583595
use crate::reader::test::check;

src/reader/mod.rs

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ use encoding_rs::Encoding;
55
use std::io;
66
use std::ops::Range;
77

8-
use crate::encoding::Decoder;
98
#[cfg(feature = "encoding")]
109
use crate::encoding::DetectedEncoding;
10+
use crate::encoding::{Decoder, Utf8BytesReader};
1111
use crate::errors::{Error, IllFormedError, SyntaxError};
1212
use crate::events::{BytesRef, Event};
1313
use crate::parser::{DtdParser, ElementParser, Parser, PiParser};
@@ -778,7 +778,7 @@ impl<R> Reader<R> {
778778
/// Creates a `Reader` that reads from a given reader.
779779
pub fn from_reader(reader: R) -> Self {
780780
Self {
781-
reader,
781+
reader: reader,
782782
state: ReaderState::default(),
783783
}
784784
}
@@ -794,6 +794,35 @@ impl<R> Reader<R> {
794794
}
795795
}
796796

797+
impl<R: std::io::Read> Reader<Utf8BytesReader<R>> {
798+
/// Creates a `Reader` that reads from a given reader with UTF-8 validation.
799+
///
800+
/// This constructor wraps the input reader in a [`Utf8BytesReader`], which validates
801+
/// that all bytes read are valid UTF-8. If invalid UTF-8 is encountered, an error
802+
/// will be returned when reading events. Only available when the `encoding` feature is disabled.
803+
///
804+
/// # Experimental
805+
///
806+
/// **This API is experimental and may change (or disappear) in future versions.**
807+
///
808+
/// # Examples
809+
///
810+
/// ```
811+
/// use quick_xml::reader::Reader;
812+
/// use std::io::Cursor;
813+
///
814+
/// let data = Cursor::new(b"<tag>content</tag>".to_vec());
815+
/// let reader = Reader::from_reader_validating(data);
816+
/// // Reader will validate UTF-8 as it reads
817+
/// ```
818+
///
819+
/// [`Utf8BytesReader`]: crate::encoding::Utf8BytesReader
820+
#[cfg(not(feature = "encoding"))]
821+
pub fn from_reader_validating(reader: R) -> Self {
822+
Self::from_reader(Utf8BytesReader::new(reader))
823+
}
824+
}
825+
797826
/// Getters
798827
impl<R> Reader<R> {
799828
/// Consumes `Reader` returning the underlying reader

src/reader/ns_reader.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use std::io::{BufRead, BufReader};
99
use std::ops::Deref;
1010
use std::path::Path;
1111

12+
use crate::encoding::Utf8BytesReader;
1213
use crate::errors::Result;
1314
use crate::events::{BytesText, Event};
1415
use crate::name::{NamespaceResolver, QName, ResolveResult};
@@ -50,6 +51,36 @@ impl<R> NsReader<R> {
5051
}
5152
}
5253

54+
impl<R: std::io::Read> NsReader<Utf8BytesReader<R>> {
55+
/// Creates an `NsReader` that reads from a given reader with UTF-8 validation.
56+
///
57+
/// This constructor wraps the input reader in a [`Utf8BytesReader`], which validates
58+
/// that all bytes read are valid UTF-8. If invalid UTF-8 is encountered, an error
59+
/// will be returned when reading events. Only available when the `encoding` feature is disabled.
60+
///
61+
/// # Experimental
62+
///
63+
/// **This API is experimental and may change (or disappear) in future versions.**
64+
///
65+
/// # Examples
66+
///
67+
/// ```
68+
/// use quick_xml::reader::NsReader;
69+
/// use std::io::Cursor;
70+
///
71+
/// let data = Cursor::new(b"<tag>content</tag>".to_vec());
72+
/// let reader = NsReader::from_reader_validating(data);
73+
/// // Reader will validate UTF-8 as it reads
74+
/// ```
75+
///
76+
/// [`Utf8BytesReader`]: crate::encoding::Utf8BytesReader
77+
#[inline]
78+
#[cfg(not(feature = "encoding"))]
79+
pub fn from_reader_validating(reader: R) -> Self {
80+
Self::new(Reader::from_reader_validating(reader))
81+
}
82+
}
83+
5384
/// Private methods
5485
impl<R> NsReader<R> {
5586
#[inline]
@@ -442,6 +473,18 @@ impl NsReader<BufReader<File>> {
442473
}
443474
}
444475

476+
impl NsReader<Utf8BytesReader<File>> {
477+
/// Creates an `NsReader` from a file path, with UTF-8 validation of the file contents.
478+
///
479+
/// This constructor only exists when the [`encoding`] feature is *not* enabled. The reader
480+
/// validates that all bytes read from the file are valid UTF-8; if invalid UTF-8 is encountered,
481+
/// an error will be returned when reading events. Any error returned by `Reader::from_file_validating` is propagated.
482+
#[cfg(not(feature = "encoding"))]
483+
pub fn from_file_validating<P: AsRef<Path>>(path: P) -> Result<Self> {
484+
Ok(Self::new(Reader::from_file_validating(path)?))
485+
}
486+
}
487+
445488
impl<'i> NsReader<&'i [u8]> {
446489
/// Creates an XML reader from a string slice.
447490
#[inline]

0 commit comments

Comments
 (0)