//! Contains a high-level interface for an events-based XML emitter.
22
3+ use crate :: encoding:: UTF8_BOM ;
34use crate :: errors:: { Error , Result } ;
45use crate :: events:: { attributes:: Attribute , BytesCData , BytesStart , BytesText , Event } ;
56use std:: io:: Write ;
67
/// Writer-side encoding schemes supported by quick-xml.
///
/// Currently, `quick-xml` only supports UTF-8 as an output encoding, because the
/// `encoding_rs` library does not provide encoders for any other encodings. If you need
/// to write UTF-16 encoded XML, consider writing the XML with a UTF-8 encoding and then
/// re-encoding the file.
///
/// The type is a plain, fieldless selector, so it is `Copy` and comparable with `==`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EncodingScheme {
    /// UTF-8 text with no "BOM". This is the default and recommended value.
    Utf8,
    /// UTF-8 with a "BOM" identifier. The standard recommends against this, but some
    /// software requires it to be present.
    Utf8WithBom,
}
21+
22+ impl Default for EncodingScheme {
23+ fn default ( ) -> Self {
24+ Self :: Utf8
25+ }
26+ }
27+
728/// XML writer.
829///
930/// Writes XML `Event`s to a `Write` implementor.
@@ -57,6 +78,8 @@ pub struct Writer<W: Write> {
5778 /// underlying writer
5879 writer : W ,
5980 indent : Option < Indentation > ,
81+ encoding : EncodingScheme ,
82+ first_write : bool ,
6083}
6184
6285impl < W : Write > Writer < W > {
@@ -65,6 +88,8 @@ impl<W: Write> Writer<W> {
6588 Writer {
6689 writer : inner,
6790 indent : None ,
91+ encoding : EncodingScheme :: default ( ) ,
92+ first_write : false ,
6893 }
6994 }
7095
@@ -73,6 +98,23 @@ impl<W: Write> Writer<W> {
7398 Writer {
7499 writer : inner,
75100 indent : Some ( Indentation :: new ( indent_char, indent_size) ) ,
101+ encoding : EncodingScheme :: default ( ) ,
102+ first_write : true ,
103+ }
104+ }
105+
106+ /// Creates a Writer with configured whitespace indents from a generic Write
107+ pub fn new_with_indent_and_encoding (
108+ inner : W ,
109+ indent_char : u8 ,
110+ indent_size : usize ,
111+ encoding : EncodingScheme ,
112+ ) -> Writer < W > {
113+ Writer {
114+ writer : inner,
115+ indent : Some ( Indentation :: new ( indent_char, indent_size) ) ,
116+ encoding : encoding,
117+ first_write : true ,
76118 }
77119 }
78120
@@ -129,7 +171,15 @@ impl<W: Write> Writer<W> {
129171
130172 /// Writes bytes
131173 #[ inline]
132- pub fn write ( & mut self , value : & [ u8 ] ) -> Result < ( ) > {
174+ fn write ( & mut self , value : & [ u8 ] ) -> Result < ( ) > {
175+ // The BOM should be the very first thing written to the file, but it should only be written once
176+ if self . first_write {
177+ match self . encoding {
178+ EncodingScheme :: Utf8WithBom => self . writer . write_all ( UTF8_BOM ) ?,
179+ _ => ( ) ,
180+ }
181+ self . first_write = false ;
182+ }
133183 self . writer . write_all ( value) . map_err ( Error :: Io )
134184 }
135185
@@ -579,4 +629,23 @@ mod indentation {
579629</outer>"#
580630 ) ;
581631 }
632+
/// A writer configured with `EncodingScheme::Utf8WithBom` must place the UTF-8 byte
/// order mark at the very start of the output, directly in front of the first markup.
#[test]
fn write_utf8_with_bom() {
    let mut buffer = Vec::new();
    let mut writer =
        Writer::new_with_indent_and_encoding(&mut buffer, b' ', 4, EncodingScheme::Utf8WithBom);

    writer
        .create_element("paired")
        .with_attribute(("attr1", "value1"))
        .with_attribute(("attr2", "value2"))
        .write_text_content(BytesText::new("text"))
        .expect("failure");

    // `\u{FEFF}` is the BOM code point; in UTF-8 it encodes to the bytes EF BB BF.
    // Nothing may separate it from `<paired`, and the attribute values must match the
    // inputs above exactly.
    assert_eq!(
        &buffer,
        "\u{FEFF}<paired attr1=\"value1\" attr2=\"value2\">text</paired>".as_bytes()
    );
}
582651}
0 commit comments