//! Contains a high-level interface for an events-based XML emitter.
22
3+ use crate :: encoding:: UTF8_BOM ;
34use crate :: errors:: { Error , Result } ;
45use crate :: events:: { attributes:: Attribute , BytesCData , BytesStart , BytesText , Event } ;
56use std:: io:: Write ;
67
/// Writer-side encoding schemes supported by quick-xml.
///
/// Currently, `quick-xml` only supports UTF-8 as an output encoding, as the `encoding_rs`
/// library does not provide encoders for any other encodings. If you need to write UTF-16
/// encoded XML, consider writing the XML with a UTF-8 encoding and then re-encoding the file.
///
/// The enum carries no data, so values are cheap to copy and can be compared with `==`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EncodingScheme {
    /// UTF-8 text with no "BOM". This is the default, and recommended value.
    Utf8,
    /// UTF-8 with a "BOM" identifier. The standard recommends against this but some software
    /// struggles to detect the encoding properly if it is not present.
    Utf8WithBom,
}
21+
22+ impl Default for EncodingScheme {
23+ fn default ( ) -> Self {
24+ Self :: Utf8
25+ }
26+ }
27+
728/// XML writer.
829///
930/// Writes XML `Event`s to a `Write` implementor.
@@ -57,6 +78,8 @@ pub struct Writer<W: Write> {
5778 /// underlying writer
5879 writer : W ,
5980 indent : Option < Indentation > ,
81+ encoding : EncodingScheme ,
82+ first_write : bool ,
6083}
6184
6285impl < W : Write > Writer < W > {
@@ -65,14 +88,34 @@ impl<W: Write> Writer<W> {
6588 Writer {
6689 writer : inner,
6790 indent : None ,
91+ encoding : EncodingScheme :: default ( ) ,
92+ first_write : false ,
6893 }
6994 }
7095
71- /// Creates a Writer with configured whitespace indents from a generic Write
96+ /// Creates a Writer from a generic Write implementor with configured whitespace indents
7297 pub fn new_with_indent ( inner : W , indent_char : u8 , indent_size : usize ) -> Writer < W > {
7398 Writer {
7499 writer : inner,
75100 indent : Some ( Indentation :: new ( indent_char, indent_size) ) ,
101+ encoding : EncodingScheme :: default ( ) ,
102+ first_write : true ,
103+ }
104+ }
105+
106+ /// Creates a Writer from a generic Write implementor with configured whitespace indents and a
107+ /// specified encoding scheme.
108+ pub fn new_with_indent_and_encoding (
109+ inner : W ,
110+ indent_char : u8 ,
111+ indent_size : usize ,
112+ encoding_scheme : EncodingScheme ,
113+ ) -> Writer < W > {
114+ Writer {
115+ writer : inner,
116+ indent : Some ( Indentation :: new ( indent_char, indent_size) ) ,
117+ encoding : encoding_scheme,
118+ first_write : true ,
76119 }
77120 }
78121
@@ -129,7 +172,15 @@ impl<W: Write> Writer<W> {
129172
130173 /// Writes bytes
131174 #[ inline]
132- pub fn write ( & mut self , value : & [ u8 ] ) -> Result < ( ) > {
175+ pub ( crate ) fn write ( & mut self , value : & [ u8 ] ) -> Result < ( ) > {
176+ // The BOM should be the very first thing written to the file, but it should only be written once
177+ if self . first_write {
178+ match self . encoding {
179+ EncodingScheme :: Utf8WithBom => self . writer . write_all ( UTF8_BOM ) ?,
180+ _ => ( ) ,
181+ }
182+ self . first_write = false ;
183+ }
133184 self . writer . write_all ( value) . map_err ( Error :: Io )
134185 }
135186
@@ -579,4 +630,23 @@ mod indentation {
579630</outer>"#
580631 ) ;
581632 }
633+
/// A writer configured with `EncodingScheme::Utf8WithBom` must emit the byte order mark
/// directly before the first element.
#[test]
fn write_utf8_with_bom() {
    let mut buffer = Vec::new();
    let mut writer =
        Writer::new_with_indent_and_encoding(&mut buffer, b' ', 4, EncodingScheme::Utf8WithBom);

    writer
        .create_element("paired")
        .with_attribute(("attr1", "value1"))
        .with_attribute(("attr2", "value2"))
        .write_text_content(BytesText::new("text"))
        .expect("failure");

    // The expectation is byte-exact: no whitespace between the BOM and `<paired`, inside the
    // attribute quotes, or before `>`.
    assert_eq!(
        &buffer,
        "\u{FEFF}<paired attr1=\"value1\" attr2=\"value2\">text</paired>".as_bytes()
    );
}
582652}
0 commit comments