@@ -110,23 +110,30 @@ where
110110 let mut escaped = None ;
111111 let mut last_pos = 0 ;
112112 for i in escapes {
113- let escaped = escaped. get_or_insert_with ( || Vec :: with_capacity ( raw. len ( ) ) ) ;
113+ // If we have an escape, the escaped string will be at least somewhat larger than the raw string,
114+ // so reserve a little extra space; with only a few escapes we might not need to resize at all.
115+ let escaped = escaped. get_or_insert_with ( || String :: with_capacity ( raw. len ( ) + 64 ) ) ;
114116 let byte = bytes[ i] ;
115- escaped. extend_from_slice ( & bytes[ last_pos..i] ) ;
117+ // SAFETY: the escapes iterator should only return indexes of bytes we know how to escape.
118+ // If one of those bytes is found, it _must_ be a complete character, so `i` must be a
119+ // character boundary.
120+ // last_pos will only ever be either 0 or i+1, and since all supported chars are one byte long,
121+ // last_pos will also always be at a char boundary.
122+ escaped. push_str ( & raw [ last_pos..i] ) ;
116123 match byte {
117- b'<' => escaped. extend_from_slice ( b "<") ,
118- b'>' => escaped. extend_from_slice ( b ">") ,
119- b'\'' => escaped. extend_from_slice ( b "'") ,
120- b'&' => escaped. extend_from_slice ( b "&") ,
121- b'"' => escaped. extend_from_slice ( b """) ,
124+ b'<' => escaped. push_str ( "<" ) ,
125+ b'>' => escaped. push_str ( ">" ) ,
126+ b'\'' => escaped. push_str ( "'" ) ,
127+ b'&' => escaped. push_str ( "&" ) ,
128+ b'"' => escaped. push_str ( """ ) ,
122129
123130 // This set of escapes handles characters that should be escaped
124131 // in elements of xs:lists, because those characters works as
125132 // delimiters of list elements
126- b'\t' => escaped. extend_from_slice ( b "	") ,
127- b'\n' => escaped. extend_from_slice ( b " ") ,
128- b'\r' => escaped. extend_from_slice ( b " ") ,
129- b' ' => escaped. extend_from_slice ( b " ") ,
133+ b'\t' => escaped. push_str ( "	" ) ,
134+ b'\n' => escaped. push_str ( " " ) ,
135+ b'\r' => escaped. push_str ( " " ) ,
136+ b' ' => escaped. push_str ( " " ) ,
130137 _ => unreachable ! (
131138 "Only '<', '>','\' , '&', '\" ', '\\ t', '\\ r', '\\ n', and ' ' are escaped"
132139 ) ,
@@ -135,14 +142,8 @@ where
135142 }
136143
137144 if let Some ( mut escaped) = escaped {
138- if let Some ( raw) = bytes. get ( last_pos..) {
139- escaped. extend_from_slice ( raw) ;
140- }
141- // SAFETY: we operate on UTF-8 input and search for an one byte chars only,
142- // so all slices that was put to the `escaped` is a valid UTF-8 encoded strings
143- // TODO: Can be replaced with `unsafe { String::from_utf8_unchecked() }`
144- // if unsafe code will be allowed
145- Cow :: Owned ( String :: from_utf8 ( escaped) . unwrap ( ) )
145+ escaped. push_str ( & raw [ last_pos..] ) ;
146+ Cow :: Owned ( escaped)
146147 } else {
147148 Cow :: Borrowed ( raw)
148149 }
@@ -182,17 +183,14 @@ where
182183 match iter. next ( ) {
183184 Some ( end) if bytes[ end] == b';' => {
184185 // append valid data
185- if unescaped. is_none ( ) {
186- unescaped = Some ( String :: with_capacity ( raw. len ( ) ) ) ;
187- }
188- let unescaped = unescaped. as_mut ( ) . expect ( "initialized" ) ;
186+ let unescaped = unescaped. get_or_insert_with ( || String :: with_capacity ( raw. len ( ) ) ) ;
189187 unescaped. push_str ( & raw [ last_end..start] ) ;
190188
191189 // search for character correctness
192190 let pat = & raw [ start + 1 ..end] ;
193191 if let Some ( entity) = pat. strip_prefix ( '#' ) {
194192 let codepoint = parse_number ( entity, start..end) ?;
195- unescaped. push_str ( codepoint. encode_utf8 ( & mut [ 0u8 ; 4 ] ) ) ;
193+ unescaped. push ( codepoint) ;
196194 } else if let Some ( value) = named_entity ( pat) {
197195 unescaped. push_str ( value) ;
198196 } else if let Some ( value) = resolve_entity ( pat) {
0 commit comments