@@ -223,21 +223,27 @@ fn convert_file(buffer: &[u8], path: &std::path::Path) -> anyhow::Result<String>
223
223
String :: from_utf8_lossy ( buffer) . into_owned ( )
224
224
} ,
225
225
content_inspector:: ContentType :: UTF_16LE => {
226
- let mut decoded = String :: new ( ) ;
227
- let ( r, _) = encoding_rs:: UTF_16LE . new_decoder_with_bom_removal ( ) . decode_to_string_without_replacement ( buffer, & mut decoded, true ) ;
228
- match r {
229
- encoding_rs:: DecoderResult :: InputEmpty => { } ,
230
- _ => anyhow:: bail!( "Could not decode UTF-16 in {}" , path. display( ) ) ,
231
- }
226
+ // Despite accepting a `String`, `decode_to_string_without_replacement` doesn't allocate
227
+ // so to avoid `OutputFull` loops, we're going to assume any UTF-16 content can fit in
228
+ // a buffer twice its size
229
+ let mut decoded = String :: with_capacity ( buffer. len ( ) * 2 ) ;
230
+ let ( r, written) = encoding_rs:: UTF_16LE . new_decoder_with_bom_removal ( ) . decode_to_string_without_replacement ( buffer, & mut decoded, true ) ;
231
+ let decoded = match r {
232
+ encoding_rs:: DecoderResult :: InputEmpty => decoded,
233
+ _ => anyhow:: bail!( "invalid UTF-16LE encoding at byte {} in {}" , written, path. display( ) ) ,
234
+ } ;
232
235
decoded
233
236
}
234
237
content_inspector:: ContentType :: UTF_16BE => {
235
- let mut decoded = String :: new ( ) ;
236
- let ( r, _) = encoding_rs:: UTF_16BE . new_decoder_with_bom_removal ( ) . decode_to_string_without_replacement ( buffer, & mut decoded, true ) ;
237
- match r {
238
- encoding_rs:: DecoderResult :: InputEmpty => { } ,
239
- _ => anyhow:: bail!( "Could not decode UTF-16 in {}" , path. display( ) ) ,
240
- }
238
+ // Despite accepting a `String`, `decode_to_string_without_replacement` doesn't allocate
239
+ // so to avoid `OutputFull` loops, we're going to assume any UTF-16 content can fit in
240
+ // a buffer twice its size
241
+ let mut decoded = String :: with_capacity ( buffer. len ( ) * 2 ) ;
242
+ let ( r, written) = encoding_rs:: UTF_16BE . new_decoder_with_bom_removal ( ) . decode_to_string_without_replacement ( buffer, & mut decoded, true ) ;
243
+ let decoded = match r {
244
+ encoding_rs:: DecoderResult :: InputEmpty => decoded,
245
+ _ => anyhow:: bail!( "invalid UTF-16BE encoding at byte {} in {}" , written, path. display( ) ) ,
246
+ } ;
241
247
decoded
242
248
} ,
243
249
} ;
0 commit comments