Skip to content

Commit 1771145

Browse files
authored
Merge pull request #75 from epage/utf16
fix: Actually read utf16
2 parents dcd3b8e + 7bf105e commit 1771145

File tree

1 file changed

+18
-12
lines changed

1 file changed

+18
-12
lines changed

src/blame.rs

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -223,21 +223,27 @@ fn convert_file(buffer: &[u8], path: &std::path::Path) -> anyhow::Result<String>
223223
String::from_utf8_lossy(buffer).into_owned()
224224
},
225225
content_inspector::ContentType::UTF_16LE => {
226-
let mut decoded = String::new();
227-
let (r, _) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(buffer, &mut decoded, true);
228-
match r {
229-
encoding_rs::DecoderResult::InputEmpty => {},
230-
_ => anyhow::bail!("Could not decode UTF-16 in {}", path.display()),
231-
}
226+
// Despite accepting a `String`, `decode_to_string_without_replacement` doesn't allocate
227+
// so to avoid `OutputFull` loops, we're going to assume any UTF-16 content can fit in
228+
// a buffer twice its size
229+
let mut decoded = String::with_capacity(buffer.len() * 2);
230+
let (r, written) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(buffer, &mut decoded, true);
231+
let decoded = match r {
232+
encoding_rs::DecoderResult::InputEmpty => decoded,
233+
_ => anyhow::bail!("invalid UTF-16LE encoding at byte {} in {}", written, path.display()),
234+
};
232235
decoded
233236
}
234237
content_inspector::ContentType::UTF_16BE => {
235-
let mut decoded = String::new();
236-
let (r, _) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(buffer, &mut decoded, true);
237-
match r {
238-
encoding_rs::DecoderResult::InputEmpty => {},
239-
_ => anyhow::bail!("Could not decode UTF-16 in {}", path.display()),
240-
}
238+
// Despite accepting a `String`, `decode_to_string_without_replacement` doesn't allocate
239+
// so to avoid `OutputFull` loops, we're going to assume any UTF-16 content can fit in
240+
// a buffer twice its size
241+
let mut decoded = String::with_capacity(buffer.len() * 2);
242+
let (r, written) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(buffer, &mut decoded, true);
243+
let decoded = match r {
244+
encoding_rs::DecoderResult::InputEmpty => decoded,
245+
_ => anyhow::bail!("invalid UTF-16BE encoding at byte {} in {}", written, path.display()),
246+
};
241247
decoded
242248
},
243249
};

0 commit comments

Comments
 (0)