Skip to content

Commit 1953e4f

Browse files
committed
Rust: handle UTF-8 decoding errors
1 parent 37f264d commit 1953e4f

File tree

1 file changed

+40
-3
lines changed

1 file changed

+40
-3
lines changed

rust/extractor/src/main.rs

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,48 @@
11
use anyhow::Context;
22
use ra_ap_ide_db::line_index::LineIndex;
33
use ra_ap_parser::Edition;
4+
use std::borrow::Cow;
45
mod archive;
56
mod config;
67
pub mod generated;
78
mod translate;
89
pub mod trap;
910
use ra_ap_syntax::ast::SourceFile;
10-
use ra_ap_syntax::AstNode;
11+
use ra_ap_syntax::{AstNode, SyntaxError, TextRange, TextSize};
12+
13+
fn from_utf8_lossy(v: &[u8]) -> (Cow<'_, str>, Option<SyntaxError>) {
14+
let mut iter = v.utf8_chunks();
15+
let (first_valid, first_invalid) = if let Some(chunk) = iter.next() {
16+
let valid = chunk.valid();
17+
let invalid = chunk.invalid();
18+
if invalid.is_empty() {
19+
debug_assert_eq!(valid.len(), v.len());
20+
return (Cow::Borrowed(valid), None);
21+
}
22+
(valid, invalid)
23+
} else {
24+
return (Cow::Borrowed(""), None);
25+
};
26+
27+
const REPLACEMENT: &str = "\u{FFFD}";
28+
let error_start = first_valid.len() as u32;
29+
let error_end = error_start + first_invalid.len() as u32;
30+
let error_range = TextRange::new(TextSize::new(error_start), TextSize::new(error_end));
31+
let error = SyntaxError::new("invalid utf-8 sequence".to_owned(), error_range);
32+
let mut res = String::with_capacity(v.len());
33+
res.push_str(first_valid);
34+
35+
res.push_str(REPLACEMENT);
36+
37+
for chunk in iter {
38+
res.push_str(chunk.valid());
39+
if !chunk.invalid().is_empty() {
40+
res.push_str(REPLACEMENT);
41+
}
42+
}
43+
44+
(Cow::Owned(res), Some(error))
45+
}
1146

1247
fn extract(
1348
archiver: &archive::Archiver,
@@ -18,13 +53,15 @@ fn extract(
1853
let file = std::fs::canonicalize(&file).unwrap_or(file);
1954
archiver.archive(&file);
2055
let input = std::fs::read(&file)?;
21-
let input = String::from_utf8(input)?;
56+
let (input, err) = from_utf8_lossy(&input);
2257
let line_index = LineIndex::new(&input);
2358
let display_path = file.to_string_lossy();
2459
let mut trap = traps.create("source", &file);
2560
let label = trap.emit_file(&file);
2661
let mut translator = translate::Translator::new(trap, label, line_index);
27-
62+
if let Some(err) = err {
63+
translator.emit_parse_error(display_path.as_ref(), err);
64+
}
2865
let parse = ra_ap_syntax::ast::SourceFile::parse(&input, Edition::CURRENT);
2966
for err in parse.errors() {
3067
translator.emit_parse_error(display_path.as_ref(), err);

0 commit comments

Comments
 (0)