From 604c6229e708052a5e9d45fc8377ba28fb367ab9 Mon Sep 17 00:00:00 2001
From: Ralf Anton Beier
Date: Mon, 11 Aug 2025 06:16:34 +0200
Subject: [PATCH] feat: hint at unterminated strings in unknown prefix errors

When encountering 'unknown literal prefix' errors, check whether a recently
lexed string literal contains code-like content and, if so, suggest checking
for an unterminated string literal.
---
 crates/parser/src/lexed_str.rs                | 36 ++++++++++++++++++-
 .../unterminated_string_unknown_prefix.rast   | 15 ++++++++
 .../err/unterminated_string_unknown_prefix.rs |  5 +++
 3 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 crates/parser/test_data/lexer/err/unterminated_string_unknown_prefix.rast
 create mode 100644 crates/parser/test_data/lexer/err/unterminated_string_unknown_prefix.rs

diff --git a/crates/parser/src/lexed_str.rs b/crates/parser/src/lexed_str.rs
index 8fff1c3db748..8d9bf914422b 100644
--- a/crates/parser/src/lexed_str.rs
+++ b/crates/parser/src/lexed_str.rs
@@ -149,6 +149,30 @@ impl<'a> Converter<'a> {
         }
     }
 
+    /// Heuristically reports whether a recently lexed `STRING` token looks like it
+    /// swallowed source code, which hints at a missing closing quote earlier on.
+    ///
+    /// Only the last few recorded tokens are inspected. A `STRING` is suspicious when
+    /// its text contains `(` together with either `//` or `;` followed by a newline.
+    fn has_likely_unterminated_string(&self) -> bool {
+        // Number of most recent tokens inspected for a suspicious `STRING`.
+        const LOOKBACK: usize = 5;
+
+        let kinds = &self.res.kind;
+        let starts = &self.res.start;
+        (0..kinds.len()).rev().take(LOOKBACK).any(|i| {
+            if kinds[i] != STRING {
+                return false;
+            }
+            let start = starts[i] as usize;
+            // The newest recorded token has no successor; it ends at the current offset.
+            let end = starts.get(i + 1).map_or(self.offset, |&next| next as usize);
+            let content = &self.res.text[start..end];
+            // A `(` plus a comment or a statement end inside a string looks like code.
+            content.contains('(') && (content.contains("//") || content.contains(";\n"))
+        })
+    }
+
     fn finalize_with_eof(mut self) -> LexedStr<'a> {
         self.res.push(EOF, self.offset);
         self.res
@@ -267,7 +291,17 @@ impl<'a> Converter<'a> {
                 rustc_lexer::TokenKind::Unknown => ERROR,
                 rustc_lexer::TokenKind::UnknownPrefix if token_text == "builtin" => IDENT,
                 rustc_lexer::TokenKind::UnknownPrefix => {
-                    errors.push("unknown literal prefix".into());
+                    let has_unterminated = self.has_likely_unterminated_string();
+
+                    let error_msg = if has_unterminated {
+                        format!(
+                            "unknown literal prefix `{}` (note: check for unterminated string literal)",
+                            token_text
+                        )
+                    } else {
+                        "unknown literal prefix".to_owned()
+                    };
+                    errors.push(error_msg);
                     IDENT
                 }
                 rustc_lexer::TokenKind::Eof => EOF,
diff --git a/crates/parser/test_data/lexer/err/unterminated_string_unknown_prefix.rast b/crates/parser/test_data/lexer/err/unterminated_string_unknown_prefix.rast
new file mode 100644
index 000000000000..f7f24ca3f810
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/unterminated_string_unknown_prefix.rast
@@ -0,0 +1,15 @@
+FN_KW "fn"
+WHITESPACE " "
+IDENT "main"
+L_PAREN "("
+R_PAREN ")"
+WHITESPACE " "
+L_CURLY "{"
+WHITESPACE "\n "
+IDENT "hello"
+L_PAREN "("
+STRING "\"world);\n // a bunch of code was here\n env(\"FLAGS"
+STRING "\", \""
+MINUS "-"
+IDENT "help" error: unknown literal prefix `help` (note: check for unterminated string literal)
+STRING "\")\n}" error: Missing trailing `"` symbol to terminate the string literal
diff --git a/crates/parser/test_data/lexer/err/unterminated_string_unknown_prefix.rs b/crates/parser/test_data/lexer/err/unterminated_string_unknown_prefix.rs
new file mode 100644
index 000000000000..338b9582605b
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/unterminated_string_unknown_prefix.rs
@@ -0,0 +1,5 @@
+fn main() {
+ hello("world);
+ // a bunch of code was here
+ env("FLAGS", "-help")
+}
\ No newline at end of file