Skip to content

Commit b042236

Browse files
committed
feat(parser): Recover on unterminated string literals
1 parent b1e40d3 commit b042236

File tree

6 files changed

+118
-64
lines changed

6 files changed

+118
-64
lines changed

base/src/error.rs

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,13 @@ impl<T> Errors<T> {
6363
pub fn iter(&self) -> slice::Iter<T> {
6464
self.errors.iter()
6565
}
66+
67+
pub fn drain(
68+
&mut self,
69+
range: impl std::ops::RangeBounds<usize>,
70+
) -> impl Iterator<Item = T> + '_ {
71+
self.errors.drain(range)
72+
}
6673
}
6774

6875
impl<T> Index<usize> for Errors<T> {

parser/src/layout.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -130,7 +130,7 @@ where
130130
{
131131
pub fn new(tokens: Tokens) -> Layout<'input, Tokens> {
132132
Layout {
133-
tokens: tokens,
133+
tokens,
134134
unprocessed_tokens: Vec::new(),
135135
indent_levels: Contexts::new(),
136136
}

parser/src/lib.rs

Lines changed: 60 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -422,33 +422,17 @@ where
422422
Id: Clone + AsRef<str> + std::fmt::Debug,
423423
S: ?Sized + ParserSource,
424424
{
425-
let layout = Layout::new(Tokenizer::new(input));
426-
427-
let mut parse_errors = Errors::new();
428-
429-
let result = grammar::TopExprParser::new().parse(
430-
&input,
431-
type_cache,
432-
arena,
433-
symbols,
434-
&mut parse_errors,
435-
&mut TempVecs::new(),
436-
layout,
437-
);
438-
439-
match result {
440-
Ok(expr) => {
441-
if parse_errors.has_errors() {
442-
Err((Some(expr), transform_errors(input.span(), parse_errors)))
443-
} else {
444-
Ok(expr)
445-
}
446-
}
447-
Err(err) => {
448-
parse_errors.push(err);
449-
Err((None, transform_errors(input.span(), parse_errors)))
450-
}
451-
}
425+
parse_with(input, &mut |parse_errors, layout| {
426+
grammar::TopExprParser::new().parse(
427+
&input,
428+
type_cache,
429+
arena,
430+
symbols,
431+
parse_errors,
432+
&mut TempVecs::new(),
433+
layout,
434+
)
435+
})
452436
}
453437

454438
pub fn parse_expr<'ast>(
@@ -475,34 +459,65 @@ where
475459
Id: Clone + Eq + Hash + AsRef<str> + ::std::fmt::Debug,
476460
S: ?Sized + ParserSource,
477461
{
478-
let layout = Layout::new(Tokenizer::new(input));
462+
parse_with(input, &mut |parse_errors, layout| {
463+
let type_cache = TypeCache::default();
464+
465+
grammar::ReplLineParser::new()
466+
.parse(
467+
&input,
468+
&type_cache,
469+
arena,
470+
symbols,
471+
parse_errors,
472+
&mut TempVecs::new(),
473+
layout,
474+
)
475+
.map(|o| o.map(|b| *b))
476+
})
477+
.map_err(|(opt, err)| (opt.and_then(|opt| opt), err))
478+
}
479+
480+
fn parse_with<'ast, 'input, S, T>(
481+
input: &'input S,
482+
parse: &mut dyn FnMut(
483+
ErrorEnv<'_, 'input>,
484+
Layout<'input, &mut Tokenizer<'input>>,
485+
) -> Result<
486+
T,
487+
lalrpop_util::ParseError<BytePos, Token<&'input str>, Spanned<Error, BytePos>>,
488+
>,
489+
) -> Result<T, (Option<T>, ParseErrors)>
490+
where
491+
S: ?Sized + ParserSource,
492+
{
493+
let mut tokenizer = Tokenizer::new(input);
494+
let layout = Layout::new(&mut tokenizer);
479495

480496
let mut parse_errors = Errors::new();
481497

482-
let type_cache = TypeCache::default();
498+
let result = parse(&mut parse_errors, layout);
499+
500+
let mut all_errors = transform_errors(input.span(), parse_errors);
483501

484-
let result = grammar::ReplLineParser::new().parse(
485-
&input,
486-
&type_cache,
487-
arena,
488-
symbols,
489-
&mut parse_errors,
490-
&mut TempVecs::new(),
491-
layout,
492-
);
502+
all_errors.extend(tokenizer.errors.drain(..).map(|sp_error| {
503+
pos::spanned2(
504+
sp_error.span.start().absolute,
505+
sp_error.span.end().absolute,
506+
sp_error.value.into(),
507+
)
508+
}));
493509

494510
match result {
495-
Ok(repl_line) => {
496-
let repl_line = repl_line.map(|b| *b);
497-
if parse_errors.has_errors() {
498-
Err((repl_line, transform_errors(input.span(), parse_errors)))
511+
Ok(value) => {
512+
if all_errors.has_errors() {
513+
Err((Some(value), all_errors))
499514
} else {
500-
Ok(repl_line)
515+
Ok(value)
501516
}
502517
}
503518
Err(err) => {
504-
parse_errors.push(err);
505-
Err((None, transform_errors(input.span(), parse_errors)))
519+
all_errors.push(Error::from_lalrpop(input.span(), err));
520+
Err((None, all_errors))
506521
}
507522
}
508523
}

parser/src/token.rs

Lines changed: 30 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ use self::Error::*;
99
use crate::{
1010
base::{
1111
ast::is_operator_byte,
12+
error::Errors,
1213
metadata::{Comment, CommentType},
1314
pos::{self, BytePos, Column, Line, Location, Spanned},
1415
},
@@ -351,6 +352,7 @@ pub struct Tokenizer<'input> {
351352
input: &'input str,
352353
chars: CharLocations<'input>,
353354
start_index: BytePos,
355+
pub errors: Errors<SpError>,
354356
}
355357

356358
impl<'input> Tokenizer<'input> {
@@ -364,6 +366,7 @@ impl<'input> Tokenizer<'input> {
364366
input: input.src(),
365367
chars,
366368
start_index: input.start_index(),
369+
errors: Errors::new(),
367370
}
368371
}
369372

@@ -541,7 +544,16 @@ impl<'input> Tokenizer<'input> {
541544
}
542545
}
543546

544-
self.error(start, UnterminatedStringLiteral)
547+
let end = self.chars.location;
548+
549+
self.errors
550+
.push(pos::spanned2(start, end, UnterminatedStringLiteral));
551+
552+
Ok(pos::spanned2(
553+
start,
554+
end,
555+
Token::StringLiteral(StringLiteral::Escaped(self.slice(content_start, end))),
556+
))
545557
}
546558

547559
fn raw_string_literal(&mut self, start: Location) -> Result<SpannedToken<'input>, SpError> {
@@ -845,11 +857,18 @@ mod test {
845857
fn tokenizer<'input>(
846858
input: &'input str,
847859
) -> impl Iterator<Item = Result<SpannedToken<'input>, SpError>> + 'input {
848-
Box::new(Tokenizer::new(input).take_while(|token| match *token {
849-
Ok(Spanned {
850-
value: Token::EOF, ..
851-
}) => false,
852-
_ => true,
860+
let mut tokenizer = Tokenizer::new(input);
861+
Box::new(std::iter::from_fn(move || {
862+
let result = tokenizer.next()?;
863+
if let Some(err) = tokenizer.errors.pop() {
864+
return Some(Err(err));
865+
}
866+
match result {
867+
Ok(Spanned {
868+
value: Token::EOF, ..
869+
}) => None,
870+
result => Some(result),
871+
}
853872
}))
854873
}
855874

@@ -1019,7 +1038,11 @@ mod test {
10191038
fn string_literal_unterminated() {
10201039
assert_eq!(
10211040
tokenizer(r#"foo "bar\"\n baz"#).last(),
1022-
Some(error(loc(4), UnterminatedStringLiteral))
1041+
Some(Err(pos::spanned2(
1042+
loc(4),
1043+
loc(16),
1044+
UnterminatedStringLiteral
1045+
)))
10231046
);
10241047
}
10251048

parser/tests/error_handling.rs

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -69,16 +69,17 @@ y
6969
assert_eq!(remove_expected(result.unwrap_err().1), errors);
7070
}
7171

72-
#[test]
73-
fn unclosed_string() {
74-
let _ = ::env_logger::try_init();
75-
76-
let result = parse(
77-
r#"
72+
test_parse_error! {
73+
unclosed_string,
74+
r#"
7875
"abc
79-
"#,
80-
);
81-
assert!(result.is_err());
76+
123"#,
77+
|_: base::ast::ArenaRef<'_, '_, String>| string("abc\n123"),
78+
{
79+
let error = Error::Token(parser::TokenizeError::UnterminatedStringLiteral);
80+
let span = pos::span(BytePos::from(0), BytePos::from(0));
81+
ParseErrors::from(vec![pos::spanned(span, error)])
82+
}
8283
}
8384

8485
#[test]

parser/tests/support/mod.rs

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -222,6 +222,10 @@ pub fn int<'a>(i: i64) -> SpExpr<'a> {
222222
no_loc(Expr::Literal(Literal::Int(i)))
223223
}
224224

225+
pub fn string<'a>(s: &str) -> SpExpr<'a> {
226+
no_loc(Expr::Literal(Literal::String(s.into())))
227+
}
228+
225229
pub fn let_<'ast>(
226230
arena: ast::ArenaRef<'_, 'ast, String>,
227231
s: &str,
@@ -556,9 +560,13 @@ macro_rules! test_parse_error {
556560
let _ = ::env_logger::try_init();
557561
let text = $text;
558562
let result = parse(text);
559-
assert!(result.is_err(), "{:#?}", result.unwrap());
563+
assert!(
564+
result.is_err(),
565+
"Expected error but got expression: {:#?}",
566+
result.unwrap()
567+
);
560568
let (expr, err) = result.unwrap_err();
561-
let expr = clear_span(expr.unwrap());
569+
let expr = clear_span(expr.expect("Recovered expression"));
562570
mk_ast_arena!(arena);
563571
fn call<A, R>(a: A, f: impl FnOnce(A) -> R) -> R {
564572
f(a)

0 commit comments

Comments
 (0)