diff --git a/cfgrammar/src/lib/yacc/ast.rs b/cfgrammar/src/lib/yacc/ast.rs index 2fd906f39..8abbca51b 100644 --- a/cfgrammar/src/lib/yacc/ast.rs +++ b/cfgrammar/src/lib/yacc/ast.rs @@ -107,6 +107,7 @@ pub struct Production { pub symbols: Vec, pub precedence: Option, pub action: Option, + pub prod_span: Span, } #[derive(Clone, Debug)] @@ -185,12 +186,14 @@ impl GrammarAST { symbols: Vec, precedence: Option, action: Option, + prod_span: Span, ) { self.rules[&rule_name].pidxs.push(self.prods.len()); self.prods.push(Production { symbols, precedence, action, + prod_span, }); } @@ -436,7 +439,7 @@ mod test { let empty_span = Span::new(0, 0); grm.start = Some(("A".to_string(), empty_span)); grm.add_rule(("B".to_string(), empty_span), None); - grm.add_prod("B".to_string(), vec![], None, None); + grm.add_prod("B".to_string(), vec![], None, None, empty_span); match grm.complete_and_validate() { Err(YaccGrammarError { kind: YaccGrammarErrorKind::InvalidStartRule(_), @@ -452,7 +455,7 @@ mod test { let empty_span = Span::new(0, 0); grm.start = Some(("A".to_string(), empty_span)); grm.add_rule(("A".to_string(), empty_span), None); - grm.add_prod("A".to_string(), vec![], None, None); + grm.add_prod("A".to_string(), vec![], None, None, empty_span); assert!(grm.complete_and_validate().is_ok()); } @@ -463,8 +466,8 @@ mod test { grm.start = Some(("A".to_string(), empty_span)); grm.add_rule(("A".to_string(), empty_span), None); grm.add_rule(("B".to_string(), empty_span), None); - grm.add_prod("A".to_string(), vec![rule("B")], None, None); - grm.add_prod("B".to_string(), vec![], None, None); + grm.add_prod("A".to_string(), vec![rule("B")], None, None, empty_span); + grm.add_prod("B".to_string(), vec![], None, None, empty_span); assert!(grm.complete_and_validate().is_ok()); } @@ -474,7 +477,7 @@ mod test { let empty_span = Span::new(0, 0); grm.start = Some(("A".to_string(), empty_span)); grm.add_rule(("A".to_string(), empty_span), None); - grm.add_prod("A".to_string(), vec![rule("B")], 
None, None); + grm.add_prod("A".to_string(), vec![rule("B")], None, None, empty_span); match grm.complete_and_validate() { Err(YaccGrammarError { kind: YaccGrammarErrorKind::UnknownRuleRef(_), @@ -491,7 +494,7 @@ mod test { grm.tokens.insert("b".to_string()); grm.start = Some(("A".to_string(), empty_span)); grm.add_rule(("A".to_string(), empty_span), None); - grm.add_prod("A".to_string(), vec![token("b")], None, None); + grm.add_prod("A".to_string(), vec![token("b")], None, None, empty_span); assert!(grm.complete_and_validate().is_ok()); } @@ -504,7 +507,7 @@ mod test { grm.tokens.insert("b".to_string()); grm.start = Some(("A".to_string(), empty_span)); grm.add_rule(("A".to_string(), empty_span), None); - grm.add_prod("A".to_string(), vec![rule("b")], None, None); + grm.add_prod("A".to_string(), vec![rule("b")], None, None, empty_span); assert!(grm.complete_and_validate().is_err()); } @@ -514,7 +517,7 @@ mod test { let empty_span = Span::new(0, 0); grm.start = Some(("A".to_string(), empty_span)); grm.add_rule(("A".to_string(), empty_span), None); - grm.add_prod("A".to_string(), vec![token("b")], None, None); + grm.add_prod("A".to_string(), vec![token("b")], None, None, empty_span); match grm.complete_and_validate() { Err(YaccGrammarError { kind: YaccGrammarErrorKind::UnknownToken(_), @@ -530,7 +533,13 @@ mod test { let empty_span = Span::new(0, 0); grm.start = Some(("A".to_string(), empty_span)); grm.add_rule(("A".to_string(), empty_span), None); - grm.add_prod("A".to_string(), vec![rule("b"), token("b")], None, None); + grm.add_prod( + "A".to_string(), + vec![rule("b"), token("b")], + None, + None, + Span::new(0, 2), + ); match grm.complete_and_validate() { Err(YaccGrammarError { kind: YaccGrammarErrorKind::UnknownRuleRef(_), @@ -546,7 +555,7 @@ mod test { let empty_span = Span::new(2, 3); grm.start = Some(("A".to_string(), empty_span)); grm.add_rule(("A".to_string(), empty_span), None); - grm.add_prod("A".to_string(), vec![], None, None); + 
grm.add_prod("A".to_string(), vec![], None, None, empty_span); grm.epp .insert("k".to_owned(), (empty_span, ("v".to_owned(), empty_span))); match grm.complete_and_validate() { @@ -580,6 +589,7 @@ mod test { vec![token("b")], Some("b".to_string()), None, + empty_span, ); assert!(grm.complete_and_validate().is_ok()); } @@ -595,6 +605,7 @@ mod test { vec![token("b")], Some("b".to_string()), None, + empty_span, ); match grm.complete_and_validate() { Err(YaccGrammarError { @@ -619,11 +630,11 @@ mod test { let empty_span = Span::new(0, 0); grm.start = Some(("A".to_string(), empty_span)); grm.add_rule(("A".to_string(), empty_span), None); - grm.add_prod("A".to_string(), vec![], None, None); + grm.add_prod("A".to_string(), vec![], None, None, empty_span); grm.tokens.insert("b".to_string()); grm.spans.push(Span::new(4, 5)); grm.add_rule(("B".to_string(), Span::new(1, 2)), None); - grm.add_prod("B".to_string(), vec![token("b")], None, None); + grm.add_prod("B".to_string(), vec![token("b")], None, None, empty_span); assert_eq!( grm.unused_symbols() diff --git a/cfgrammar/src/lib/yacc/parser.rs b/cfgrammar/src/lib/yacc/parser.rs index 6244f10ba..73c73e88d 100644 --- a/cfgrammar/src/lib/yacc/parser.rs +++ b/cfgrammar/src/lib/yacc/parser.rs @@ -801,21 +801,37 @@ impl YaccParser { let mut prec = None; let mut action = None; i = self.parse_ws(i, true)?; + let mut pos_prod_start = i; + let mut pos_prod_end = None; while i < self.src.len() { if let Some(j) = self.lookahead_is("|", i) { - self.ast.add_prod(rn.clone(), syms, prec, action); + self.ast.add_prod( + rn.clone(), + syms, + prec, + action, + Span::new(pos_prod_start, pos_prod_end.take().unwrap_or(i)), + ); syms = Vec::new(); prec = None; action = None; i = self.parse_ws(j, true)?; + pos_prod_start = i; continue; } else if let Some(j) = self.lookahead_is(";", i) { - self.ast.add_prod(rn, syms, prec, action); + self.ast.add_prod( + rn, + syms, + prec, + action, + Span::new(pos_prod_start, pos_prod_end.take().unwrap_or(i)), + 
); return Ok(j); } if self.lookahead_is("\"", i).is_some() || self.lookahead_is("'", i).is_some() { let (j, sym, span) = self.parse_token(i)?; + pos_prod_end = Some(j); i = self.parse_ws(j, true)?; if self.ast.tokens.insert(sym.clone()) { self.ast.spans.push(span); @@ -828,8 +844,10 @@ impl YaccParser { self.ast.spans.push(span); } prec = Some(sym); + pos_prod_end = Some(k); i = k; } else if self.lookahead_is("{", i).is_some() { + pos_prod_end = Some(i); let (j, a) = self.parse_action(i)?; i = self.parse_ws(j, true)?; action = Some(a); @@ -837,21 +855,23 @@ impl YaccParser { if !(self.lookahead_is("|", i).is_some() || self.lookahead_is(";", i).is_some()) { return Err(self.mk_error(YaccGrammarErrorKind::ProductionNotTerminated, i)); } - } else if let Some(mut j) = self.lookahead_is("%empty", i) { - j = self.parse_ws(j, true)?; + } else if let Some(j) = self.lookahead_is("%empty", i) { + let k = self.parse_ws(j, true)?; // %empty could be followed by all sorts of weird syntax errors: all we try and do // is say "does this production look like it's finished" and trust that the other // errors will be caught by other parts of the parser. 
if !syms.is_empty() - | !(self.lookahead_is("|", j).is_some() - || self.lookahead_is(";", j).is_some() - || self.lookahead_is("{", j).is_some()) + | !(self.lookahead_is("|", k).is_some() + || self.lookahead_is(";", k).is_some() + || self.lookahead_is("{", k).is_some()) { return Err(self.mk_error(YaccGrammarErrorKind::NonEmptyProduction, i)); } - i = j; + pos_prod_end = Some(j); + i = k; } else { let (j, sym, span) = self.parse_token(i)?; + pos_prod_end = Some(j); if self.ast.tokens.contains(&sym) { syms.push(Symbol::Token(sym, span)); } else { @@ -1322,7 +1342,8 @@ mod test { Production { symbols: vec![token_span("a", a_span)], precedence: None, - action: None + action: None, + prod_span: Span::new(32, 35), } ); assert_eq!(&src[a_span.start()..a_span.end()], "a"); @@ -1347,7 +1368,8 @@ mod test { Production { symbols: vec![token_span("a", a_span)], precedence: None, - action: None + action: None, + prod_span: Span::new(32, 35), } ); assert_eq!(&src[a_span.start()..a_span.end()], "a"); @@ -1357,7 +1379,8 @@ mod test { Production { symbols: vec![token_span("b", Span::new(54, 55))], precedence: None, - action: None + action: None, + prod_span: Span::new(53, 56), } ); assert_eq!(&src[b_span.start()..b_span.end()], "b"); @@ -1383,7 +1406,8 @@ mod test { Production { symbols: vec![], precedence: None, - action: None + action: None, + prod_span: Span::new(32, 32), } ); @@ -1393,7 +1417,8 @@ mod test { Production { symbols: vec![token_span("b", b_span)], precedence: None, - action: None + action: None, + prod_span: Span::new(50, 53), } ); assert_eq!(&src[b_span.start()..b_span.end()], "b"); @@ -1402,7 +1427,8 @@ mod test { Production { symbols: vec![], precedence: None, - action: None + action: None, + prod_span: Span::new(56, 56), } ); @@ -1411,7 +1437,8 @@ mod test { Production { symbols: vec![], precedence: None, - action: None + action: None, + prod_span: Span::new(74, 74), } ); let c_span = Span::new(77, 78); @@ -1420,7 +1447,8 @@ mod test { Production { symbols: 
vec![token_span("c", c_span)], precedence: None, - action: None + action: None, + prod_span: Span::new(76, 79), } ); assert_eq!(&src[c_span.start()..c_span.end()], "c"); @@ -1451,7 +1479,8 @@ mod test { Production { symbols: vec![token_span("a", a_span), rule_span("B", b_span)], precedence: None, - action: None + action: None, + prod_span: Span::new(7, 12), } ); assert_eq!(&src[a_span.start()..a_span.end()], "a"); @@ -1473,7 +1502,8 @@ mod test { Production { symbols: vec![token_span("a", a_span), token_span("b", b_span)], precedence: None, - action: None + action: None, + prod_span: Span::new(7, 14), } ); assert_eq!(&src[a_span.start()..a_span.end()], "a"); @@ -1552,7 +1582,8 @@ mod test { Production { symbols: vec![token_span("T", t_span)], precedence: None, - action: None + action: None, + prod_span: t_span, } ); assert_eq!(&src[t_span.start()..t_span.end() + 1], "T;"); @@ -2805,4 +2836,72 @@ B"; .unwrap(); } } + + #[test] + fn test_empty_production_spans_issue_473() { + let empty_prod_conflicts = [ + ( + "%start Expr +%% +Expr: %empty | Factor; +Factor: ')' Expr ')'; +", + (0, Span::new(21, 27)), + ), + ( + "%start Expr +%% +Expr: | Factor; +Factor: ')' Expr ')'; +", + (0, Span::new(21, 21)), + ), + ( + "%start Expr +%% +Expr:| Factor; +Factor: ')' Expr ')'; +", + (0, Span::new(20, 20)), + ), + ( + "%start Expr +%% +Expr: Factor | %empty; +Factor: ')' Expr ')'; +", + (1, Span::new(30, 36)), + ), + ( + "%start Expr +%% +Expr: Factor | ; +Factor: ')' Expr ')'; +", + (1, Span::new(30, 30)), + ), + ( + "%start Expr +%% +Expr: Factor|; +Factor: ')' Expr ')'; +", + (1, Span::new(28, 28)), + ), + ]; + + for (i, (src, (empty_pidx, empty_span))) in empty_prod_conflicts.iter().enumerate() { + eprintln!("{}", i); + let ast = parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap(); + assert_eq!( + ast.prods[ast.get_rule("Expr").unwrap().pidxs[*empty_pidx]], + Production { + symbols: vec![], + precedence: None, + action: None, + prod_span: *empty_span, + } 
+ ); + } + } } diff --git a/nimbleparse/src/diagnostics.rs b/nimbleparse/src/diagnostics.rs index d2319f758..0888b7b70 100644 --- a/nimbleparse/src/diagnostics.rs +++ b/nimbleparse/src/diagnostics.rs @@ -24,18 +24,14 @@ where usize: num_traits::AsPrimitive, StorageT: 'static + num_traits::PrimInt + num_traits::Unsigned, { - if usize::from(pidx) < ast.prods.len() { - let prod = &ast.prods[usize::from(pidx)]; - prod.symbols - .iter() - .map(|sym| match sym { - Symbol::Rule(name, span) => (format!("'{}'", name), span), - Symbol::Token(name, span) => (format!("'{}'", name), span), - }) - .unzip() - } else { - (vec![], vec![]) - } + let prod = &ast.prods[usize::from(pidx)]; + prod.symbols + .iter() + .map(|sym| match sym { + Symbol::Rule(name, span) => (format!("'{}'", name), span), + Symbol::Token(name, span) => (format!("'{}'", name), span), + }) + .unzip() } impl<'a> SpannedDiagnosticFormatter<'a> { @@ -120,7 +116,7 @@ impl<'a> SpannedDiagnosticFormatter<'a> { )); // Add underline upto the end of line. 
- out.push_str(&(underline_c.to_string()).repeat(UnicodeWidthStr::width( - &self.src[underline_span.start()..underline_span.end()], - ))); + // A zero-width span (e.g. an empty production) still gets one underline character. + let span_text = &self.src[underline_span.start()..underline_span.end()]; + out.push_str(&underline_c.to_string().repeat(UnicodeWidthStr::width(span_text).max(1))); if source_lines.peek().is_none() { @@ -217,7 +213,7 @@ impl<'a> SpannedDiagnosticFormatter<'a> { out.push_str( &underline_c .to_string() - .repeat(UnicodeWidthStr::width(&self.src[span.start()..span.end()])), + .repeat(UnicodeWidthStr::width(&self.src[span.start()..span.end()]).max(1)), ); if let Some(next_span) = spans.peek() { out.push_str(&" ".repeat(UnicodeWidthStr::width( @@ -279,7 +275,8 @@ impl<'a> SpannedDiagnosticFormatter<'a> { let s_tok_span = grm.token_span(*s_tok_idx).unwrap(); let shift_name = grm.token_name(*s_tok_idx).unwrap(); let reduce_name = grm.rule_name_str(r_rule_idx); - let (_r_prod_names, r_prod_spans) = pidx_prods_data(ast, *r_prod_idx); + let (_r_prod_names, mut r_prod_spans) = pidx_prods_data(ast, *r_prod_idx); + let fallback_span = ast.prods[usize::from(*r_prod_idx)].prod_span; eprintln!( "{}", self.file_location_msg( @@ -300,13 +297,17 @@ impl<'a> SpannedDiagnosticFormatter<'a> { "{}", self.underline_span_with_text(r_rule_span, "Reduced rule".to_string(), '+') ); + + if r_prod_spans.is_empty() { + r_prod_spans.push(fallback_span); + } + let mut prod_lines = r_prod_spans .iter() .map(|span| self.nlc.span_line_bytes(*span)) .collect::>(); prod_lines.sort(); prod_lines.dedup(); - assert!(!r_prod_spans.is_empty()); for lines in &prod_lines { let mut spans_on_line = Vec::new();