diff --git a/cfgrammar/src/lib/yacc/ast.rs b/cfgrammar/src/lib/yacc/ast.rs index b9a4aab60..7e8c861fe 100644 --- a/cfgrammar/src/lib/yacc/ast.rs +++ b/cfgrammar/src/lib/yacc/ast.rs @@ -104,6 +104,7 @@ pub struct Production { pub enum Symbol { Rule(String, Span), Token(String, Span), + Empty(Span), } /// Specifies an index into a `GrammarAst.tokens` or a `GrammarAST.rules`. @@ -133,6 +134,7 @@ impl fmt::Display for Symbol { match *self { Symbol::Rule(ref s, _) => write!(f, "{}", s), Symbol::Token(ref s, _) => write!(f, "{}", s), + Symbol::Empty(_) => write!(f, "%empty"), } } } @@ -262,6 +264,7 @@ impl GrammarAST { }); } } + Symbol::Empty(_) => {} } } } @@ -300,6 +303,7 @@ impl GrammarAST { }); } } + Symbol::Empty(_) => {} } } Ok(()) @@ -311,6 +315,9 @@ impl GrammarAST { let (kind, span) = match symidx.symbol(self) { Symbol::Rule(_, span) => (YaccGrammarWarningKind::UnusedRule, span), Symbol::Token(_, span) => (YaccGrammarWarningKind::UnusedToken, span), + Symbol::Empty(_) => { + unreachable!() + } }; YaccGrammarWarning { kind, @@ -340,6 +347,9 @@ impl GrammarAST { Symbol::Token(sym_name, _) => { expected_unused_tokens.insert(sym_name); } + Symbol::Empty(_) => { + unreachable!(); + } } } if let Some(implicit_tokens) = self.implicit_tokens.as_ref() { @@ -364,6 +374,8 @@ impl GrammarAST { Symbol::Token(name, _) => { seen_tokens.insert(name); } + Symbol::Empty(_) => { + } }; } } diff --git a/cfgrammar/src/lib/yacc/grammar.rs b/cfgrammar/src/lib/yacc/grammar.rs index 8c20196e9..1c9b963ec 100644 --- a/cfgrammar/src/lib/yacc/grammar.rs +++ b/cfgrammar/src/lib/yacc/grammar.rs @@ -295,6 +295,7 @@ where prod.push(Symbol::Rule(rule_map[implicit_rule])); } } + ast::Symbol::Empty(_) => {} }; } let mut prec = None; diff --git a/cfgrammar/src/lib/yacc/parser.rs b/cfgrammar/src/lib/yacc/parser.rs index 22a1a1835..23327973d 100644 --- a/cfgrammar/src/lib/yacc/parser.rs +++ b/cfgrammar/src/lib/yacc/parser.rs @@ -660,6 +660,9 @@ impl YaccParser { i = self.parse_ws(i, true)?; while i < self.src.len() { if let Some(j) = self.lookahead_is("|", i) { + if syms.is_empty() { + syms.push(Symbol::Empty(Span::new(i, i))); + } self.ast.add_prod(rn.clone(), syms, prec, action); syms = Vec::new(); prec = None; @@ -667,6 +670,9 @@ impl YaccParser { i = self.parse_ws(j, true)?; continue; } else if let Some(j) = self.lookahead_is(";", i) { + if syms.is_empty() { + syms.push(Symbol::Empty(Span::new(i, i))); + } self.ast.add_prod(rn, syms, prec, action); return Ok(j); } @@ -694,19 +700,20 @@ impl YaccParser { if !(self.lookahead_is("|", i).is_some() || self.lookahead_is(";", i).is_some()) { return Err(self.mk_error(YaccGrammarErrorKind::ProductionNotTerminated, i)); } - } else if let Some(mut j) = self.lookahead_is("%empty", i) { - j = self.parse_ws(j, true)?; + } else if let Some(j) = self.lookahead_is("%empty", i) { + let k = self.parse_ws(j, true)?; // %empty could be followed by all sorts of weird syntax errors: all we try and do // is say "does this production look like it's finished" and trust that the other // errors will be caught by other parts of the parser. if !syms.is_empty() - | !(self.lookahead_is("|", j).is_some() - || self.lookahead_is(";", j).is_some() - || self.lookahead_is("{", j).is_some()) + | !(self.lookahead_is("|", k).is_some() + || self.lookahead_is(";", k).is_some() + || self.lookahead_is("{", k).is_some()) { return Err(self.mk_error(YaccGrammarErrorKind::NonEmptyProduction, i)); } - i = j; + syms.push(Symbol::Empty(Span::new(i, j))); + i = k; } else { let (j, sym, span) = self.parse_token(i)?; if self.ast.tokens.contains(&sym) { @@ -1238,7 +1245,7 @@ mod test { assert_eq!( grm.prods[grm.get_rule("A").unwrap().pidxs[0]], Production { - symbols: vec![], + symbols: vec![Symbol::Empty(Span::new(32, 32))], precedence: None, action: None } @@ -1257,7 +1264,7 @@ mod test { assert_eq!( grm.prods[grm.get_rule("B").unwrap().pidxs[1]], Production { - symbols: vec![], + symbols: vec![Symbol::Empty(Span::new(56, 56))], precedence: None, action: None } @@ -1266,7 +1273,7 @@ mod test { assert_eq!( grm.prods[grm.get_rule("C").unwrap().pidxs[0]], Production { - symbols: vec![], + symbols: vec![Symbol::Empty(Span::new(74, 74))], precedence: None, action: None } @@ -2396,6 +2403,7 @@ Expr -> () : D; for sym in &ast.prods[*pidx].symbols { let name = match sym { Symbol::Token(name, _) | Symbol::Rule(name, _) => name.clone(), + Symbol::Empty(_) => "%empty".to_string(), }; prod_names.insert(name); } @@ -2403,7 +2411,7 @@ Expr -> () : D; assert_eq!(ast.prods.len(), 5); assert_eq!( prod_names, - HashSet::from_iter(["A", "B", "C", "D"].map(|s| s.to_owned())) + HashSet::from_iter(["A", "B", "C", "D", "%empty"].map(|s| s.to_owned())) ); } diff --git a/lrtable/src/lib/statetable.rs b/lrtable/src/lib/statetable.rs index c809d1637..047efbcfa 100644 --- a/lrtable/src/lib/statetable.rs +++ b/lrtable/src/lib/statetable.rs @@ -1041,6 +1041,7 @@ S: S | ;"; match sym { ast::Symbol::Rule(_, span) => assert_eq!(span, &Span::new(6, 7)), ast::Symbol::Token(_, _) => panic!("Incorrect symbol"), + ast::Symbol::Empty(_) => panic!("Empty symbol"), } } Err(e) => panic!("Incorrect error returned {:?}", e), @@ -1072,6 +1073,7 @@ T: S | ;"; match sym { ast::Symbol::Rule(_, span) => assert_eq!(span, &Span::new(15, 16)), ast::Symbol::Token(_, _) => panic!("Incorrect symbol"), + ast::Symbol::Empty(_) => panic!("Empty symbol"), } } Err(e) => panic!("Incorrect error returned {:?}", e), diff --git a/nimbleparse/src/diagnostics.rs b/nimbleparse/src/diagnostics.rs index babfdaedf..0dae8503b 100644 --- a/nimbleparse/src/diagnostics.rs +++ b/nimbleparse/src/diagnostics.rs @@ -24,18 +24,17 @@ where usize: num_traits::AsPrimitive, StorageT: 'static + num_traits::PrimInt + num_traits::Unsigned, { - if usize::from(pidx) < ast.prods.len() { - let prod = &ast.prods[usize::from(pidx)]; - prod.symbols - .iter() - .map(|sym| match sym { - Symbol::Rule(name, span) => (format!("'{}'", name), span), - Symbol::Token(name, span) => (format!("'{}'", name), span), - }) - .unzip() - } else { - (vec![], vec![]) - } + let pidx = usize::from(pidx); + assert!(pidx < ast.prods.len()); + let prod = &ast.prods[pidx]; + prod.symbols + .iter() + .map(|sym| match sym { + Symbol::Rule(name, span) => (format!("'{}'", name), span), + Symbol::Token(name, span) => (format!("'{}'", name), span), + Symbol::Empty(span) => (format!("%empty"), span), + }) + .unzip() } impl<'a> SpannedDiagnosticFormatter<'a> {