diff --git a/cfgrammar/src/lib/yacc/ast.rs b/cfgrammar/src/lib/yacc/ast.rs index b9a4aab60..2fd906f39 100644 --- a/cfgrammar/src/lib/yacc/ast.rs +++ b/cfgrammar/src/lib/yacc/ast.rs @@ -7,13 +7,14 @@ use indexmap::{IndexMap, IndexSet}; use super::{ parser::YaccParser, Precedence, YaccGrammarError, YaccGrammarErrorKind, YaccGrammarWarning, - YaccGrammarWarningKind, YaccKind, + YaccGrammarWarningKind, YaccKind, YaccKindResolver, }; use crate::Span; /// Contains a `GrammarAST` structure produced from a grammar source file. /// As well as any errors which occurred during the construction of the AST. pub struct ASTWithValidityInfo { + yacc_kind: Option, ast: GrammarAST, errs: Vec, } @@ -23,18 +24,22 @@ impl ASTWithValidityInfo { /// encountered during the construction of it. The `ASTWithValidityInfo` can be /// then unused to construct a `YaccGrammar`, which will either produce an /// `Ok(YaccGrammar)` or an `Err` which includes these errors. - pub fn new(yacc_kind: YaccKind, s: &str) -> Self { + pub fn new(yacc_kind_resolver: YaccKindResolver, s: &str) -> Self { let mut errs = Vec::new(); - let ast = match yacc_kind { - YaccKind::Original(_) | YaccKind::Grmtools | YaccKind::Eco => { - let mut yp = YaccParser::new(yacc_kind, s.to_string()); - yp.parse().map_err(|e| errs.extend(e)).ok(); - let mut ast = yp.ast(); + let (yacc_kind, ast) = { + let mut yp = YaccParser::new(yacc_kind_resolver, s.to_string()); + yp.parse().map_err(|e| errs.extend(e)).ok(); + let (yacc_kind, mut ast) = yp.build(); + if yacc_kind.is_some() { ast.complete_and_validate().map_err(|e| errs.push(e)).ok(); - ast } + (yacc_kind, ast) }; - ASTWithValidityInfo { ast, errs } + ASTWithValidityInfo { + ast, + errs, + yacc_kind, + } } /// Returns a `GrammarAST` constructed as the result of parsing a source file. @@ -51,6 +56,11 @@ impl ASTWithValidityInfo { self.errors().is_empty() } + /// Returns the `YaccKind` that was used to parse the `GrammarAST`. 
+ pub fn yacc_kind(&self) -> Option { + self.yacc_kind + } + /// Returns all errors which were encountered during AST construction. pub fn errors(&self) -> &[YaccGrammarError] { self.errs.as_slice() diff --git a/cfgrammar/src/lib/yacc/firsts.rs b/cfgrammar/src/lib/yacc/firsts.rs index b1040c8dc..6bea7312f 100644 --- a/cfgrammar/src/lib/yacc/firsts.rs +++ b/cfgrammar/src/lib/yacc/firsts.rs @@ -9,7 +9,7 @@ use crate::{RIdx, Symbol, TIdx}; /// `Firsts` stores all the first sets for a given grammar. For example, given this code and /// grammar: /// ```text -/// let grm = YaccGrammar::new(YaccKind::Original(YaccOriginalActionKind::GenericParseTree), " +/// let grm = YaccGrammar::new(YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " /// S: A 'b'; /// A: 'a' /// | ;").unwrap(); @@ -143,7 +143,7 @@ where #[cfg(test)] mod test { use super::{ - super::{YaccGrammar, YaccKind, YaccOriginalActionKind}, + super::{YaccGrammar, YaccKind, YaccKindResolver, YaccOriginalActionKind}, YaccFirsts, }; use num_traits::{AsPrimitive, PrimInt, Unsigned}; @@ -180,7 +180,7 @@ mod test { #[test] fn test_first() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start C %token c d @@ -202,7 +202,7 @@ mod test { #[test] fn test_first_no_subsequent_rules() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start C %token c d @@ -220,7 +220,7 @@ mod test { #[test] fn test_first_epsilon() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start A %token a b c @@ -241,7 +241,7 @@ mod test { #[test] fn test_last_epsilon() { let grm = YaccGrammar::new( - 
YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start A %token b c @@ -261,7 +261,7 @@ mod test { #[test] fn test_first_no_multiples() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start A %token b c @@ -277,7 +277,7 @@ mod test { fn eco_grammar() -> YaccGrammar { YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start S %token a b c d f @@ -308,7 +308,7 @@ mod test { #[test] fn test_first_from_eco_bug() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start E %token a b c d e f diff --git a/cfgrammar/src/lib/yacc/follows.rs b/cfgrammar/src/lib/yacc/follows.rs index 7c39f8ffa..4af52cc84 100644 --- a/cfgrammar/src/lib/yacc/follows.rs +++ b/cfgrammar/src/lib/yacc/follows.rs @@ -9,7 +9,7 @@ use crate::{RIdx, Symbol, TIdx}; /// `Follows` stores all the Follow sets for a given grammar. 
For example, given this code and /// grammar: /// ```text -/// let grm = YaccGrammar::new(YaccKind::Original(YaccOriginalActionKind::GenericParseTree), " +/// let grm = YaccGrammar::new(YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " /// S: A 'b'; /// A: 'a' | ; /// ").unwrap(); @@ -115,7 +115,7 @@ where #[cfg(test)] mod test { use super::{ - super::{YaccGrammar, YaccKind, YaccOriginalActionKind}, + super::{YaccGrammar, YaccKind, YaccKindResolver, YaccOriginalActionKind}, YaccFollows, }; use num_traits::{AsPrimitive, PrimInt, Unsigned}; @@ -149,7 +149,7 @@ mod test { fn test_follow() { // Adapted from p2 of https://www.cs.uaf.edu/~cs331/notes/FirstFollow.pdf let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start E %% @@ -173,7 +173,7 @@ mod test { fn test_follow2() { // Adapted from https://www.l2f.inesc-id.pt/~david/w/pt/Top-Down_Parsing/Exercise_5:_Test_2010/07/01 let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start A %% @@ -196,7 +196,7 @@ mod test { #[test] fn test_follow3() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start S %% @@ -213,7 +213,7 @@ mod test { #[test] fn test_follow_corchuelo() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start E %% diff --git a/cfgrammar/src/lib/yacc/grammar.rs b/cfgrammar/src/lib/yacc/grammar.rs index 8c20196e9..42f8fe459 100644 --- a/cfgrammar/src/lib/yacc/grammar.rs +++ b/cfgrammar/src/lib/yacc/grammar.rs @@ -6,7 +6,10 @@ use 
num_traits::{AsPrimitive, PrimInt, Unsigned}; use serde::{Deserialize, Serialize}; use vob::Vob; -use super::{ast, firsts::YaccFirsts, follows::YaccFollows, parser::YaccGrammarResult, YaccKind}; +use super::{ + ast, firsts::YaccFirsts, follows::YaccFollows, parser::YaccGrammarResult, YaccKind, + YaccKindResolver, +}; use crate::{PIdx, RIdx, SIdx, Span, Symbol, TIdx}; const START_RULE: &str = "^"; @@ -89,7 +92,7 @@ pub struct YaccGrammar { // create the start rule ourselves (without relying on user input), this is a safe assumption. impl YaccGrammar { - pub fn new(yacc_kind: YaccKind, s: &str) -> YaccGrammarResult { + pub fn new(yacc_kind: YaccKindResolver, s: &str) -> YaccGrammarResult { YaccGrammar::new_with_storaget(yacc_kind, s) } } @@ -105,13 +108,15 @@ where /// As we're compiling the `YaccGrammar`, we add a new start rule (which we'll refer to as `^`, /// though the actual name is a fresh name that is guaranteed to be unique) that references the /// user defined start rule. - pub fn new_with_storaget(yacc_kind: YaccKind, s: &str) -> YaccGrammarResult { - let ast_validation = ast::ASTWithValidityInfo::new(yacc_kind, s); - Self::new_from_ast_with_validity_info(yacc_kind, &ast_validation) + pub fn new_with_storaget( + yacc_kind_resolver: YaccKindResolver, + s: &str, + ) -> YaccGrammarResult { + let ast_validation = ast::ASTWithValidityInfo::new(yacc_kind_resolver, s); + Self::new_from_ast_with_validity_info(&ast_validation) } pub fn new_from_ast_with_validity_info( - yacc_kind: YaccKind, ast_validation: &ast::ASTWithValidityInfo, ) -> YaccGrammarResult { if !ast_validation.is_valid() { @@ -149,7 +154,10 @@ where let implicit_rule; let implicit_start_rule; - match yacc_kind { + match ast_validation + .yacc_kind() + .expect("is_valid() ensures Some(yacc_kind)") + { YaccKind::Original(_) | YaccKind::Grmtools => { implicit_rule = None; implicit_start_rule = None; @@ -1033,7 +1041,9 @@ where #[cfg(test)] mod test { use super::{ - super::{AssocKind, Precedence, 
YaccGrammar, YaccKind, YaccOriginalActionKind}, + super::{ + AssocKind, Precedence, YaccGrammar, YaccKind, YaccKindResolver, YaccOriginalActionKind, + }, rule_max_costs, rule_min_costs, IMPLICIT_RULE, IMPLICIT_START_RULE, }; use crate::{PIdx, RIdx, Span, Symbol, TIdx}; @@ -1056,7 +1066,7 @@ mod test { #[test] fn test_minimal() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), "%start R %token T %% R: 'T';", ) .unwrap(); @@ -1087,7 +1097,7 @@ mod test { #[test] fn test_rule_ref() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), "%start R %token T %% R : S; S: 'T';", ) .unwrap(); @@ -1116,7 +1126,7 @@ mod test { #[rustfmt::skip] fn test_long_prod() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), "%start R %token T1 T2 %% R : S 'T1' S; S: 'T2';" ).unwrap(); @@ -1147,7 +1157,7 @@ mod test { #[test] fn test_prods_rules() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start A %% @@ -1170,7 +1180,7 @@ mod test { #[rustfmt::skip] fn test_left_right_nonassoc_precs() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start Expr %right '=' @@ -1203,7 +1213,7 @@ mod test { #[rustfmt::skip] fn test_prec_override() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start expr %left 
'+' '-' @@ -1231,7 +1241,7 @@ mod test { #[rustfmt::skip] fn test_implicit_tokens_rewrite() { let grm = YaccGrammar::new( - YaccKind::Eco, + YaccKindResolver::Force(YaccKind::Eco), " %implicit_tokens ws1 ws2 %start S @@ -1307,7 +1317,7 @@ mod test { #[rustfmt::skip] fn test_has_path() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start A %% @@ -1334,7 +1344,7 @@ mod test { #[rustfmt::skip] fn test_rule_min_costs() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start A %% @@ -1357,7 +1367,7 @@ mod test { #[test] fn test_min_sentences() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start A %% @@ -1405,7 +1415,7 @@ mod test { #[rustfmt::skip] fn test_rule_max_costs1() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start A %% @@ -1429,7 +1439,7 @@ mod test { #[rustfmt::skip] fn test_rule_max_costs2() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start A %% @@ -1451,7 +1461,7 @@ mod test { fn test_out_of_order_productions() { // Example taken from p54 of Locally least-cost error repair in LR parsers, Carl Cerecke let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start S %% @@ -1482,8 +1492,11 @@ mod test { #[test] fn test_token_spans() { let src = 
"%%\nAB: 'a' | 'foo';"; - let grm = - YaccGrammar::new(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap(); + let grm = YaccGrammar::new( + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::NoAction)), + src, + ) + .unwrap(); let token_map = grm.tokens_map(); let a_tidx = token_map.get("a"); let foo_tidx = token_map.get("foo"); @@ -1507,8 +1520,11 @@ mod test { AB: A AB | B ';' AB; %% "; - let grm = - YaccGrammar::new(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap(); + let grm = YaccGrammar::new( + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::NoAction)), + src, + ) + .unwrap(); let token_map = grm.tokens_map(); let c_tidx = token_map.get("c").unwrap(); assert_eq!(grm.token_name(*c_tidx), Some("c")); diff --git a/cfgrammar/src/lib/yacc/mod.rs b/cfgrammar/src/lib/yacc/mod.rs index 8a2bfcd12..7034aaaad 100644 --- a/cfgrammar/src/lib/yacc/mod.rs +++ b/cfgrammar/src/lib/yacc/mod.rs @@ -14,6 +14,17 @@ pub use self::{ #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +#[derive(Clone, Copy, Debug)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum YaccKindResolver { + /// The user can specify `%grmtools` in their grammar but unless it is compatible with this `YaccKind`, it's an error + Force(YaccKind), + /// Use `YaccKind` if the user doesn't specify `%grmtools` in their grammar + Default(YaccKind), + /// The user must specify `%grmtools` in their grammars or we throw an error + NoDefault, +} + /// The particular Yacc variant this grammar makes use of. 
#[derive(Clone, Copy, Debug)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] diff --git a/cfgrammar/src/lib/yacc/parser.rs b/cfgrammar/src/lib/yacc/parser.rs index 3bac4398e..d91f05736 100644 --- a/cfgrammar/src/lib/yacc/parser.rs +++ b/cfgrammar/src/lib/yacc/parser.rs @@ -18,7 +18,7 @@ use crate::{Span, Spanned}; use super::{ ast::{GrammarAST, Symbol}, - AssocKind, Precedence, YaccKind, + AssocKind, Precedence, YaccKind, YaccKindResolver, YaccOriginalActionKind, }; /// The various different possible Yacc parser errors. @@ -56,6 +56,10 @@ pub enum YaccGrammarErrorKind { UnknownToken(String), NoPrecForToken(String), UnknownEPP(String), + InvalidYaccKind, + InvalidGrmtoolsHeaderKey, + DuplicateGrmtoolsHeaderKey, + MissingGrmtoolsHeader, } /// Any error from the Yacc parser returns an instance of this struct. @@ -141,6 +145,12 @@ impl fmt::Display for YaccGrammarErrorKind { name ) } + YaccGrammarErrorKind::MissingGrmtoolsHeader => "Missing '%grmtools' header section", + YaccGrammarErrorKind::DuplicateGrmtoolsHeaderKey => { + "Duplicate header key in %grmtools section" + } + YaccGrammarErrorKind::InvalidGrmtoolsHeaderKey => "Invalid key in %grmtools section", + YaccGrammarErrorKind::InvalidYaccKind => "Invalid yacc kind", }; write!(f, "{}", s) } @@ -245,6 +255,9 @@ impl Spanned for YaccGrammarError { | YaccGrammarErrorKind::UnknownRuleRef(_) | YaccGrammarErrorKind::UnknownToken(_) | YaccGrammarErrorKind::NoPrecForToken(_) + | YaccGrammarErrorKind::MissingGrmtoolsHeader + | YaccGrammarErrorKind::InvalidGrmtoolsHeaderKey + | YaccGrammarErrorKind::InvalidYaccKind | YaccGrammarErrorKind::UnknownEPP(_) => SpansKind::Error, YaccGrammarErrorKind::DuplicatePrecedence | YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration @@ -253,13 +266,15 @@ impl Spanned for YaccGrammarError { | YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration | YaccGrammarErrorKind::DuplicateStartDeclaration | YaccGrammarErrorKind::DuplicateActiontypeDeclaration + | 
YaccGrammarErrorKind::DuplicateGrmtoolsHeaderKey | YaccGrammarErrorKind::DuplicateEPP => SpansKind::DuplicationError, } } } pub(crate) struct YaccParser { - yacc_kind: YaccKind, + yacc_kind_resolver: YaccKindResolver, + yacc_kind: Option, src: String, num_newlines: usize, ast: GrammarAST, @@ -295,9 +310,10 @@ fn add_duplicate_occurrence( /// The actual parser is intended to be entirely opaque from outside users. impl YaccParser { - pub(crate) fn new(yacc_kind: YaccKind, src: String) -> YaccParser { + pub(crate) fn new(yacc_kind_resolver: YaccKindResolver, src: String) -> YaccParser { YaccParser { - yacc_kind, + yacc_kind_resolver, + yacc_kind: None, src, num_newlines: 0, ast: GrammarAST::new(), @@ -310,7 +326,27 @@ impl YaccParser { // We pass around an index into the *bytes* of self.src. We guarantee that at all times // this points to the beginning of a UTF-8 character (since multibyte characters exist, not // every byte within the string is also a valid character). - let mut result = self.parse_declarations(0, &mut errs); + let mut result = self.parse_grmtools_header(0); + if result.is_ok() && self.yacc_kind.is_none() { + match self.yacc_kind_resolver { + YaccKindResolver::Default(kind) | YaccKindResolver::Force(kind) => { + self.yacc_kind = Some(kind); + } + YaccKindResolver::NoDefault => { + result = Err(self.mk_error(YaccGrammarErrorKind::InvalidYaccKind, 0)); + } + } + } + result = self.parse_declarations( + match result { + Ok(i) => i, + Err(e) => { + errs.push(e); + return Err(errs); + } + }, + &mut errs, + ); result = self.parse_rules(match result { Ok(i) => i, Err(e) => { @@ -339,8 +375,99 @@ impl YaccParser { } } - pub(crate) fn ast(self) -> GrammarAST { - self.ast + fn parse_yacckind( + &mut self, + i: usize, + update_yacc_kind: bool, + ) -> Result { + // Compares haystack converted to lowercase to needle (assumed to be lowercase). 
+ fn starts_with_lower(needle: &'static str, haystack: &'_ str) -> bool { + if let Some((prefix, _)) = haystack.split_at_checked(needle.len()) { + prefix.to_lowercase() == needle + } else { + false + } + } + + const YACC_KINDS: [(&str, YaccKind); 5] = [ + ("grmtools", YaccKind::Grmtools), + ( + "original(noaction)", + YaccKind::Original(YaccOriginalActionKind::NoAction), + ), + ( + "original(useraction)", + YaccKind::Original(YaccOriginalActionKind::UserAction), + ), + ( + "original(genericparsetree)", + YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + ), + ("eco", YaccKind::Eco), // must be lowercase: `starts_with_lower` lowercases only the input + ]; + let j = self.parse_ws(i, false)?; + let s = &self.src[j..]; // match from `j`, the offset that `end_pos` below is computed from + for (kind_name, kind) in YACC_KINDS { + if starts_with_lower(kind_name, s) { + if update_yacc_kind { + self.yacc_kind = Some(kind); + } + let end_pos = j + kind_name.len(); + return Ok(end_pos); + } + } + // This strikes me as an interesting case where we know the start pos of the + // invalid yacc kind value, but do not know the end, we could `self.parse_to_eol()`.
+ Err(self.mk_error(YaccGrammarErrorKind::InvalidYaccKind, i)) + } + + fn parse_grmtools_header(&mut self, mut i: usize) -> Result { + let mut yacc_kind_key_span = None; + let update_yacc_kind = !matches!(self.yacc_kind_resolver, YaccKindResolver::Force(_)); + let require_yacc_kind = matches!(self.yacc_kind_resolver, YaccKindResolver::NoDefault); + + i = self.parse_ws(i, true)?; + if let Some(j) = self.lookahead_is("%grmtools", i) { + i = self.parse_ws(j, true)?; + if let Some(j) = self.lookahead_is("{", i) { + i = self.parse_ws(j, true)?; + while self.lookahead_is("}", i).is_none() { + let key_start_pos = i; + let (key_end_pos, key) = self.parse_name(i)?; + i = self.parse_ws(key_end_pos, false)?; + if key == "yacckind" { + let val_end_pos = self.parse_yacckind(i, update_yacc_kind)?; + if let Some(orig) = yacc_kind_key_span { + let dupe = Span::new(key_start_pos, key_end_pos); + return Err(YaccGrammarError { + kind: YaccGrammarErrorKind::DuplicateGrmtoolsHeaderKey, + spans: vec![orig, dupe], + }); + } else { + yacc_kind_key_span = Some(Span::new(key_start_pos, key_end_pos)); + i = self.parse_ws(val_end_pos, true)?; + } + } else { + return Err(YaccGrammarError { + kind: YaccGrammarErrorKind::InvalidGrmtoolsHeaderKey, + spans: vec![Span::new(key_start_pos, key_end_pos)], + }); + } + } + if let Some(i) = self.lookahead_is("}", i) { + return Ok(i); + } + } else if require_yacc_kind { + return Err(self.mk_error(YaccGrammarErrorKind::MissingGrmtoolsHeader, i)); + } + } else if require_yacc_kind { + return Err(self.mk_error(YaccGrammarErrorKind::MissingGrmtoolsHeader, i)); + } + Ok(i) + } + + pub(crate) fn build(self) -> (Option, GrammarAST) { + (self.yacc_kind, self.ast) } fn parse_declarations( @@ -365,7 +492,7 @@ impl YaccParser { } continue; } - if let YaccKind::Original(_) = self.yacc_kind { + if let Some(YaccKind::Original(_)) = self.yacc_kind { if let Some(j) = self.lookahead_is("%actiontype", i) { i = self.parse_ws(j, false)?; let (j, n) = self.parse_to_eol(i)?; 
@@ -526,7 +653,7 @@ impl YaccParser { i = self.parse_ws(j, true)?; continue; } - if let YaccKind::Eco = self.yacc_kind { + if let Some(YaccKind::Eco) = self.yacc_kind { if let Some(j) = self.lookahead_is("%implicit_tokens", i) { i = self.parse_ws(j, false)?; let num_newlines = self.num_newlines; @@ -623,7 +750,8 @@ impl YaccParser { self.ast.start = Some((rn.clone(), span)); } match self.yacc_kind { - YaccKind::Original(_) | YaccKind::Eco => { + None => unreachable!("Concrete YaccKind resolved at this point"), + Some(YaccKind::Original(_)) | Some(YaccKind::Eco) => { if self.ast.get_rule(&rn).is_none() { self.ast.add_rule( (rn.clone(), span), @@ -632,7 +760,7 @@ impl YaccParser { } i = j; } - YaccKind::Grmtools => { + Some(YaccKind::Grmtools) => { i = self.parse_ws(j, true)?; if let Some(j) = self.lookahead_is("->", i) { i = j; @@ -1002,16 +1130,16 @@ mod test { use super::{ super::{ ast::{GrammarAST, Production, Symbol}, - AssocKind, Precedence, YaccKind, YaccOriginalActionKind, + AssocKind, Precedence, YaccKind, YaccKindResolver, YaccOriginalActionKind, }, Span, Spanned, YaccGrammarError, YaccGrammarErrorKind, YaccParser, }; use std::collections::HashSet; fn parse(yacc_kind: YaccKind, s: &str) -> Result> { - let mut yp = YaccParser::new(yacc_kind, s.to_string()); + let mut yp = YaccParser::new(YaccKindResolver::Force(yacc_kind), s.to_string()); yp.parse()?; - Ok(yp.ast()) + Ok(yp.build().1) } fn rule(n: &str) -> Symbol { diff --git a/lrpar/cttests/src/calc_nodefault_yacckind.test b/lrpar/cttests/src/calc_nodefault_yacckind.test new file mode 100644 index 000000000..756e52a3f --- /dev/null +++ b/lrpar/cttests/src/calc_nodefault_yacckind.test @@ -0,0 +1,39 @@ +name: Test basic user actions using the calculator grammar +grammar: | + %grmtools {yacckind Original(UserAction)} + %start Expr + %actiontype Result + %avoid_insert 'INT' + %% + Expr: Expr '+' Term { Ok($1? + $3?) } + | Term { $1 } + ; + + Term: Term '*' Factor { Ok($1? * $3?) 
} + | Factor { $1 } + ; + + Factor: '(' Expr ')' { $2 } + | 'INT' { + let l = $1.map_err(|_| ())?; + match $lexer.span_str(l.span()).parse::() { + Ok(v) => Ok(v), + Err(_) => { + let ((_, col), _) = $lexer.line_col(l.span()); + eprintln!("Error at column {}: '{}' cannot be represented as a u64", + col, + $lexer.span_str(l.span())); + Err(()) + } + } + } + ; + +lexer: | + %% + [0-9]+ "INT" + \+ "+" + \* "*" + \( "(" + \) ")" + [\t ]+ ; diff --git a/lrpar/cttests/src/cgen_helper.rs b/lrpar/cttests/src/cgen_helper.rs index b90755574..00ba5b410 100644 --- a/lrpar/cttests/src/cgen_helper.rs +++ b/lrpar/cttests/src/cgen_helper.rs @@ -18,18 +18,19 @@ pub(crate) fn run_test_path>(path: P) -> Result<(), Box { - YaccKind::Original(YaccOriginalActionKind::NoAction) + let yacckind = match docs[0]["yacckind"].as_str() { + Some("Original(YaccOriginalActionKind::NoAction)") => { + Some(YaccKind::Original(YaccOriginalActionKind::NoAction)) } - "Original(YaccOriginalActionKind::UserAction)" => { - YaccKind::Original(YaccOriginalActionKind::UserAction) + Some("Original(YaccOriginalActionKind::UserAction)") => { + Some(YaccKind::Original(YaccOriginalActionKind::UserAction)) } - "Grmtools" => YaccKind::Grmtools, - "Original(YaccOriginalActionKind::GenericParseTree)" => { - YaccKind::Original(YaccOriginalActionKind::GenericParseTree) + Some("Grmtools") => Some(YaccKind::Grmtools), + Some("Original(YaccOriginalActionKind::GenericParseTree)") => { + Some(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)) } - s => panic!("YaccKind '{}' not supported", s), + Some(s) => panic!("YaccKind '{}' not supported", s), + None => None, }; let (negative_yacc_flags, positive_yacc_flags) = &docs[0]["yacc_flags"] .as_vec() @@ -88,8 +89,11 @@ pub(crate) fn run_test_path>(path: P) -> Result<(), Box>::new() - .yacckind(yacckind) + let mut cp_build = CTParserBuilder::>::new(); + if let Some(yacckind) = yacckind { + cp_build = cp_build.yacckind(yacckind); + } + cp_build = cp_build 
.grammar_path(pg.to_str().unwrap()) .output_path(&outp); if let Some(flag) = check_flag(yacc_flags, "error_on_conflicts") { diff --git a/lrpar/cttests/src/lib.rs b/lrpar/cttests/src/lib.rs index 34f9c8be9..8f2355320 100644 --- a/lrpar/cttests/src/lib.rs +++ b/lrpar/cttests/src/lib.rs @@ -19,6 +19,9 @@ lrpar_mod!("calc_actiontype.y"); lrlex_mod!("calc_noactions.l"); lrpar_mod!("calc_noactions.y"); +lrlex_mod!("calc_nodefault_yacckind.l"); +lrpar_mod!("calc_nodefault_yacckind.y"); + lrlex_mod!("calc_unsafeaction.l"); lrpar_mod!("calc_unsafeaction.y"); @@ -75,6 +78,15 @@ fn test_basic_actions() { } } +#[test] +fn test_nodefault_yacckind() { + let lexerdef = calc_nodefault_yacckind_l::lexerdef(); + let lexer = lexerdef.lexer("2+3"); + match calc_nodefault_yacckind_y::parse(&lexer) { + (Some(Ok(5)), ref errs) if errs.is_empty() => (), + _ => unreachable!(), + } +} #[test] fn test_unsafe_actions() { let lexerdef = calc_unsafeaction_l::lexerdef(); diff --git a/lrpar/examples/calc_actions/src/calc.y b/lrpar/examples/calc_actions/src/calc.y index 41176308f..1797499b0 100644 --- a/lrpar/examples/calc_actions/src/calc.y +++ b/lrpar/examples/calc_actions/src/calc.y @@ -1,3 +1,6 @@ +%grmtools { + yacckind Grmtools +} %start Expr %avoid_insert "INT" %% diff --git a/lrpar/examples/calc_parsetree/src/calc.y b/lrpar/examples/calc_parsetree/src/calc.y index e5d7a6466..c4e68f3dc 100644 --- a/lrpar/examples/calc_parsetree/src/calc.y +++ b/lrpar/examples/calc_parsetree/src/calc.y @@ -1,3 +1,4 @@ +%grmtools{yacckind Original(GenericParseTree)} %start Expr %avoid_insert "INT" %% diff --git a/lrpar/src/lib/ctbuilder.rs b/lrpar/src/lib/ctbuilder.rs index f7003b50c..d26f734ff 100644 --- a/lrpar/src/lib/ctbuilder.rs +++ b/lrpar/src/lib/ctbuilder.rs @@ -18,7 +18,9 @@ use std::{ use bincode::{deserialize, serialize_into}; use cfgrammar::{ newlinecache::NewlineCache, - yacc::{ast::ASTWithValidityInfo, YaccGrammar, YaccKind, YaccOriginalActionKind}, + yacc::{ + ast::ASTWithValidityInfo, 
YaccGrammar, YaccKind, YaccKindResolver, YaccOriginalActionKind, + }, RIdx, Spanned, Symbol, }; use filetime::FileTime; @@ -391,7 +393,7 @@ where /// # Panics /// /// If `StorageT` is not big enough to index the grammar's tokens, rules, or productions. - pub fn build(self) -> Result, Box> { + pub fn build(mut self) -> Result, Box> { let grmp = self .grammar_path .as_ref() @@ -401,10 +403,9 @@ where .as_ref() .expect("output_path must be specified before processing."); let yk = match self.yacckind { - None => panic!("yacckind must be specified before processing."), - Some(YaccKind::Original(x)) => YaccKind::Original(x), - Some(YaccKind::Grmtools) => YaccKind::Grmtools, + None => YaccKindResolver::NoDefault, Some(YaccKind::Eco) => panic!("Eco compile-time grammar generation not supported."), + Some(x) => YaccKindResolver::Force(x), }; { @@ -418,6 +419,7 @@ where let inc = read_to_string(grmp).map_err(|e| format!("When reading '{}': {e}", grmp.display()))?; let ast_validation = ASTWithValidityInfo::new(yk, &inc); + self.yacckind = ast_validation.yacc_kind(); let warnings = ast_validation.ast().warnings(); let spanned_fmt = |x: &dyn Spanned, inc: &str, line_cache: &NewlineCache| { if let Some((line, column)) = @@ -429,7 +431,7 @@ where } }; - let res = YaccGrammar::::new_from_ast_with_validity_info(yk, &ast_validation); + let res = YaccGrammar::::new_from_ast_with_validity_info(&ast_validation); let grm = match res { Ok(_) if self.warnings_are_errors && !warnings.is_empty() => { let mut line_cache = NewlineCache::new(); diff --git a/lrpar/src/lib/parser.rs b/lrpar/src/lib/parser.rs index 66a423b2f..82e969950 100644 --- a/lrpar/src/lib/parser.rs +++ b/lrpar/src/lib/parser.rs @@ -954,7 +954,7 @@ pub(crate) mod test { use std::collections::HashMap; use cfgrammar::{ - yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind}, + yacc::{YaccGrammar, YaccKind, YaccKindResolver, YaccOriginalActionKind}, Span, }; use lrtable::{from_yacc, Minimiser}; @@ -1004,7 +1004,7 @@ pub(crate) 
mod test { >, ) { let grm = YaccGrammar::::new_with_storaget( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), grms, ) .unwrap(); diff --git a/lrtable/src/lib/itemset.rs b/lrtable/src/lib/itemset.rs index 514742ac6..bf40c93ee 100644 --- a/lrtable/src/lib/itemset.rs +++ b/lrtable/src/lib/itemset.rs @@ -159,7 +159,7 @@ where #[cfg(test)] mod test { use cfgrammar::{ - yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind}, + yacc::{YaccGrammar, YaccKind, YaccKindResolver, YaccOriginalActionKind}, SIdx, Symbol, }; use vob::Vob; @@ -172,7 +172,7 @@ mod test { fn test_dragon_grammar() { // From http://binarysculpting.com/2012/02/04/computing-lr1-closure/ let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start S %% @@ -200,7 +200,7 @@ mod test { fn eco_grammar() -> YaccGrammar { YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start S %token a b c d f @@ -251,7 +251,7 @@ mod test { // aSb fn grammar3() -> YaccGrammar { YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start S %token a b c d diff --git a/lrtable/src/lib/pager.rs b/lrtable/src/lib/pager.rs index c4efdf21d..a354a11f7 100644 --- a/lrtable/src/lib/pager.rs +++ b/lrtable/src/lib/pager.rs @@ -400,7 +400,7 @@ mod test { use crate::{pager::pager_stategraph, stategraph::state_exists, StIdx}; use cfgrammar::{ - yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind}, + yacc::{YaccGrammar, YaccKind, YaccKindResolver, YaccOriginalActionKind}, SIdx, Symbol, }; @@ -440,7 +440,7 @@ mod test { // aSb fn grammar3() -> YaccGrammar { 
YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start S %token a b c d @@ -519,7 +519,7 @@ mod test { // Pager grammar fn grammar_pager() -> YaccGrammar { YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start X %% diff --git a/lrtable/src/lib/stategraph.rs b/lrtable/src/lib/stategraph.rs index bfc011775..8a2cea06e 100644 --- a/lrtable/src/lib/stategraph.rs +++ b/lrtable/src/lib/stategraph.rs @@ -250,7 +250,7 @@ pub(crate) fn state_exists( mod test { use crate::{pager::pager_stategraph, StIdx}; use cfgrammar::{ - yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind}, + yacc::{YaccGrammar, YaccKind, YaccKindResolver, YaccOriginalActionKind}, Symbol, }; @@ -259,7 +259,7 @@ mod test { fn test_statetable_core() { // Taken from p13 of https://link.springer.com/article/10.1007/s00236-010-0115-6 let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start A %% diff --git a/lrtable/src/lib/statetable.rs b/lrtable/src/lib/statetable.rs index 37fdf269f..4a2a8d127 100644 --- a/lrtable/src/lib/statetable.rs +++ b/lrtable/src/lib/statetable.rs @@ -600,7 +600,7 @@ mod test { use cfgrammar::{ yacc::{ ast::{self, ASTWithValidityInfo}, - YaccGrammar, YaccKind, YaccOriginalActionKind, + YaccGrammar, YaccKind, YaccKindResolver, YaccOriginalActionKind, }, PIdx, Span, Symbol, TIdx, }; @@ -611,7 +611,7 @@ mod test { fn test_statetable() { // Taken from p19 of www.cs.umd.edu/~mvz/cmsc430-s07/M10lr.pdf let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start Expr %% @@ -692,7 +692,7 @@ 
mod test { #[test] #[rustfmt::skip] fn test_default_reduce_reduce() { - let grm = YaccGrammar::new(YaccKind::Original(YaccOriginalActionKind::GenericParseTree), " + let grm = YaccGrammar::new(YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start A %% A : B 'x' | C 'x' 'x'; @@ -716,7 +716,7 @@ mod test { #[test] #[rustfmt::skip] fn test_default_shift_reduce() { - let grm = YaccGrammar::new(YaccKind::Original(YaccOriginalActionKind::GenericParseTree), " + let grm = YaccGrammar::new(YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start Expr %% Expr : Expr '+' Expr @@ -746,7 +746,7 @@ mod test { #[rustfmt::skip] fn test_conflict_resolution() { // Example taken from p54 of Locally least-cost error repair in LR parsers, Carl Cerecke - let grm = YaccGrammar::new(YaccKind::Original(YaccOriginalActionKind::GenericParseTree), " + let grm = YaccGrammar::new(YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start S %% S: A 'c' 'd' @@ -771,7 +771,7 @@ mod test { #[test] #[rustfmt::skip] fn test_left_associativity() { - let grm = YaccGrammar::new(YaccKind::Original(YaccOriginalActionKind::GenericParseTree), " + let grm = YaccGrammar::new(YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start Expr %left '+' %left '*' @@ -810,7 +810,7 @@ mod test { #[test] #[rustfmt::skip] fn test_left_right_associativity() { - let grm = &YaccGrammar::new(YaccKind::Original(YaccOriginalActionKind::GenericParseTree), " + let grm = &YaccGrammar::new(YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start Expr %right '=' %left '+' @@ -866,7 +866,7 @@ mod test { #[test] #[rustfmt::skip] fn test_left_right_nonassoc_associativity() { - let grm = YaccGrammar::new(YaccKind::Original(YaccOriginalActionKind::GenericParseTree), " + let grm = 
YaccGrammar::new(YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start Expr %right '=' %left '+' @@ -941,7 +941,7 @@ mod test { #[test] fn conflicts() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start A %% @@ -977,7 +977,7 @@ C : 'a'; #[test] fn accept_reduce_conflict() { let grm = YaccGrammar::new( - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), + YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)), " %start D %% @@ -1000,9 +1000,9 @@ D : D; fn test_accept_reduce_conflict_spans1() { let src = "%% S: S | ;"; - let yk = YaccKind::Original(YaccOriginalActionKind::NoAction); + let yk = YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::NoAction)); let ast_validity = ASTWithValidityInfo::new(yk, src); - let grm = YaccGrammar::::new_from_ast_with_validity_info(yk, &ast_validity).unwrap(); + let grm = YaccGrammar::::new_from_ast_with_validity_info(&ast_validity).unwrap(); let sg = pager_stategraph(&grm); match StateTable::new(&grm, &sg) { Ok(_) => panic!("Expected accept reduce conflict"), @@ -1052,9 +1052,9 @@ S: S | ;"; let src = "%% S: T | ; T: S | ;"; - let yk = YaccKind::Original(YaccOriginalActionKind::NoAction); + let yk = YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::NoAction)); let ast_validity = ASTWithValidityInfo::new(yk, src); - let grm = YaccGrammar::::new_from_ast_with_validity_info(yk, &ast_validity).unwrap(); + let grm = YaccGrammar::::new_from_ast_with_validity_info(&ast_validity).unwrap(); let sg = pager_stategraph(&grm); match StateTable::new(&grm, &sg) { Ok(_) => panic!("Expected accept reduce conflict"), diff --git a/nimbleparse/src/main.rs b/nimbleparse/src/main.rs index 88683d8dd..3d7dc8d2e 100644 --- a/nimbleparse/src/main.rs +++ b/nimbleparse/src/main.rs @@ -1,7 +1,9 @@ mod 
diagnostics; use crate::diagnostics::*; use cfgrammar::{ - yacc::{ast::ASTWithValidityInfo, YaccGrammar, YaccKind, YaccOriginalActionKind}, + yacc::{ + ast::ASTWithValidityInfo, YaccGrammar, YaccKind, YaccKindResolver, YaccOriginalActionKind, + }, Span, }; use getopts::Options; @@ -123,13 +125,13 @@ fn main() { }; let yacckind = match matches.opt_str("y") { - None => YaccKind::Original(YaccOriginalActionKind::GenericParseTree), - Some(s) => match &*s.to_lowercase() { + None => YaccKindResolver::NoDefault, + Some(s) => YaccKindResolver::Force(match &*s.to_lowercase() { "eco" => YaccKind::Eco, "grmtools" => YaccKind::Grmtools, "original" => YaccKind::Original(YaccOriginalActionKind::GenericParseTree), _ => usage(prog, &format!("Unknown Yacc variant '{}'.", s)), - }, + }), }; if matches.free.len() != 3 { @@ -154,7 +156,7 @@ fn main() { let yacc_src = read_file(&yacc_y_path); let ast_validation = ASTWithValidityInfo::new(yacckind, &yacc_src); let warnings = ast_validation.ast().warnings(); - let res = YaccGrammar::new_from_ast_with_validity_info(yacckind, &ast_validation); + let res = YaccGrammar::new_from_ast_with_validity_info(&ast_validation); let mut yacc_diagnostic_formatter: Option = None; let grm = match res { Ok(x) => {