diff --git a/lrlex/src/lib/ctbuilder.rs b/lrlex/src/lib/ctbuilder.rs index a0d317a75..7470d6a3b 100644 --- a/lrlex/src/lib/ctbuilder.rs +++ b/lrlex/src/lib/ctbuilder.rs @@ -487,21 +487,21 @@ pub fn lexerdef() -> {lexerdef_type} {{ Some(ref t) => format!("Some({:?})", t), None => "None".to_owned(), }; - let n = match r.name { + let n = match r.name() { Some(ref n) => format!("Some({}.to_string())", quote!(#n)), None => "None".to_owned(), }; - let target_state = match &r.target_state { + let target_state = match &r.target_state() { Some((id, op)) => format!("Some(({}, ::lrlex::StartStateOperation::{:?}))", id, op), None => "None".to_owned(), }; let n_span = format!( "::cfgrammar::Span::new({}, {})", - r.name_span.start(), - r.name_span.end() + r.name_span().start(), + r.name_span().end() ); let regex = &r.re_str; - let start_states = r.start_states.as_slice(); + let start_states = r.start_states(); write!( outs, " diff --git a/lrlex/src/lib/lexer.rs b/lrlex/src/lib/lexer.rs index 7fe9463c4..10afa0277 100644 --- a/lrlex/src/lib/lexer.rs +++ b/lrlex/src/lib/lexer.rs @@ -50,27 +50,32 @@ pub const DEFAULT_REGEX_OPTIONS: RegexOptions = RegexOptions { #[derive(Debug)] #[doc(hidden)] pub struct Rule { - /// If `Some`, the ID that lexemes created against this rule will be given (lrlex gives such - /// rules a guaranteed unique value, though that value can be overridden by clients who need to - /// control the ID). If `None`, then this rule specifies lexemes which should not appear in the - /// user's input. + /// If `Some`, this specifies the ID that lexemes resulting from this rule will have. Note that + /// lrlex gives rules a guaranteed unique value by default, though users can later override + /// that, potentially undermining uniqueness if they're not careful. + /// + /// If `None`, then this rule specifies lexemes which should not appear in the user's input. pub(super) tok_id: Option, /// This rule's name. 
If None, then text which matches this rule will be skipped (i.e. will not /// create a lexeme). + #[deprecated(note = "Use the name() function")] pub name: Option, + #[deprecated(note = "Use the name_span() function")] pub name_span: Span, pub(super) re_str: String, re: Regex, /// Id(s) of permitted start conditions for the lexer to match this rule. + #[deprecated(note = "Use the start_states() function")] pub start_states: Vec, /// If Some(_), successful matching of this rule will cause the current stack of start /// conditions in the lexer to be updated with the enclosed value, using the designated /// operation. /// If None, successful matching causes no change to the current start condition. + #[deprecated(note = "Use the target_state() function")] pub target_state: Option<(usize, StartStateOperation)>, } -impl Rule { +impl Rule { /// Create a new `Rule`. This interface is unstable and should only be used by code generated /// by lrlex itself. #[doc(hidden)] @@ -115,6 +120,7 @@ impl Rule { } let re = re.build()?; + #[allow(deprecated)] Ok(Rule { tok_id, name, @@ -125,6 +131,44 @@ impl Rule { target_state, }) } + + /// Return this rule's token ID, if any. + /// + /// If `Some`, this specifies the ID that lexemes resulting from this rule will have. If + /// `None`, then this rule specifies lexemes which should not appear in the user's input. + pub fn tok_id(&self) -> Option { + self.tok_id + } + + /// Return this rule's name. If `None`, then text which matches this rule will be skipped (i.e. + /// it will not result in the creation of a [Lexeme]). + pub fn name(&self) -> Option<&str> { + #[allow(deprecated)] + self.name.as_deref() + } + + /// Return the [Span] of this rule's name. + pub fn name_span(&self) -> Span { + #[allow(deprecated)] + self.name_span + } + + /// Return the original regular expression specified by the user for this [Rule]. 
+ pub fn re_str(&self) -> &str { + &self.re_str + } + + /// Return the IDs of the permitted start conditions for the lexer to match this rule. + pub fn start_states(&self) -> &[usize] { + #[allow(deprecated)] + self.start_states.as_slice() + } + + /// Return the target start condition ID and the stack operation applied when this rule matches, if any. + pub fn target_state(&self) -> Option<(usize, StartStateOperation)> { + #[allow(deprecated)] + self.target_state.clone() + } } /// Methods which all lexer definitions must implement. @@ -235,7 +279,7 @@ where } fn get_rule_by_name(&self, n: &str) -> Option<&Rule> { - self.rules.iter().find(|r| r.name.as_deref() == Some(n)) + self.rules.iter().find(|r| r.name() == Some(n)) } fn set_rule_ids<'a>( @@ -261,8 +305,8 @@ where let mut missing_from_parser_idxs = Vec::new(); let mut rules_with_names = 0; for (i, r) in self.rules.iter_mut().enumerate() { - if let Some(ref n) = r.name { - match rule_ids_map.get(&**n) { + if let Some(n) = r.name() { + match rule_ids_map.get(n) { Some(tok_id) => r.tok_id = Some(*tok_id), None => { r.tok_id = None; @@ -278,10 +322,7 @@ where } else { let mut mfp = HashSet::with_capacity(missing_from_parser_idxs.len()); for i in &missing_from_parser_idxs { - mfp.insert(( - self.rules[*i].name.as_ref().unwrap().as_str(), - self.rules[*i].name_span, - )); + mfp.insert((self.rules[*i].name().unwrap(), self.rules[*i].name_span())); } Some(mfp) }; @@ -299,8 +340,8 @@ where &self .rules .iter() - .filter(|x| x.name.is_some()) - .map(|x| &**x.name.as_ref().unwrap()) + .filter(|x| x.name().is_some()) + .map(|x| x.name().unwrap()) .collect::>(), ) .cloned() @@ -375,7 +416,7 @@ where Some((_, s)) => s, }; for (ridx, r) in self.iter_rules().enumerate() { - if !Self::state_matches(current_state, &r.start_states) { + if !Self::state_matches(current_state, r.start_states()) { continue; } if let Some(m) = r.re.find(&s[old_i..]) { @@ -390,7 +431,7 @@ where } if longest > 0 { let r = self.get_rule(longest_ridx).unwrap(); - if 
r.name.is_some() { + if r.name().is_some() { match r.tok_id { Some(tok_id) => { lexemes.push(Ok(Lexeme::new(tok_id, old_i, longest))); @@ -401,7 +442,7 @@ where } } } - if let Some((target_state_id, op)) = &r.target_state { + if let Some((target_state_id, op)) = &r.target_state() { let state = match self.get_start_state_by_id(*target_state_id) { None => { // TODO: I can see an argument for lexing state to be either `None` or `Some(target_state_id)` here @@ -851,18 +892,18 @@ b 'B' .to_string(); let lexerdef = LRNonStreamingLexerDef::>::from_str(&src).unwrap(); assert_eq!( - lexerdef.get_rule_by_name("A").unwrap().name_span, + lexerdef.get_rule_by_name("A").unwrap().name_span(), Span::new(6, 7) ); assert_eq!( - lexerdef.get_rule_by_name("B").unwrap().name_span, + lexerdef.get_rule_by_name("B").unwrap().name_span(), Span::new(12, 13) ); let anonymous_rules = lexerdef .iter_rules() - .filter(|rule| rule.name.is_none()) + .filter(|rule| rule.name().is_none()) .collect::>(); - assert_eq!(anonymous_rules[0].name_span, Span::new(21, 21)); + assert_eq!(anonymous_rules[0].name_span(), Span::new(21, 21)); } #[test] @@ -876,11 +917,11 @@ b 'B' .to_string(); let lexerdef = LRNonStreamingLexerDef::>::from_str(&src).unwrap(); assert_eq!( - lexerdef.get_rule_by_name("A").unwrap().name_span, + lexerdef.get_rule_by_name("A").unwrap().name_span(), Span::new(44, 45) ); assert_eq!( - lexerdef.get_rule_by_name("B").unwrap().name_span, + lexerdef.get_rule_by_name("B").unwrap().name_span(), Span::new(50, 51) ); } @@ -896,11 +937,11 @@ b 'B' .to_string(); let lexerdef = LRNonStreamingLexerDef::>::from_str(&src).unwrap(); let a_rule = lexerdef.get_rule_by_name("A").unwrap(); - assert_eq!(a_rule.name_span, Span::new(61, 62)); + assert_eq!(a_rule.name_span(), Span::new(61, 62)); assert_eq!(a_rule.re_str, "a"); let b_rule = lexerdef.get_rule_by_name("B").unwrap(); - assert_eq!(b_rule.name_span, Span::new(84, 85)); + assert_eq!(b_rule.name_span(), Span::new(84, 85)); assert_eq!(b_rule.re_str, 
"b"); } diff --git a/lrlex/src/lib/parser.rs b/lrlex/src/lib/parser.rs index 5bd3b51d2..b521e696d 100644 --- a/lrlex/src/lib/parser.rs +++ b/lrlex/src/lib/parser.rs @@ -68,7 +68,7 @@ impl StartState { } } -#[derive(Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq)] #[doc(hidden)] pub enum StartStateOperation { ReplaceStack, @@ -425,15 +425,12 @@ where name = Some(orig_name[1..orig_name.len() - 1].to_string()); name_span = Span::new(i + rspace + 2, i + rspace + orig_name.len()); self.rules.iter().any(|r| { - let dupe = r - .name - .as_ref() - .map_or(false, |n| n == name.as_ref().unwrap()); + let dupe = r.name().map_or(false, |n| n == name.as_ref().unwrap()); if dupe { add_duplicate_occurrence( errs, LexErrorKind::DuplicateName, - r.name_span, + r.name_span(), name_span, ); } @@ -771,13 +768,13 @@ mod test { .to_string(); let ast = LRNonStreamingLexerDef::>::from_str(&src).unwrap(); let intrule = ast.get_rule_by_name("int").unwrap(); - assert_eq!("int", intrule.name.as_ref().unwrap()); + assert_eq!("int", intrule.name().unwrap()); assert_eq!("[0-9]+", intrule.re_str); let idrule = ast.get_rule_by_name("id").unwrap(); - assert_eq!("id", idrule.name.as_ref().unwrap()); + assert_eq!("id", idrule.name().unwrap()); assert_eq!("[a-zA-Z]+", idrule.re_str); let plusrule = ast.get_rule_by_name("+").unwrap(); - assert_eq!("+", plusrule.name.as_ref().unwrap()); + assert_eq!("+", plusrule.name().unwrap()); assert_eq!("\\+", plusrule.re_str); } @@ -789,7 +786,7 @@ mod test { .to_string(); let ast = LRNonStreamingLexerDef::>::from_str(&src).unwrap(); let intrule = ast.get_rule(0).unwrap(); - assert!(intrule.name.is_none()); + assert!(intrule.name().is_none()); assert_eq!("[0-9]+", intrule.re_str); } @@ -1037,11 +1034,11 @@ mod test { .to_string(); let ast = LRNonStreamingLexerDef::>::from_str(&src).unwrap(); let intrule = ast.get_rule(0).unwrap(); - assert_eq!("known", intrule.name.as_ref().unwrap()); + assert_eq!("known", intrule.name().unwrap()); assert_eq!(".", 
intrule.re_str); - assert!(intrule.target_state.is_none()); - assert_eq!(1, intrule.start_states.len()); - assert_eq!(1, *intrule.start_states.first().unwrap()); + assert!(intrule.target_state().is_none()); + assert_eq!(1, intrule.start_states().len()); + assert_eq!(1, *intrule.start_states().first().unwrap()); } #[test] @@ -1065,13 +1062,13 @@ mod test { .to_string(); let ast = LRNonStreamingLexerDef::>::from_str(&src).unwrap(); let intrule = ast.get_rule(0).unwrap(); - assert_eq!("known", intrule.name.as_ref().unwrap()); + assert_eq!("known", intrule.name().unwrap()); assert_eq!(".", intrule.re_str); assert_eq!( (1, StartStateOperation::ReplaceStack), - *intrule.target_state.as_ref().unwrap() + *intrule.target_state().as_ref().unwrap() ); - assert_eq!(0, intrule.start_states.len()); + assert_eq!(0, intrule.start_states().len()); } #[test] @@ -1263,88 +1260,88 @@ a\[\]a 'aboxa' .to_string(); let ast = LRNonStreamingLexerDef::>::from_str(&src).unwrap(); let mut rule = ast.get_rule_by_name("gt").unwrap(); - assert_eq!("gt", rule.name.as_ref().unwrap()); + assert_eq!("gt", rule.name().unwrap()); assert_eq!(">", rule.re_str); rule = ast.get_rule_by_name("lt").unwrap(); - assert_eq!("lt", rule.name.as_ref().unwrap()); + assert_eq!("lt", rule.name().unwrap()); assert_eq!("<", rule.re_str); rule = ast.get_rule_by_name("alt").unwrap(); - assert_eq!("alt", rule.name.as_ref().unwrap()); + assert_eq!("alt", rule.name().unwrap()); assert_eq!("a<", rule.re_str); rule = ast.get_rule_by_name("agt").unwrap(); - assert_eq!("agt", rule.name.as_ref().unwrap()); + assert_eq!("agt", rule.name().unwrap()); assert_eq!("a>", rule.re_str); rule = ast.get_rule_by_name("elt").unwrap(); - assert_eq!("elt", rule.name.as_ref().unwrap()); + assert_eq!("elt", rule.name().unwrap()); assert_eq!("e<<", rule.re_str); rule = ast.get_rule_by_name("egt").unwrap(); - assert_eq!("egt", rule.name.as_ref().unwrap()); + assert_eq!("egt", rule.name().unwrap()); assert_eq!("e>>", rule.re_str); rule = 
ast.get_rule_by_name("forall").unwrap(); - assert_eq!("forall", rule.name.as_ref().unwrap()); + assert_eq!("forall", rule.name().unwrap()); assert_eq!("∀", rule.re_str); rule = ast.get_rule_by_name("forall2").unwrap(); - assert_eq!("forall2", rule.name.as_ref().unwrap()); + assert_eq!("forall2", rule.name().unwrap()); assert_eq!("∀∀", rule.re_str); rule = ast.get_rule_by_name("forall3").unwrap(); - assert_eq!("forall3", rule.name.as_ref().unwrap()); + assert_eq!("forall3", rule.name().unwrap()); assert_eq!("∀∀∀∀∀", rule.re_str); rule = ast.get_rule_by_name("forall4").unwrap(); - assert_eq!("forall4", rule.name.as_ref().unwrap()); + assert_eq!("forall4", rule.name().unwrap()); assert_eq!("∀∀∀∀∀", rule.re_str); rule = ast.get_rule_by_name("box").unwrap(); - assert_eq!("box", rule.name.as_ref().unwrap()); + assert_eq!("box", rule.name().unwrap()); assert_eq!(r"\[\]", rule.re_str); rule = ast.get_rule_by_name("abox").unwrap(); - assert_eq!("abox", rule.name.as_ref().unwrap()); + assert_eq!("abox", rule.name().unwrap()); assert_eq!(r"a\[\]", rule.re_str); rule = ast.get_rule_by_name("aboxa").unwrap(); - assert_eq!("aboxa", rule.name.as_ref().unwrap()); + assert_eq!("aboxa", rule.name().unwrap()); assert_eq!(r"a\[\]a", rule.re_str); rule = ast.get_rule_by_name("hex").unwrap(); - assert_eq!("hex", rule.name.as_ref().unwrap()); + assert_eq!("hex", rule.name().unwrap()); assert_eq!(r"\x2a", rule.re_str); rule = ast.get_rule_by_name("octal").unwrap(); - assert_eq!("octal", rule.name.as_ref().unwrap()); + assert_eq!("octal", rule.name().unwrap()); assert_eq!(r"\052", rule.re_str); rule = ast.get_rule_by_name("newline").unwrap(); - assert_eq!("newline", rule.name.as_ref().unwrap()); + assert_eq!("newline", rule.name().unwrap()); assert_eq!(r"\n*", rule.re_str); rule = ast.get_rule_by_name("alert").unwrap(); - assert_eq!("alert", rule.name.as_ref().unwrap()); + assert_eq!("alert", rule.name().unwrap()); assert_eq!(r"\a*", rule.re_str); rule = 
ast.get_rule_by_name("backslash").unwrap(); - assert_eq!("backslash", rule.name.as_ref().unwrap()); + assert_eq!("backslash", rule.name().unwrap()); assert_eq!(r"\\*", rule.re_str); rule = ast.get_rule_by_name("backslash2").unwrap(); - assert_eq!("backslash2", rule.name.as_ref().unwrap()); + assert_eq!("backslash2", rule.name().unwrap()); assert_eq!(r"\\\\*", rule.re_str); rule = ast.get_rule_by_name("backslash_newline_a").unwrap(); - assert_eq!("backslash_newline_a", rule.name.as_ref().unwrap()); + assert_eq!("backslash_newline_a", rule.name().unwrap()); assert_eq!(r"[\\\na]*", rule.re_str); rule = ast.get_rule_by_name("backslash_angle_a").unwrap(); - assert_eq!("backslash_angle_a", rule.name.as_ref().unwrap()); + assert_eq!("backslash_angle_a", rule.name().unwrap()); assert_eq!(r"[\\>(); let mut rule = ast.get_rule_by_name("OPEN_BRACE").unwrap(); - assert_eq!("OPEN_BRACE", rule.name.as_ref().unwrap()); + assert_eq!("OPEN_BRACE", rule.name().unwrap()); assert_eq!(r"\{", rule.re_str); - assert_eq!(1, rule.start_states.len()); + assert_eq!(1, rule.start_states().len()); assert_eq!( "brace", - rule.start_states + rule.start_states() .iter() .map(|s| states.get(s).unwrap()) .next() .unwrap() ); - assert!(rule.target_state.is_some()); + assert!(rule.target_state().is_some()); assert_eq!( "brace", - rule.target_state + rule.target_state() .as_ref() .map(|(s, _)| states.get(s).unwrap()) .unwrap() ); assert_eq!( StartStateOperation::Push, - *rule.target_state.as_ref().map(|(_, s)| s).unwrap() + *rule.target_state().as_ref().map(|(_, s)| s).unwrap() ); rule = ast.get_rule_by_name("CLOSE_BRACE").unwrap(); - assert_eq!("CLOSE_BRACE", rule.name.as_ref().unwrap()); + assert_eq!("CLOSE_BRACE", rule.name().unwrap()); assert_eq!(r"\}", rule.re_str); - assert_eq!(1, rule.start_states.len()); + assert_eq!(1, rule.start_states().len()); assert_eq!( "brace", - rule.start_states + rule.start_states() .iter() .map(|s| states.get(s).unwrap()) .next() .unwrap() ); - 
assert!(rule.target_state.is_some()); + assert!(rule.target_state().is_some()); assert_eq!( "brace", - rule.target_state + rule.target_state() .as_ref() .map(|(s, _)| states.get(s).unwrap()) .unwrap() ); assert_eq!( StartStateOperation::Pop, - *rule.target_state.as_ref().map(|(_, s)| s).unwrap() + *rule.target_state().as_ref().map(|(_, s)| s).unwrap() ); rule = ast.get_rule_by_name("OPEN_BRACKET").unwrap(); - assert_eq!("OPEN_BRACKET", rule.name.as_ref().unwrap()); + assert_eq!("OPEN_BRACKET", rule.name().unwrap()); assert_eq!(r"\[", rule.re_str); - assert_eq!(1, rule.start_states.len()); + assert_eq!(1, rule.start_states().len()); assert_eq!( "bracket", - rule.start_states + rule.start_states() .iter() .map(|s| states.get(s).unwrap()) .next() .unwrap() ); - assert!(rule.target_state.is_some()); + assert!(rule.target_state().is_some()); assert_eq!( "bracket", - rule.target_state + rule.target_state() .as_ref() .map(|(s, _)| states.get(s).unwrap()) .unwrap() ); assert_eq!( StartStateOperation::Push, - *rule.target_state.as_ref().map(|(_, s)| s).unwrap() + *rule.target_state().as_ref().map(|(_, s)| s).unwrap() ); rule = ast.get_rule_by_name("CLOSE_BRACKET").unwrap(); - assert_eq!("CLOSE_BRACKET", rule.name.as_ref().unwrap()); + assert_eq!("CLOSE_BRACKET", rule.name().unwrap()); assert_eq!(r"\]", rule.re_str); - assert_eq!(1, rule.start_states.len()); + assert_eq!(1, rule.start_states().len()); assert_eq!( "bracket", - rule.start_states + rule.start_states() .iter() .map(|s| states.get(s).unwrap()) .next() .unwrap() ); - assert!(rule.target_state.is_some()); + assert!(rule.target_state().is_some()); assert_eq!( "bracket", - rule.target_state + rule.target_state() .as_ref() .map(|(s, _)| states.get(s).unwrap()) .unwrap() ); assert_eq!( StartStateOperation::Pop, - *rule.target_state.as_ref().map(|(_, s)| s).unwrap() + *rule.target_state().as_ref().map(|(_, s)| s).unwrap() ); rule = ast.get_rule_by_name("OPEN_FIRST_BRACE").unwrap(); - assert_eq!("OPEN_FIRST_BRACE", 
rule.name.as_ref().unwrap()); + assert_eq!("OPEN_FIRST_BRACE", rule.name().unwrap()); assert_eq!(r"\{", rule.re_str); - assert_eq!(0, rule.start_states.len()); - assert!(rule.target_state.is_some()); + assert_eq!(0, rule.start_states().len()); + assert!(rule.target_state().is_some()); assert_eq!( "brace", - rule.target_state + rule.target_state() .as_ref() .map(|(s, _)| states.get(s).unwrap()) .unwrap() ); assert_eq!( StartStateOperation::ReplaceStack, - *rule.target_state.as_ref().map(|(_, s)| s).unwrap() + *rule.target_state().as_ref().map(|(_, s)| s).unwrap() ); rule = ast.get_rule_by_name("OPEN_FIRST_BRACKET").unwrap(); - assert_eq!("OPEN_FIRST_BRACKET", rule.name.as_ref().unwrap()); + assert_eq!("OPEN_FIRST_BRACKET", rule.name().unwrap()); assert_eq!(r"\[", rule.re_str); - assert_eq!(0, rule.start_states.len()); - assert!(rule.target_state.is_some()); + assert_eq!(0, rule.start_states().len()); + assert!(rule.target_state().is_some()); assert_eq!( "bracket", - rule.target_state + rule.target_state() .as_ref() .map(|(s, _)| states.get(s).unwrap()) .unwrap() ); assert_eq!( StartStateOperation::ReplaceStack, - *rule.target_state.as_ref().map(|(_, s)| s).unwrap() + *rule.target_state().as_ref().map(|(_, s)| s).unwrap() ); } diff --git a/lrlex/src/main.rs b/lrlex/src/main.rs index 4975c49d1..e2ce6647b 100644 --- a/lrlex/src/main.rs +++ b/lrlex/src/main.rs @@ -84,7 +84,7 @@ fn main() { match r { Ok(l) => println!( "{} {}", - lexerdef.get_rule_by_id(l.tok_id()).name.as_ref().unwrap(), + lexerdef.get_rule_by_id(l.tok_id()).name().unwrap(), &input[l.span().start()..l.span().end()] ), Err(e) => {