diff --git a/cfgrammar/src/lib/yacc/parser.rs b/cfgrammar/src/lib/yacc/parser.rs index 50322a902..9324fc172 100644 --- a/cfgrammar/src/lib/yacc/parser.rs +++ b/cfgrammar/src/lib/yacc/parser.rs @@ -382,34 +382,46 @@ impl YaccParser { update_yacc_kind: bool, ) -> Result { // Compares haystack converted to lowercase to needle (assumed to be lowercase). - fn starts_with_lower(needle: &'static str, haystack: &'_ str) -> bool { + fn starts_with_lower(needle: &'_ str, haystack: &'_ str) -> bool { if let Some((prefix, _)) = haystack.split_at_checked(needle.len()) { prefix.to_lowercase() == needle } else { false } } + const ACTION_KINDS: [(&str, YaccOriginalActionKind); 3] = [ + ("noaction", YaccOriginalActionKind::NoAction), + ("useraction", YaccOriginalActionKind::UserAction), + ("genericparsetree", YaccOriginalActionKind::GenericParseTree), + ]; - const YACC_KINDS: [(&str, YaccKind); 5] = [ - ("grmtools", YaccKind::Grmtools), - ( - "original(noaction)", - YaccKind::Original(YaccOriginalActionKind::NoAction), - ), - ( - "original(useraction)", - YaccKind::Original(YaccOriginalActionKind::UserAction), - ), - ( - "original(genericparsetree)", - YaccKind::Original(YaccOriginalActionKind::GenericParseTree), - ), - ("Eco", YaccKind::Eco), + let mut yacc_kinds = vec![ + ("grmtools".to_string(), YaccKind::Grmtools), + ("yacckind::grmtools".to_string(), YaccKind::Grmtools), + ("eco".to_string(), YaccKind::Eco), + ("yacckind::eco".to_string(), YaccKind::Eco), ]; + for (name, action_kind) in ACTION_KINDS { + let yk = "YaccKind".to_lowercase(); + let ak = "YaccOriginalActionKind".to_lowercase(); + yacc_kinds.push((format!("original({name})"), YaccKind::Original(action_kind))); + yacc_kinds.push(( + format!("{yk}::original({name})"), + YaccKind::Original(action_kind), + )); + yacc_kinds.push(( + format!("{yk}::original({ak}::{name})"), + YaccKind::Original(action_kind), + )); + yacc_kinds.push(( + format!("original({ak}::{name})"), + YaccKind::Original(action_kind), + )); + } let j = 
self.parse_ws(i, false)?; let s = &self.src[i..]; - for (kind_name, kind) in YACC_KINDS { - if starts_with_lower(kind_name, s) { + for (kind_name, kind) in yacc_kinds { + if starts_with_lower(&kind_name, s) { if update_yacc_kind { self.yacc_kind = Some(kind); } @@ -2764,4 +2776,31 @@ B"; "; parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap(); } + + #[test] + fn test_grmtools_section_yacckinds() { + let srcs = [ + "%grmtools{yacckind Original(NoAction)} + %% + Start: ;", + "%grmtools{yacckind YaccKind::Original(GenericParseTree)} + %% + Start: ;", + "%grmtools{yacckind YaccKind::Original(yaccoriginalactionkind::useraction)} + %actiontype () + %% + Start: ;", + "%grmtools{yacckind Original(YACCOriginalActionKind::NoAction)} + %% + Start: ;", + "%grmtools{yacckind YaccKind::Grmtools} + %% + Start -> () : ;", + ]; + for src in srcs { + YaccParser::new(YaccKindResolver::NoDefault, src.to_string()) + .parse() + .unwrap(); + } + } } diff --git a/doc/src/SUMMARY.md b/doc/src/SUMMARY.md index e142efb09..8e73b3305 100644 --- a/doc/src/SUMMARY.md +++ b/doc/src/SUMMARY.md @@ -4,10 +4,12 @@ - [Quickstart Guide](quickstart.md) - [Lexing](lexing.md) - [Lex compatibility](lexcompatibility.md) + - [Extensions](lexextensions.md) - [Hand-written lexers](manuallexer.md) - [Start States](start_states.md) - [Parsing](parsing.md) - [Yacc compatibility](yacccompatibility.md) + - [Extensions](yaccextensions.md) - [Return types and action code](actioncode.md) - [grmtools parsing idioms](parsing_idioms.md) - [Error recovery](errorrecovery.md) diff --git a/doc/src/lexcompatibility.md b/doc/src/lexcompatibility.md index a1d47b02b..b003039aa 100644 --- a/doc/src/lexcompatibility.md +++ b/doc/src/lexcompatibility.md @@ -38,7 +38,8 @@ There are several major differences between Lex and grmtools: and ASCII escape sequences. `\\` `\a` `\f` `\n` `\r` `\t` `\v`. Lex also interprets the escape sequence `\b` as `backspace`. 
While regex treats `\b` - as a word boundary subsequently grmtools will too. + as a word boundary subsequently grmtools will too. The Lex behavior can be enabled + using [posix_escapes](lexextensions.md). Additional escape sequences supported by regex: diff --git a/doc/src/lexextensions.md b/doc/src/lexextensions.md new file mode 100644 index 000000000..764e350f4 --- /dev/null +++ b/doc/src/lexextensions.md @@ -0,0 +1,64 @@ +# Lex extensions + +Flags can be specified at compile time through `LexFlags` or at `.l` file parse time using +a `%grmtools{ }` section. At compile time these flags can be enabled using +[`CTLexerBuilder`](https://docs.rs/lrlex/latest/lrlex/struct.CTLexerBuilder.html) methods. + +Flags commonly affect the parsing of the lex file, the interpretation of regular expressions, +and set limits. + +Boolean flags are specified by their name, and can be negated by prefixing with `!`. +Other flags should specify their value immediately after the flag name. + + +## Example + +``` +%grmtools { + allow_wholeline_comments + !octal + size_limit 1024 +} +%% +. "rule" +``` + + +## List of flags: + +| Flag | Value | Required | Regex[^regex] | +|-------------------------------|-------|----------|---------------| +| `posix_escapes`[^†] | bool | ✗ | ✗ | +| `allow_wholeline_comments`[^‡] | bool | ✗ | ✗ | +| `case_insensitive` | bool | ✗ | ✓ | +| `dot_matches_new_line` | bool | ✗ | ✓ | +| `multi_line` | bool | ✗ | ✓ | +| `octal` | bool | ✗ | ✓ | +| `swap_greed` | bool | ✗ | ✓ | +| `ignore_whitespace` | bool | ✗ | ✓ | +| `unicode` | bool | ✗ | ✓ | +| `size_limit` | usize | ✗ | ✓ | +| `dfa_size_limit` | usize | ✗ | ✓ | +| `nest_limit` | u32 | ✗ | ✓ | + +[^†]: Enable compatibility with posix escape sequences. +[^‡]: Enables rust style `// comments` at the start of lines, +which requires escaping of `/` when used in a regex. +[^regex]: ✓ Flag gets passed directly to `regex::RegexBuilder`. 
+ + +## Flags affecting Posix compatibility + +As discussed in [Lex compatibility](lexcompatibility.md) the default behaviors of grmtools and rust's regex +library differ from those of posix lex. + +The following flags can change the behavior to match posix lex more closely. + +``` +%grmtools { + !dot_matches_new_line + posix_escapes +} +%% +... +``` diff --git a/doc/src/yaccextensions.md b/doc/src/yaccextensions.md new file mode 100644 index 000000000..601a5eaa8 --- /dev/null +++ b/doc/src/yaccextensions.md @@ -0,0 +1,17 @@ +# Yacc Extensions + +At the beginning of a `.y` file is a `%grmtools{}` section; by default this section is required, +but a default can be set or forced by using a `YaccKindResolver`. + +| Flag | Value | Required | +|------------|---------------------------------------------|--------------| +| `yacckind` | [YaccKind](yacccompatibility.md#yacckinds) | ✓ | + + +## Example + +``` +%grmtools{yacckind Grmtools} +%% +Start: ; +``` diff --git a/lrlex/src/lib/ctbuilder.rs b/lrlex/src/lib/ctbuilder.rs index bdd3c220e..bcb8ace1c 100644 --- a/lrlex/src/lib/ctbuilder.rs +++ b/lrlex/src/lib/ctbuilder.rs @@ -680,6 +680,18 @@ where self } + /// Enables `// comment` style parsing according to `flag`. + /// When enabled, comments can appear at the beginning of a line, + /// and regular expressions with the `/` character should be escaped via `\/`. + /// + /// The default value is `false`. + /// + /// Setting this flag will override the same flag within a `%grmtools` section. + pub fn allow_wholeline_comments(mut self, flag: bool) -> Self { + self.force_lex_flags.allow_wholeline_comments = Some(flag); + self + } + /// Sets the `regex::RegexBuilder` option of the same name. /// The default value is `true`. /// @@ -698,7 +710,7 @@ where self } - /// Sets the `regex::RegexBuilder` option of the same name. + /// Enables posix lex compatible escape sequences according to `flag`. /// The default value is `false`. 
/// /// Setting this flag will override the same flag within a `%grmtools` section.