Skip to content

Commit a9ea8db

Browse files
authored
Merge pull request #499 from ratmice/document_grmtools_section
Document grmtools section
2 parents 683d4fe + 66553f6 commit a9ea8db

File tree

6 files changed

+155
-20
lines changed

6 files changed

+155
-20
lines changed

cfgrammar/src/lib/yacc/parser.rs

Lines changed: 57 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -382,34 +382,46 @@ impl YaccParser {
382382
update_yacc_kind: bool,
383383
) -> Result<usize, YaccGrammarError> {
384384
// Compares haystack converted to lowercase to needle (assumed to be lowercase).
385-
fn starts_with_lower(needle: &'static str, haystack: &'_ str) -> bool {
385+
fn starts_with_lower(needle: &'_ str, haystack: &'_ str) -> bool {
386386
if let Some((prefix, _)) = haystack.split_at_checked(needle.len()) {
387387
prefix.to_lowercase() == needle
388388
} else {
389389
false
390390
}
391391
}
392+
const ACTION_KINDS: [(&str, YaccOriginalActionKind); 3] = [
393+
("noaction", YaccOriginalActionKind::NoAction),
394+
("useraction", YaccOriginalActionKind::UserAction),
395+
("genericparsetree", YaccOriginalActionKind::GenericParseTree),
396+
];
392397

393-
const YACC_KINDS: [(&str, YaccKind); 5] = [
394-
("grmtools", YaccKind::Grmtools),
395-
(
396-
"original(noaction)",
397-
YaccKind::Original(YaccOriginalActionKind::NoAction),
398-
),
399-
(
400-
"original(useraction)",
401-
YaccKind::Original(YaccOriginalActionKind::UserAction),
402-
),
403-
(
404-
"original(genericparsetree)",
405-
YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
406-
),
407-
("Eco", YaccKind::Eco),
398+
let mut yacc_kinds = vec![
399+
("grmtools".to_string(), YaccKind::Grmtools),
400+
("yacckind::grmtools".to_string(), YaccKind::Grmtools),
401+
("Eco".to_string(), YaccKind::Eco),
402+
("yackind::Eco".to_string(), YaccKind::Eco),
408403
];
404+
for (name, action_kind) in ACTION_KINDS {
405+
let yk = "YaccKind".to_lowercase();
406+
let ak = "YaccOriginalActionKind".to_lowercase();
407+
yacc_kinds.push((format!("original({name})"), YaccKind::Original(action_kind)));
408+
yacc_kinds.push((
409+
format!("{yk}::original({name})"),
410+
YaccKind::Original(action_kind),
411+
));
412+
yacc_kinds.push((
413+
format!("{yk}::original({ak}::{name})"),
414+
YaccKind::Original(action_kind),
415+
));
416+
yacc_kinds.push((
417+
format!("original({ak}::{name})"),
418+
YaccKind::Original(action_kind),
419+
));
420+
}
409421
let j = self.parse_ws(i, false)?;
410422
let s = &self.src[i..];
411-
for (kind_name, kind) in YACC_KINDS {
412-
if starts_with_lower(kind_name, s) {
423+
for (kind_name, kind) in yacc_kinds {
424+
if starts_with_lower(&kind_name, s) {
413425
if update_yacc_kind {
414426
self.yacc_kind = Some(kind);
415427
}
@@ -2764,4 +2776,31 @@ B";
27642776
";
27652777
parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap();
27662778
}
2779+
2780+
#[test]
2781+
fn test_grmtools_section_yacckinds() {
2782+
let srcs = [
2783+
"%grmtools{yacckind Original(NoAction)}
2784+
%%
2785+
Start: ;",
2786+
"%grmtools{yacckind YaccKind::Original(GenericParseTree)}
2787+
%%
2788+
Start: ;",
2789+
"%grmtools{yacckind YaccKind::Original(yaccoriginalactionkind::useraction)}
2790+
%actiontype ()
2791+
%%
2792+
Start: ;",
2793+
"%grmtools{yacckind Original(YACCOriginalActionKind::NoAction)}
2794+
%%
2795+
Start: ;",
2796+
"%grmtools{yacckind YaccKind::Grmtools}
2797+
%%
2798+
Start -> () : ;",
2799+
];
2800+
for src in srcs {
2801+
YaccParser::new(YaccKindResolver::NoDefault, src.to_string())
2802+
.parse()
2803+
.unwrap();
2804+
}
2805+
}
27672806
}

doc/src/SUMMARY.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44
- [Quickstart Guide](quickstart.md)
55
- [Lexing](lexing.md)
66
- [Lex compatibility](lexcompatibility.md)
7+
- [Extensions](lexextensions.md)
78
- [Hand-written lexers](manuallexer.md)
89
- [Start States](start_states.md)
910
- [Parsing](parsing.md)
1011
- [Yacc compatibility](yacccompatibility.md)
12+
- [Extensions](yaccextensions.md)
1113
- [Return types and action code](actioncode.md)
1214
- [grmtools parsing idioms](parsing_idioms.md)
1315
- [Error recovery](errorrecovery.md)

doc/src/lexcompatibility.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ There are several major differences between Lex and grmtools:
3838
and ASCII escape sequences. `\\` `\a` `\f` `\n` `\r` `\t` `\v`.
3939

4040
Lex also interprets the escape sequence `\b` as `backspace`. While regex treats `\b`
41-
as a word boundary subsequently grmtools will too.
41+
as a word boundary subsequently grmtools will too. The Lex behavior can be enabled
42+
using [posix_escapes](lexextensions.md).
4243

4344
Additional escape sequences supported by regex:
4445

doc/src/lexextensions.md

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Lex extensions
2+
3+
Flags can be specified at compile time through `LexFlags` or at `.l` file parse time using
4+
a `%grmtools{ }` section. At compile time these flags can be enabled using
5+
[`CTLexerBuilder`](https://docs.rs/lrlex/latest/lrlex/struct.CTLexerBuilder.html) methods.
6+
7+
Flags commonly affect the parsing of the lex file, the interpretation of regular expressions,
8+
and set limits.
9+
10+
Boolean flags are specified by their name and can be negated by prefixing them with `!`;
11+
other flags should specify their value immediately after the flag name.
12+
13+
14+
## Example
15+
16+
```
17+
%grmtools {
18+
allow_wholeline_comments
19+
!octal
20+
size_limit 1024
21+
}
22+
%%
23+
. "rule"
24+
```
25+
26+
27+
## List of flags:
28+
29+
| Flag | Value | Required | Regex[^regex] |
30+
|-------------------------------|-------|----------|---------------|
31+
| `posix_escapes`[^posix] | bool | &cross; | &cross; |
32+
| `allow_wholeline_comments`[^comments] | bool | &cross; | &cross; |
33+
| `case_insensitive` | bool | &cross; | &checkmark; |
34+
| `dot_matches_new_line` | bool | &cross; | &checkmark; |
35+
| `multi_line` | bool | &cross; | &checkmark; |
36+
| `octal` | bool | &cross; | &checkmark; |
37+
| `swap_greed` | bool | &cross; | &checkmark; |
38+
| `ignore_whitespace` | bool | &cross; | &checkmark; |
39+
| `unicode` | bool | &cross; | &checkmark; |
40+
| `size_limit` | usize | &cross; | &checkmark; |
41+
| `dfa_size_limit` | usize | &cross; | &checkmark; |
42+
| `nest_limit` | u32 | &cross; | &checkmark; |
43+
44+
[^posix]: Enables compatibility with POSIX escape sequences.
45+
[^comments]: Enables Rust-style `// comments` at the start of lines.
46+
This requires escaping `/` when it is used in a regex.
47+
[^regex]: &checkmark; Flag gets passed directly to `regex::RegexBuilder`.
48+
49+
50+
## Flags affecting Posix compatibility
51+
52+
As discussed in [Lex compatibility](lexcompatibility.md), the default behaviors of grmtools and Rust's regex
53+
library differ from those of POSIX lex.
54+
55+
The following flags can change the behavior to match posix lex more closely.
56+
57+
```
58+
%grmtools {
59+
!dot_matches_new_line
60+
posix_escapes
61+
}
62+
%%
63+
...
64+
```

doc/src/yaccextensions.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Yacc Extensions
2+
3+
A `.y` file begins with a `%grmtools{}` section; by default this section is required.
4+
However, a default can be set or forced by using a `YaccKindResolver`.
5+
6+
| Flag | Value | Required |
7+
|------------|---------------------------------------------|--------------|
8+
| `yacckind` | [YaccKind](yacccompatibility.md#yacckinds) | &checkmark; |
9+
10+
11+
## Example
12+
13+
```
14+
%grmtools{yacckind Grmtools}
15+
%%
16+
Start: ;
17+
```

lrlex/src/lib/ctbuilder.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -680,6 +680,18 @@ where
680680
self
681681
}
682682

683+
/// Enables `// comment` style parsing according to `flag`.
684+
/// When enabled comments can appear at the beginning of a line,
685+
/// and regular expressions with the `/` character should be escaped via `\/`.
686+
///
687+
/// The default value is `false`.
688+
///
689+
/// Setting this flag will override the same flag within a `%grmtools` section.
690+
pub fn allow_wholeline_comments(mut self, flag: bool) -> Self {
691+
self.force_lex_flags.allow_wholeline_comments = Some(flag);
692+
self
693+
}
694+
683695
/// Sets the `regex::RegexBuilder` option of the same name.
684696
/// The default value is `true`.
685697
///
@@ -698,7 +710,7 @@ where
698710
self
699711
}
700712

701-
/// Sets the `regex::RegexBuilder` option of the same name.
713+
/// Enables posix lex compatible escape sequences according to `flag`.
702714
/// The default value is `false`.
703715
///
704716
/// Setting this flag will override the same flag within a `%grmtools` section.

0 commit comments

Comments
 (0)