Skip to content

Commit f229503

Browse files
authored
Merge pull request #498 from ratmice/lrlex_allow_wholeline_comments
Add allow_wholeline_comments LexFlag.
2 parents 418431c + c5a8fa1 commit f229503

File tree

3 files changed

+69
-5
lines changed

3 files changed

+69
-5
lines changed

lrlex/src/lib/ctbuilder.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,7 @@ pub fn lexerdef() -> {lexerdef_type} {{
482482
.ok();
483483

484484
let LexFlags {
485+
allow_wholeline_comments,
485486
dot_matches_new_line,
486487
multi_line,
487488
octal,
@@ -494,6 +495,7 @@ pub fn lexerdef() -> {lexerdef_type} {{
494495
dfa_size_limit,
495496
nest_limit,
496497
} = lex_flags;
498+
let allow_wholeline_comments = QuoteOption(allow_wholeline_comments);
497499
let dot_matches_new_line = QuoteOption(dot_matches_new_line);
498500
let multi_line = QuoteOption(multi_line);
499501
let octal = QuoteOption(octal);
@@ -508,6 +510,7 @@ pub fn lexerdef() -> {lexerdef_type} {{
508510

509511
outs.push_str(&format!(
510512
"let mut lex_flags = ::lrlex::DEFAULT_LEX_FLAGS;
513+
lex_flags.allow_wholeline_comments = {allow_wholeline_comments};
511514
lex_flags.dot_matches_new_line = {dot_matches_new_line};
512515
lex_flags.multi_line = {multi_line};
513516
lex_flags.octal = {octal};
@@ -521,6 +524,7 @@ pub fn lexerdef() -> {lexerdef_type} {{
521524
lex_flags.nest_limit = {nest_limit};
522525
let lex_flags = lex_flags;
523526
",
527+
allow_wholeline_comments = quote!(#allow_wholeline_comments),
524528
dot_matches_new_line = quote!(#dot_matches_new_line),
525529
multi_line = quote!(#multi_line),
526530
octal = quote!(#octal),

lrlex/src/lib/lexer.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ pub struct LexFlags {
2828
pub multi_line: Option<bool>,
2929
pub octal: Option<bool>,
3030
pub posix_escapes: Option<bool>,
31+
pub allow_wholeline_comments: Option<bool>,
3132

3233
// All of the following values, when `None`, default to the `regex` crate's default value.
3334
pub case_insensitive: Option<bool>,
@@ -43,6 +44,9 @@ impl LexFlags {
4344
/// Merges flags from `other` into `self`
4445
/// Flags which are `Some` in `other` override the corresponding flags in `self`.
4546
pub fn merge_from(&mut self, other: &Self) {
47+
if other.allow_wholeline_comments.is_some() {
48+
self.allow_wholeline_comments = other.allow_wholeline_comments;
49+
}
4650
if other.dot_matches_new_line.is_some() {
4751
self.dot_matches_new_line = other.dot_matches_new_line;
4852
}
@@ -81,6 +85,7 @@ impl LexFlags {
8185

8286
/// LexFlags with flags set to default values.
8387
pub const DEFAULT_LEX_FLAGS: LexFlags = LexFlags {
88+
allow_wholeline_comments: Some(false),
8489
dot_matches_new_line: Some(true),
8590
multi_line: Some(true),
8691
octal: Some(true),
@@ -96,6 +101,7 @@ pub const DEFAULT_LEX_FLAGS: LexFlags = LexFlags {
96101

97102
/// LexFlags with all of the values `None`.
98103
pub const UNSPECIFIED_LEX_FLAGS: LexFlags = LexFlags {
104+
allow_wholeline_comments: None,
99105
dot_matches_new_line: None,
100106
multi_line: None,
101107
octal: None,

lrlex/src/lib/parser.rs

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,8 @@ where
240240
span_map: &mut HashMap<&str, Span>,
241241
lex_flags: &mut LexFlags,
242242
) -> LexInternalBuildResult<usize> {
243-
const OPTIONS: [&str; 11] = [
243+
const OPTIONS: [&str; 12] = [
244+
"allow_wholeline_comments",
244245
"dot_matches_new_line",
245246
"multi_line",
246247
"octal",
@@ -273,6 +274,7 @@ where
273274
});
274275
}
275276
match opt {
277+
"allow_wholeline_comments" => lex_flags.allow_wholeline_comments = Some(flag),
276278
"case_insensitive" => lex_flags.case_insensitive = Some(flag),
277279
"swap_greed" => lex_flags.swap_greed = Some(flag),
278280
"ignore_whitespace" => lex_flags.ignore_whitespace = Some(flag),
@@ -350,6 +352,15 @@ where
350352
self.lex_flags.merge_from(&self.default_lex_flags);
351353
loop {
352354
i = self.parse_ws(i)?;
355+
if self.lex_flags.allow_wholeline_comments.unwrap_or(false)
356+
&& self.lookahead_is("//", i).is_some()
357+
{
358+
i = RE_LINE_SEP
359+
.find(&self.src[i..])
360+
.map(|m| m.start() + i)
361+
.unwrap_or(self.src.len());
362+
continue;
363+
}
353364
if i == self.src.len() {
354365
break Err(self.mk_error(LexErrorKind::PrematureEnd, i));
355366
}
@@ -470,6 +481,16 @@ where
470481
// On entry we should be at the newline of the previous section separator ('%%<here>\n');
471482
// on later iterations we are just before the newline of the previous iteration's rule, or at eof.
472483
i = self.parse_nl(i)?;
484+
let line_len = RE_LINE_SEP
485+
.find(&self.src[i..])
486+
.map(|m| m.start())
487+
.unwrap_or(self.src.len() - i);
488+
if self.lex_flags.allow_wholeline_comments.unwrap_or(false)
489+
&& self.lookahead_is("//", i).is_some()
490+
{
491+
i += line_len;
492+
continue;
493+
}
473494
// According to posix lex:
474495
//
475496
// > Any such input (beginning with a <blank> or within "%{" and "%}" delimiter lines)
@@ -486,10 +507,6 @@ where
486507
// Previously we allowed these, and trimmed leading spaces, parsing any rules after them. Currently we will emit an error.
487508
let j = self.parse_ws(i)?;
488509
if j != i {
489-
let line_len = RE_LINE_SEP
490-
.find(&self.src[i..])
491-
.map(|m| m.start())
492-
.unwrap_or(self.src.len() - i);
493510
let err = LexBuildError {
494511
kind: LexErrorKind::VerbatimNotSupported,
495512
spans: vec![Span::new(i, i + line_len)],
@@ -1876,4 +1893,41 @@ b "A"
18761893
3,
18771894
);
18781895
}
1896+
1897+
#[test]
1898+
fn test_comments() {
1899+
let src = r#"
1900+
%grmtools {allow_wholeline_comments}
1901+
// comment
1902+
%s InclusiveState
1903+
%%
1904+
// "comment but an invalid rule name if parsed as a rule"
1905+
\/\/ 'escaping_required'
1906+
. 'dot'
1907+
"#;
1908+
LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(src).unwrap();
1909+
let src = r#"
1910+
// comments not allowed.
1911+
%s InclusiveState
1912+
%%
1913+
. 'dot'
1914+
"#;
1915+
LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(src).expect_error_at_line_col(
1916+
src,
1917+
LexErrorKind::UnknownDeclaration,
1918+
2,
1919+
1,
1920+
);
1921+
let src = r#"
1922+
%%
1923+
// "Invalid rule name"
1924+
. 'dot'
1925+
"#;
1926+
LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(src).expect_error_at_line_col(
1927+
src,
1928+
LexErrorKind::InvalidName,
1929+
3,
1930+
18,
1931+
);
1932+
}
18791933
}

0 commit comments

Comments
 (0)