@@ -240,7 +240,8 @@ where
240240 span_map : & mut HashMap < & str , Span > ,
241241 lex_flags : & mut LexFlags ,
242242 ) -> LexInternalBuildResult < usize > {
243- const OPTIONS : [ & str ; 11 ] = [
243+ const OPTIONS : [ & str ; 12 ] = [
244+ "allow_wholeline_comments" ,
244245 "dot_matches_new_line" ,
245246 "multi_line" ,
246247 "octal" ,
@@ -273,6 +274,7 @@ where
273274 } ) ;
274275 }
275276 match opt {
277+ "allow_wholeline_comments" => lex_flags. allow_wholeline_comments = Some ( flag) ,
276278 "case_insensitive" => lex_flags. case_insensitive = Some ( flag) ,
277279 "swap_greed" => lex_flags. swap_greed = Some ( flag) ,
278280 "ignore_whitespace" => lex_flags. ignore_whitespace = Some ( flag) ,
@@ -350,6 +352,15 @@ where
350352 self . lex_flags . merge_from ( & self . default_lex_flags ) ;
351353 loop {
352354 i = self . parse_ws ( i) ?;
355+ if self . lex_flags . allow_wholeline_comments . unwrap_or ( false )
356+ && self . lookahead_is ( "//" , i) . is_some ( )
357+ {
358+ i = RE_LINE_SEP
359+ . find ( & self . src [ i..] )
360+ . map ( |m| m. start ( ) + i)
361+ . unwrap_or ( self . src . len ( ) ) ;
362+ continue ;
363+ }
353364 if i == self . src . len ( ) {
354365 break Err ( self . mk_error ( LexErrorKind :: PrematureEnd , i) ) ;
355366 }
@@ -470,6 +481,16 @@ where
470481 // We should be at newline of the previous section separator '%%<here>\n upon entry,
471482 // otherwise after iterating before the newline of the previous iterations rule or at eof.
472483 i = self . parse_nl ( i) ?;
484+ let line_len = RE_LINE_SEP
485+ . find ( & self . src [ i..] )
486+ . map ( |m| m. start ( ) )
487+ . unwrap_or ( self . src . len ( ) - i) ;
488+ if self . lex_flags . allow_wholeline_comments . unwrap_or ( false )
489+ && self . lookahead_is ( "//" , i) . is_some ( )
490+ {
491+ i += line_len;
492+ continue ;
493+ }
473494 // According to posix lex:
474495 //
475496 // > Any such input (beginning with a <blank> or within "%{" and "%}" delimiter lines)
@@ -486,10 +507,6 @@ where
486507 // Previously we allowed these, and trimmed leading spaces, parsing any rules after them. Currently we will emit an error.
487508 let j = self . parse_ws ( i) ?;
488509 if j != i {
489- let line_len = RE_LINE_SEP
490- . find ( & self . src [ i..] )
491- . map ( |m| m. start ( ) )
492- . unwrap_or ( self . src . len ( ) - i) ;
493510 let err = LexBuildError {
494511 kind : LexErrorKind :: VerbatimNotSupported ,
495512 spans : vec ! [ Span :: new( i, i + line_len) ] ,
@@ -1876,4 +1893,41 @@ b "A"
18761893 3 ,
18771894 ) ;
18781895 }
1896+
// Exercises the `allow_wholeline_comments` lexer flag through three fixtures:
// one with the flag enabled (must parse), and two without it, where a line
// starting with `//` must be rejected in the declarations section and in the
// rules section respectively.
// Each fixture string starts with a newline right after `r#"`, so its first
// line is empty and the visible content begins on line 2.
1897+ #[ test]
1898+ fn test_comments ( ) {
// Case 1: flag enabled via `%grmtools {allow_wholeline_comments}`. `//` lines
// appear both before and after the `%%` separator. The rules section also
// contains an escaped `\/\/` pattern as an ordinary rule.
1899+ let src = r#"
1900+ %grmtools {allow_wholeline_comments}
1901+ // comment
1902+ %s InclusiveState
1903+ %%
1904+ // "comment but an invalid rule name if parsed as a rule"
1905+ \/\/ 'escaping_required'
1906+ . 'dot'
1907+ "# ;
// Only successful construction is checked here; the resulting lexer
// definition is discarded.
1908+ LRNonStreamingLexerDef :: < DefaultLexerTypes < u8 > > :: from_str ( src) . unwrap ( ) ;
// Case 2: no flag is set, and a `//` line sits in the declarations section
// (before `%%`).
1909+ let src = r#"
1910+ // comments not allowed.
1911+ %s InclusiveState
1912+ %%
1913+ . 'dot'
1914+ "# ;
// Expected: `UnknownDeclaration` reported at line 2, column 1, i.e. at the
// start of the `//` line.
1915+ LRNonStreamingLexerDef :: < DefaultLexerTypes < u8 > > :: from_str ( src) . expect_error_at_line_col (
1916+ src,
1917+ LexErrorKind :: UnknownDeclaration ,
1918+ 2 ,
1919+ 1 ,
1920+ ) ;
// Case 3: no flag is set, and a `//` line sits in the rules section
// (after `%%`).
1921+ let src = r#"
1922+ %%
1923+ // "Invalid rule name"
1924+ . 'dot'
1925+ "# ;
// Expected: `InvalidName` reported at line 3, column 18.
// NOTE(review): column 18 appears to be the start of the trailing `name"`
// token, presumably because the last whitespace-separated token is treated
// as the rule name -- confirm against the rule parser.
1926+ LRNonStreamingLexerDef :: < DefaultLexerTypes < u8 > > :: from_str ( src) . expect_error_at_line_col (
1927+ src,
1928+ LexErrorKind :: InvalidName ,
1929+ 3 ,
1930+ 18 ,
1931+ ) ;
1932+ }
18791933}
0 commit comments