Skip to content

Commit d98034d

Browse files
committed
Utilize the previously added %grmtools section.
This patch has a number of breaking changes:
* Renames `RegexOptions` to `LexFlags`.
* Changes some `bool` flags in `RegexOptions` to `Option<bool>`.
* Implements `merge_from` for merging `LexFlags` together.
* Adds parameters to `LRNonStreamingLexerDef` that implement *forcing* and *default* behaviors for `LexFlags`.
* Adds a `default_lex_flags()` setting to `CTLexerBuilder`.
* Implements the existing `CTLexerBuilder` `RegexOptions` methods (e.g. `dot_matches_new_line` and others) using the new forcing parameters.
* Adds a `lex_flags()` method to `LRNonStreamingLexerDef`. This is only set when the `LexerDef` was parsed from a source file. When constructing the `LexerDef` using `from_rules`, the `lex_flags` method will return `None`.
1 parent a99aded commit d98034d

File tree

4 files changed

+217
-97
lines changed

4 files changed

+217
-97
lines changed

lrlex/src/lib/ctbuilder.rs

Lines changed: 65 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,7 @@ use quote::{quote, ToTokens, TokenStreamExt};
2424
use regex::Regex;
2525
use serde::Serialize;
2626

27-
use crate::{
28-
DefaultLexerTypes, LRNonStreamingLexerDef, LexerDef, RegexOptions, DEFAULT_REGEX_OPTIONS,
29-
};
27+
use crate::{DefaultLexerTypes, LRNonStreamingLexerDef, LexFlags, LexerDef, UNSPECIFIED_LEX_FLAGS};
3028

3129
const RUST_FILE_EXT: &str = "rs";
3230

@@ -133,7 +131,8 @@ where
133131
rule_ids_map: Option<HashMap<String, LexerTypesT::StorageT>>,
134132
allow_missing_terms_in_lexer: bool,
135133
allow_missing_tokens_in_parser: bool,
136-
regex_options: RegexOptions,
134+
force_lex_flags: LexFlags,
135+
default_lex_flags: LexFlags,
137136
}
138137

139138
impl CTLexerBuilder<'_, DefaultLexerTypes<u32>> {
@@ -177,7 +176,8 @@ where
177176
rule_ids_map: None,
178177
allow_missing_terms_in_lexer: false,
179178
allow_missing_tokens_in_parser: true,
180-
regex_options: DEFAULT_REGEX_OPTIONS,
179+
force_lex_flags: UNSPECIFIED_LEX_FLAGS,
180+
default_lex_flags: UNSPECIFIED_LEX_FLAGS,
181181
}
182182
}
183183

@@ -366,11 +366,14 @@ where
366366
let lex_src = read_to_string(lexerp)
367367
.map_err(|e| format!("When reading '{}': {e}", lexerp.display()))?;
368368
let line_cache = NewlineCache::from_str(&lex_src).unwrap();
369-
let mut lexerdef: Box<dyn LexerDef<LexerTypesT>> = match self.lexerkind {
370-
LexerKind::LRNonStreamingLexer => Box::new(
371-
LRNonStreamingLexerDef::<LexerTypesT>::new_with_options(
369+
let (mut lexerdef, lex_flags): (Box<dyn LexerDef<LexerTypesT>>, LexFlags) = match self
370+
.lexerkind
371+
{
372+
LexerKind::LRNonStreamingLexer => {
373+
let lexerdef = LRNonStreamingLexerDef::<LexerTypesT>::new_with_options(
372374
&lex_src,
373-
self.regex_options.clone(),
375+
self.force_lex_flags.clone(),
376+
self.default_lex_flags.clone(),
374377
)
375378
.map_err(|errs| {
376379
errs.iter()
@@ -386,8 +389,10 @@ where
386389
})
387390
.collect::<Vec<_>>()
388391
.join("\n")
389-
})?,
390-
),
392+
})?;
393+
let lex_flags = lexerdef.lex_flags().cloned();
394+
(Box::new(lexerdef), lex_flags.unwrap())
395+
}
391396
};
392397
let (missing_from_lexer, missing_from_parser) = match self.rule_ids_map {
393398
Some(ref rim) => {
@@ -476,7 +481,7 @@ pub fn lexerdef() -> {lexerdef_type} {{
476481
)
477482
.ok();
478483

479-
let RegexOptions {
484+
let LexFlags {
480485
dot_matches_new_line,
481486
multi_line,
482487
octal,
@@ -488,7 +493,11 @@ pub fn lexerdef() -> {lexerdef_type} {{
488493
size_limit,
489494
dfa_size_limit,
490495
nest_limit,
491-
} = self.regex_options;
496+
} = lex_flags;
497+
let dot_matches_new_line = QuoteOption(dot_matches_new_line);
498+
let multi_line = QuoteOption(multi_line);
499+
let octal = QuoteOption(octal);
500+
let posix_escapes = QuoteOption(posix_escapes);
492501
let case_insensitive = QuoteOption(case_insensitive);
493502
let unicode = QuoteOption(unicode);
494503
let swap_greed = QuoteOption(swap_greed);
@@ -498,7 +507,7 @@ pub fn lexerdef() -> {lexerdef_type} {{
498507
let nest_limit = QuoteOption(nest_limit);
499508

500509
outs.push_str(&format!(
501-
"let regex_options = ::lrlex::RegexOptions {{
510+
"let lex_flags = ::lrlex::LexFlags {{
502511
dot_matches_new_line: {dot_matches_new_line},
503512
multi_line: {multi_line},
504513
octal: {octal},
@@ -553,7 +562,7 @@ pub fn lexerdef() -> {lexerdef_type} {{
553562
write!(
554563
outs,
555564
"
556-
Rule::new(::lrlex::unstable_api::InternalPublicApi, {}, {}, {}, {}.to_string(), {}.to_vec(), {}, &regex_options).unwrap(),",
565+
Rule::new(::lrlex::unstable_api::InternalPublicApi, {}, {}, {}, {}.to_string(), {}.to_vec(), {}, &lex_flags).unwrap(),",
557566
quote!(#tok_id),
558567
quote!(#n),
559568
quote!(#n_span),
@@ -704,78 +713,108 @@ pub fn lexerdef() -> {lexerdef_type} {{
704713

705714
/// Sets the `regex::RegexBuilder` option of the same name.
706715
/// The default value is `true`.
716+
///
717+
/// Setting this flag will override the same flag within a `%grmtools` section.
707718
pub fn dot_matches_new_line(mut self, flag: bool) -> Self {
708-
self.regex_options.dot_matches_new_line = flag;
719+
self.force_lex_flags.dot_matches_new_line = Some(flag);
709720
self
710721
}
711722

712723
/// Sets the `regex::RegexBuilder` option of the same name.
713724
/// The default value is `true`.
725+
///
726+
/// Setting this flag will override the same flag within a `%grmtools` section.
714727
pub fn multi_line(mut self, flag: bool) -> Self {
715-
self.regex_options.multi_line = flag;
728+
self.force_lex_flags.multi_line = Some(flag);
716729
self
717730
}
718731

719732
/// Sets the `regex::RegexBuilder` option of the same name.
720733
/// The default value is `false`.
734+
///
735+
/// Setting this flag will override the same flag within a `%grmtools` section.
721736
pub fn posix_escapes(mut self, flag: bool) -> Self {
722-
self.regex_options.posix_escapes = flag;
737+
self.force_lex_flags.posix_escapes = Some(flag);
723738
self
724739
}
725740

726741
/// Sets the `regex::RegexBuilder` option of the same name.
727742
/// The default value is `true`.
743+
///
744+
/// Setting this flag will override the same flag within a `%grmtools` section.
728745
pub fn octal(mut self, flag: bool) -> Self {
729-
self.regex_options.octal = flag;
746+
self.force_lex_flags.octal = Some(flag);
730747
self
731748
}
732749

733750
/// Sets the `regex::RegexBuilder` option of the same name.
734751
/// Default value is specified by regex.
752+
///
753+
/// Setting this flag will override the same flag within a `%grmtools` section.
735754
pub fn swap_greed(mut self, flag: bool) -> Self {
736-
self.regex_options.swap_greed = Some(flag);
755+
self.force_lex_flags.swap_greed = Some(flag);
737756
self
738757
}
739758

740759
/// Sets the `regex::RegexBuilder` option of the same name.
741760
/// Default value is specified by regex.
761+
///
762+
/// Setting this flag will override the same flag within a `%grmtools` section.
742763
pub fn ignore_whitespace(mut self, flag: bool) -> Self {
743-
self.regex_options.ignore_whitespace = Some(flag);
764+
self.force_lex_flags.ignore_whitespace = Some(flag);
744765
self
745766
}
746767

747768
/// Sets the `regex::RegexBuilder` option of the same name.
748769
/// Default value is specified by regex.
770+
///
771+
/// Setting this flag will override the same flag within a `%grmtools` section.
749772
pub fn unicode(mut self, flag: bool) -> Self {
750-
self.regex_options.unicode = Some(flag);
773+
self.force_lex_flags.unicode = Some(flag);
751774
self
752775
}
753776

754777
/// Sets the `regex::RegexBuilder` option of the same name.
755778
/// Default value is specified by regex.
779+
///
780+
/// Setting this flag will override the same flag within a `%grmtools` section.
756781
pub fn case_insensitive(mut self, flag: bool) -> Self {
757-
self.regex_options.case_insensitive = Some(flag);
782+
self.force_lex_flags.case_insensitive = Some(flag);
758783
self
759784
}
760785

761786
/// Sets the `regex::RegexBuilder` option of the same name.
762787
/// Default value is specified by regex.
788+
///
789+
/// Setting this flag will override the same flag within a `%grmtools` section.
763790
pub fn size_limit(mut self, sz: usize) -> Self {
764-
self.regex_options.size_limit = Some(sz);
791+
self.force_lex_flags.size_limit = Some(sz);
765792
self
766793
}
767794

768795
/// Sets the `regex::RegexBuilder` option of the same name.
769796
/// Default value is specified by regex.
797+
///
798+
/// Setting this flag will override the same flag within a `%grmtools` section.
770799
pub fn dfa_size_limit(mut self, sz: usize) -> Self {
771-
self.regex_options.dfa_size_limit = Some(sz);
800+
self.force_lex_flags.dfa_size_limit = Some(sz);
772801
self
773802
}
774803

775804
/// Sets the `regex::RegexBuilder` option of the same name.
776805
/// Default value is specified by regex.
806+
///
807+
/// Setting this flag will override the same flag within a `%grmtools` section.
777808
pub fn nest_limit(mut self, lim: u32) -> Self {
778-
self.regex_options.nest_limit = Some(lim);
809+
self.force_lex_flags.nest_limit = Some(lim);
810+
self
811+
}
812+
813+
/// `Some` values in the specified `flags` will be used as a default value
814+
/// unless the specified value has already been specified previously via `CTLexerBuilder`
815+
/// or was specified in the `%grmtools` section of a *.l* file.
816+
pub fn default_lex_flags(mut self, flags: LexFlags) -> Self {
817+
self.default_lex_flags = flags;
779818
self
780819
}
781820
}

0 commit comments

Comments
 (0)