22
33use std:: {
44 any:: type_name,
5- borrow:: Cow ,
65 collections:: { HashMap , HashSet } ,
76 env:: { current_dir, var} ,
87 error:: Error ,
@@ -20,7 +19,7 @@ use lazy_static::lazy_static;
2019use lrpar:: { CTParserBuilder , LexerTypes } ;
2120use num_traits:: { AsPrimitive , PrimInt , Unsigned } ;
2221use proc_macro2:: TokenStream ;
23- use quote:: { quote, ToTokens , TokenStreamExt } ;
22+ use quote:: { format_ident , quote, ToTokens , TokenStreamExt } ;
2423use regex:: Regex ;
2524use serde:: Serialize ;
2625
@@ -54,16 +53,19 @@ pub enum Visibility {
5453 PublicIn ( String ) ,
5554}
5655
57- impl Visibility {
58- fn cow_str ( & self ) -> Cow < ' static , str > {
59- match self {
60- Visibility :: Private => Cow :: from ( "" ) ,
61- Visibility :: Public => Cow :: from ( "pub" ) ,
62- Visibility :: PublicSuper => Cow :: from ( "pub(super)" ) ,
63- Visibility :: PublicSelf => Cow :: from ( "pub(self)" ) ,
64- Visibility :: PublicCrate => Cow :: from ( "pub(crate)" ) ,
65- Visibility :: PublicIn ( data) => Cow :: from ( format ! ( "pub(in {})" , data) ) ,
66- }
56+ impl ToTokens for Visibility {
57+ fn to_tokens ( & self , tokens : & mut TokenStream ) {
58+ tokens. extend ( match self {
59+ Visibility :: Private => quote ! ( ) ,
60+ Visibility :: Public => quote ! { pub } ,
61+ Visibility :: PublicSuper => quote ! { pub ( super ) } ,
62+ Visibility :: PublicSelf => quote ! { pub ( self ) } ,
63+ Visibility :: PublicCrate => quote ! { pub ( crate ) } ,
64+ Visibility :: PublicIn ( data) => {
65+ let other = str:: parse :: < TokenStream > ( data) . unwrap ( ) ;
66+ quote ! { pub ( in #other) }
67+ }
68+ } )
6769 }
6870}
6971
@@ -452,163 +454,125 @@ where
452454 format ! ( "{}_l" , stem)
453455 }
454456 } ;
455-
456- let mut outs = String :: new ( ) ;
457- //
458- // Header
459-
460- let ( lexerdef_name, lexerdef_type) = match self . lexerkind {
461- LexerKind :: LRNonStreamingLexer => (
462- "LRNonStreamingLexerDef" ,
463- format ! (
464- "LRNonStreamingLexerDef<{lexertypest}>" ,
465- lexertypest = type_name:: <LexerTypesT >( )
466- ) ,
467- ) ,
457+ let mod_name = format_ident ! ( "{}" , mod_name) ;
458+ let mut lexerdef_func_impl = {
459+ let LexFlags {
460+ allow_wholeline_comments,
461+ dot_matches_new_line,
462+ multi_line,
463+ octal,
464+ posix_escapes,
465+ case_insensitive,
466+ unicode,
467+ swap_greed,
468+ ignore_whitespace,
469+ size_limit,
470+ dfa_size_limit,
471+ nest_limit,
472+ } = lex_flags;
473+ let allow_wholeline_comments = QuoteOption ( allow_wholeline_comments) ;
474+ let dot_matches_new_line = QuoteOption ( dot_matches_new_line) ;
475+ let multi_line = QuoteOption ( multi_line) ;
476+ let octal = QuoteOption ( octal) ;
477+ let posix_escapes = QuoteOption ( posix_escapes) ;
478+ let case_insensitive = QuoteOption ( case_insensitive) ;
479+ let unicode = QuoteOption ( unicode) ;
480+ let swap_greed = QuoteOption ( swap_greed) ;
481+ let ignore_whitespace = QuoteOption ( ignore_whitespace) ;
482+ let size_limit = QuoteOption ( size_limit) ;
483+ let dfa_size_limit = QuoteOption ( dfa_size_limit) ;
484+ let nest_limit = QuoteOption ( nest_limit) ;
485+
486+ // Code gen for the lexerdef() `lex_flags` variable.
487+ quote ! {
488+ let mut lex_flags = :: lrlex:: DEFAULT_LEX_FLAGS ;
489+ lex_flags. allow_wholeline_comments = #allow_wholeline_comments;
490+ lex_flags. dot_matches_new_line = #dot_matches_new_line;
491+ lex_flags. multi_line = #multi_line;
492+ lex_flags. octal = #octal;
493+ lex_flags. posix_escapes = #posix_escapes;
494+ lex_flags. case_insensitive = #case_insensitive;
495+ lex_flags. unicode = #unicode;
496+ lex_flags. swap_greed = #swap_greed;
497+ lex_flags. ignore_whitespace = #ignore_whitespace;
498+ lex_flags. size_limit = #size_limit;
499+ lex_flags. dfa_size_limit = #dfa_size_limit;
500+ lex_flags. nest_limit = #nest_limit;
501+ let lex_flags = lex_flags;
502+ }
468503 } ;
469-
470- write ! (
471- outs,
472- "{mod_vis} mod {mod_name} {{
473- use lrlex::{{LexerDef, LRNonStreamingLexerDef, Rule, StartState}};
474-
475- #[allow(dead_code)]
476- pub fn lexerdef() -> {lexerdef_type} {{
477- " ,
478- mod_vis = self . visibility. cow_str( ) ,
479- mod_name = mod_name,
480- lexerdef_type = lexerdef_type
481- )
482- . ok ( ) ;
483-
484- let LexFlags {
485- allow_wholeline_comments,
486- dot_matches_new_line,
487- multi_line,
488- octal,
489- posix_escapes,
490- case_insensitive,
491- unicode,
492- swap_greed,
493- ignore_whitespace,
494- size_limit,
495- dfa_size_limit,
496- nest_limit,
497- } = lex_flags;
498- let allow_wholeline_comments = QuoteOption ( allow_wholeline_comments) ;
499- let dot_matches_new_line = QuoteOption ( dot_matches_new_line) ;
500- let multi_line = QuoteOption ( multi_line) ;
501- let octal = QuoteOption ( octal) ;
502- let posix_escapes = QuoteOption ( posix_escapes) ;
503- let case_insensitive = QuoteOption ( case_insensitive) ;
504- let unicode = QuoteOption ( unicode) ;
505- let swap_greed = QuoteOption ( swap_greed) ;
506- let ignore_whitespace = QuoteOption ( ignore_whitespace) ;
507- let size_limit = QuoteOption ( size_limit) ;
508- let dfa_size_limit = QuoteOption ( dfa_size_limit) ;
509- let nest_limit = QuoteOption ( nest_limit) ;
510-
511- outs. push_str ( & format ! (
512- "let mut lex_flags = ::lrlex::DEFAULT_LEX_FLAGS;
513- lex_flags.allow_wholeline_comments = {allow_wholeline_comments};
514- lex_flags.dot_matches_new_line = {dot_matches_new_line};
515- lex_flags.multi_line = {multi_line};
516- lex_flags.octal = {octal};
517- lex_flags.posix_escapes = {posix_escapes};
518- lex_flags.case_insensitive = {case_insensitive};
519- lex_flags.unicode = {unicode};
520- lex_flags.swap_greed = {swap_greed};
521- lex_flags.ignore_whitespace = {ignore_whitespace};
522- lex_flags.size_limit = {size_limit};
523- lex_flags.dfa_size_limit = {dfa_size_limit};
524- lex_flags.nest_limit = {nest_limit};
525- let lex_flags = lex_flags;
526- " ,
527- allow_wholeline_comments = quote!( #allow_wholeline_comments) ,
528- dot_matches_new_line = quote!( #dot_matches_new_line) ,
529- multi_line = quote!( #multi_line) ,
530- octal = quote!( #octal) ,
531- posix_escapes = quote!( #posix_escapes) ,
532- case_insensitive = quote!( #case_insensitive) ,
533- unicode = quote!( #unicode) ,
534- swap_greed = quote!( #swap_greed) ,
535- ignore_whitespace = quote!( #ignore_whitespace) ,
536- size_limit = quote!( #size_limit) ,
537- dfa_size_limit = quote!( #dfa_size_limit) ,
538- nest_limit = quote!( #nest_limit) ,
539- ) ) ;
540-
541- outs. push_str ( " let start_states: Vec<StartState> = vec![" ) ;
542- for ss in lexerdef. iter_start_states ( ) {
543- let state_name = & ss. name ;
544- write ! (
545- outs,
546- "
547- StartState::new({}, {}, {}, ::cfgrammar::Span::new({}, {}))," ,
548- ss. id,
549- quote!( #state_name) ,
550- ss. exclusive,
551- ss. name_span. start( ) ,
552- ss. name_span. end( )
553- )
554- . ok ( ) ;
555- }
556- outs. push_str ( "\n ];\n " ) ;
557- outs. push_str ( " let rules = vec![" ) ;
558-
559- // Individual rules
560- for r in lexerdef. iter_rules ( ) {
561- let tok_id = QuoteOption ( r. tok_id ) ;
562- let n = QuoteOption ( r. name ( ) . map ( QuoteToString ) ) ;
563- let target_state = QuoteOption ( r. target_state ( ) . map ( |( x, y) | QuoteTuple ( ( x, y) ) ) ) ;
564- let n_span = r. name_span ( ) ;
565- let regex = QuoteToString ( & r. re_str ) ;
566- let start_states = r. start_states ( ) ;
567- write ! (
568- outs,
569- "
570- Rule::new(::lrlex::unstable_api::InternalPublicApi, {}, {}, {}, {}.to_string(), {}.to_vec(), {}, &lex_flags).unwrap()," ,
571- quote!( #tok_id) ,
572- quote!( #n) ,
573- quote!( #n_span) ,
574- quote!( #regex) ,
575- quote!( [ #( #start_states) , * ] ) ,
576- quote!( #target_state) ,
577- )
578- . ok ( ) ;
504+ {
505+ let start_states = lexerdef. iter_start_states ( ) ;
506+ let rules = lexerdef. iter_rules ( ) . map ( |r| {
507+ let tok_id = QuoteOption ( r. tok_id ) ;
508+ let n = QuoteOption ( r. name ( ) . map ( QuoteToString ) ) ;
509+ let target_state =
510+ QuoteOption ( r. target_state ( ) . map ( |( x, y) | QuoteTuple ( ( x, y) ) ) ) ;
511+ let n_span = r. name_span ( ) ;
512+ let regex = QuoteToString ( & r. re_str ) ;
513+ let start_states = r. start_states ( ) ;
514+ // Code gen to construct a rule.
515+ //
516+ // We cannot `impl ToTokens for Rule` because `Rule` never stores `lex_flags`,
517+ // thus we reference the local `lex_flags` variable bound earlier.
518+ quote ! {
519+ Rule :: new( :: lrlex:: unstable_api:: InternalPublicApi , #tok_id, #n, #n_span, #regex. to_string( ) ,
520+ vec![ #( #start_states) , * ] , #target_state, & lex_flags) . unwrap( )
521+ }
522+ } ) ;
523+ // Code gen for `lexerdef()`s rules and the stack of `start_states`.
524+ lexerdef_func_impl. append_all ( quote ! {
525+ let start_states: Vec <StartState > = vec![ #( #start_states) , * ] ;
526+ let rules = vec![ #( #rules) , * ] ;
527+ } ) ;
579528 }
529+ let lexerdef_ty = match self . lexerkind {
530+ LexerKind :: LRNonStreamingLexer => {
531+ quote ! ( :: lrlex:: LRNonStreamingLexerDef )
532+ }
533+ } ;
534+ // Code gen for the lexerdef() return value referencing variables bound earlier.
535+ lexerdef_func_impl. append_all ( quote ! {
536+ #lexerdef_ty:: from_rules( start_states, rules)
537+ } ) ;
580538
581- // Footer
582- write ! (
583- outs,
584- "
585- ];
586- {lexerdef_name}::from_rules(start_states, rules)
587- }}
588-
589- " ,
590- lexerdef_name = lexerdef_name
591- )
592- . ok ( ) ;
593-
594- // Token IDs
595- if let Some ( ref rim) = self . rule_ids_map {
596- for ( n, id) in rim {
597- if RE_TOKEN_ID . is_match ( n) {
598- write ! (
599- outs,
600- "#[allow(dead_code)]\n pub const T_{}: {} = {};\n " ,
601- n. to_ascii_uppercase( ) ,
602- type_name:: <LexerTypesT :: StorageT >( ) ,
603- quote!( #id)
604- )
605- . ok ( ) ;
539+ let mut token_consts = TokenStream :: new ( ) ;
540+ if let Some ( rim) = self . rule_ids_map {
541+ for ( name, id) in rim {
542+ if RE_TOKEN_ID . is_match ( & name) {
543+ let tok_ident = format_ident ! ( "N_{}" , name. to_ascii_uppercase( ) ) ;
544+ let storaget =
545+ str:: parse :: < TokenStream > ( type_name :: < LexerTypesT :: StorageT > ( ) ) . unwrap ( ) ;
546+ // Code gen for the constant token values.
547+ let tok_const = quote ! {
548+ #[ allow( dead_code) ]
549+ pub const #tok_ident: #storaget = #id;
550+ } ;
551+ token_consts. extend ( tok_const)
606552 }
607553 }
608554 }
555+ let token_consts = token_consts. into_iter ( ) ;
556+ let out_tokens = {
557+ let lexerdef_param = str:: parse :: < TokenStream > ( type_name :: < LexerTypesT > ( ) ) . unwrap ( ) ;
558+ let mod_vis = self . visibility ;
559+ // Code gen for the generated module.
560+ quote ! {
561+ #mod_vis mod #mod_name {
562+ use :: lrlex:: { LexerDef , Rule , StartState } ;
563+ #[ allow( dead_code) ]
564+ pub fn lexerdef( ) -> #lexerdef_ty<#lexerdef_param> {
565+ #lexerdef_func_impl
566+ }
567+
568+ #( #token_consts) *
569+ }
570+ }
571+ } ;
609572
610- // Footer
611- outs. push ( '}' ) ;
573+ // Pretty print it.
574+ let out_file = syn:: parse_file ( & out_tokens. to_string ( ) ) . unwrap ( ) ;
575+ let outs = prettyplease:: unparse ( & out_file) ;
612576
613577 // If the file we're about to write out already exists with the same contents, then we
614578 // don't overwrite it (since that will force a recompile of the file, and relinking of the
0 commit comments