Commit aba828e

Merge pull request #503 from ratmice/refactor_codgen_quote
Refactor lrlex codegen using quote
2 parents 7669305 + c226086 commit aba828e

4 files changed: +144 −164 lines changed

Cargo.toml

Lines changed: 2 additions & 0 deletions
@@ -45,3 +45,5 @@ static_assertions = "1.1"
 unicode-width = "0.1.11"
 vob = ">=3.0.2"
 proc-macro2 = "1.0"
+prettyplease = "0.2.30"
+syn = "2.0"
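
These two dependencies exist to pretty-print the generated code: lrlex now builds a `proc_macro2::TokenStream`, parses it back with `syn`, and formats it with `prettyplease` (see lrlex/src/lib/ctbuilder.rs below). A minimal standalone sketch of that pipeline (note that `syn::parse_file` requires syn's "full" feature):

use quote::quote;

fn main() {
    // Build some Rust source as a flat TokenStream, as the new codegen does.
    let tokens = quote! {
        pub mod lexer_l { pub fn lexerdef() -> u8 { 0 } }
    };
    // Parse the token stream back into a syntax tree...
    let file = syn::parse_file(&tokens.to_string()).unwrap();
    // ...then unparse it with conventional indentation and line breaks.
    print!("{}", prettyplease::unparse(&file));
}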

lrlex/Cargo.toml

Lines changed: 2 additions & 0 deletions
@@ -36,3 +36,5 @@ num-traits.workspace = true
 proc-macro2.workspace = true
 quote.workspace = true
 serde.workspace = true
+prettyplease.workspace = true
+syn.workspace = true

lrlex/src/lib/ctbuilder.rs

Lines changed: 126 additions & 162 deletions
@@ -2,7 +2,6 @@
 
 use std::{
     any::type_name,
-    borrow::Cow,
     collections::{HashMap, HashSet},
     env::{current_dir, var},
     error::Error,
@@ -20,7 +19,7 @@ use lazy_static::lazy_static;
 use lrpar::{CTParserBuilder, LexerTypes};
 use num_traits::{AsPrimitive, PrimInt, Unsigned};
 use proc_macro2::TokenStream;
-use quote::{quote, ToTokens, TokenStreamExt};
+use quote::{format_ident, quote, ToTokens, TokenStreamExt};
 use regex::Regex;
 use serde::Serialize;
 
@@ -54,16 +53,19 @@ pub enum Visibility {
     PublicIn(String),
 }
 
-impl Visibility {
-    fn cow_str(&self) -> Cow<'static, str> {
-        match self {
-            Visibility::Private => Cow::from(""),
-            Visibility::Public => Cow::from("pub"),
-            Visibility::PublicSuper => Cow::from("pub(super)"),
-            Visibility::PublicSelf => Cow::from("pub(self)"),
-            Visibility::PublicCrate => Cow::from("pub(crate)"),
-            Visibility::PublicIn(data) => Cow::from(format!("pub(in {})", data)),
-        }
+impl ToTokens for Visibility {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        tokens.extend(match self {
+            Visibility::Private => quote!(),
+            Visibility::Public => quote! {pub},
+            Visibility::PublicSuper => quote! {pub(super)},
+            Visibility::PublicSelf => quote! {pub(self)},
+            Visibility::PublicCrate => quote! {pub(crate)},
+            Visibility::PublicIn(data) => {
+                let other = str::parse::<TokenStream>(data).unwrap();
+                quote! {pub(in #other)}
+            }
+        })
     }
 }
 
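With this impl in place, a `Visibility` interpolates directly into later `quote!` invocations, and the newly imported `format_ident!` does the analogous job for the module name. A self-contained sketch of both mechanisms, with stand-in names rather than lrlex's actual types:

use proc_macro2::TokenStream;
use quote::{format_ident, quote, ToTokens};

// Stand-in for lrlex's Visibility::PublicCrate variant.
struct PubCrate;

impl ToTokens for PubCrate {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        tokens.extend(quote! { pub(crate) });
    }
}

fn main() {
    let mod_vis = PubCrate;
    // An Ident, not a string: it interpolates as a bare identifier.
    let mod_name = format_ident!("{}_l", "calc");
    let tokens = quote! { #mod_vis mod #mod_name { } };
    println!("{}", tokens); // roughly: pub(crate) mod calc_l { }
}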
@@ -452,163 +454,125 @@ where
                 format!("{}_l", stem)
             }
         };
-
-        let mut outs = String::new();
-        //
-        // Header
-
-        let (lexerdef_name, lexerdef_type) = match self.lexerkind {
-            LexerKind::LRNonStreamingLexer => (
-                "LRNonStreamingLexerDef",
-                format!(
-                    "LRNonStreamingLexerDef<{lexertypest}>",
-                    lexertypest = type_name::<LexerTypesT>()
-                ),
-            ),
+        let mod_name = format_ident!("{}", mod_name);
+        let mut lexerdef_func_impl = {
+            let LexFlags {
+                allow_wholeline_comments,
+                dot_matches_new_line,
+                multi_line,
+                octal,
+                posix_escapes,
+                case_insensitive,
+                unicode,
+                swap_greed,
+                ignore_whitespace,
+                size_limit,
+                dfa_size_limit,
+                nest_limit,
+            } = lex_flags;
+            let allow_wholeline_comments = QuoteOption(allow_wholeline_comments);
+            let dot_matches_new_line = QuoteOption(dot_matches_new_line);
+            let multi_line = QuoteOption(multi_line);
+            let octal = QuoteOption(octal);
+            let posix_escapes = QuoteOption(posix_escapes);
+            let case_insensitive = QuoteOption(case_insensitive);
+            let unicode = QuoteOption(unicode);
+            let swap_greed = QuoteOption(swap_greed);
+            let ignore_whitespace = QuoteOption(ignore_whitespace);
+            let size_limit = QuoteOption(size_limit);
+            let dfa_size_limit = QuoteOption(dfa_size_limit);
+            let nest_limit = QuoteOption(nest_limit);
+
+            // Code gen for the lexerdef() `lex_flags` variable.
+            quote! {
+                let mut lex_flags = ::lrlex::DEFAULT_LEX_FLAGS;
+                lex_flags.allow_wholeline_comments = #allow_wholeline_comments;
+                lex_flags.dot_matches_new_line = #dot_matches_new_line;
+                lex_flags.multi_line = #multi_line;
+                lex_flags.octal = #octal;
+                lex_flags.posix_escapes = #posix_escapes;
+                lex_flags.case_insensitive = #case_insensitive;
+                lex_flags.unicode = #unicode;
+                lex_flags.swap_greed = #swap_greed;
+                lex_flags.ignore_whitespace = #ignore_whitespace;
+                lex_flags.size_limit = #size_limit;
+                lex_flags.dfa_size_limit = #dfa_size_limit;
+                lex_flags.nest_limit = #nest_limit;
+                let lex_flags = lex_flags;
+            }
         };
-
-        write!(
-            outs,
-            "{mod_vis} mod {mod_name} {{
-    use lrlex::{{LexerDef, LRNonStreamingLexerDef, Rule, StartState}};
-
-    #[allow(dead_code)]
-    pub fn lexerdef() -> {lexerdef_type} {{
-",
-            mod_vis = self.visibility.cow_str(),
-            mod_name = mod_name,
-            lexerdef_type = lexerdef_type
-        )
-        .ok();
-
-        let LexFlags {
-            allow_wholeline_comments,
-            dot_matches_new_line,
-            multi_line,
-            octal,
-            posix_escapes,
-            case_insensitive,
-            unicode,
-            swap_greed,
-            ignore_whitespace,
-            size_limit,
-            dfa_size_limit,
-            nest_limit,
-        } = lex_flags;
-        let allow_wholeline_comments = QuoteOption(allow_wholeline_comments);
-        let dot_matches_new_line = QuoteOption(dot_matches_new_line);
-        let multi_line = QuoteOption(multi_line);
-        let octal = QuoteOption(octal);
-        let posix_escapes = QuoteOption(posix_escapes);
-        let case_insensitive = QuoteOption(case_insensitive);
-        let unicode = QuoteOption(unicode);
-        let swap_greed = QuoteOption(swap_greed);
-        let ignore_whitespace = QuoteOption(ignore_whitespace);
-        let size_limit = QuoteOption(size_limit);
-        let dfa_size_limit = QuoteOption(dfa_size_limit);
-        let nest_limit = QuoteOption(nest_limit);
-
-        outs.push_str(&format!(
-            "let mut lex_flags = ::lrlex::DEFAULT_LEX_FLAGS;
-    lex_flags.allow_wholeline_comments = {allow_wholeline_comments};
-    lex_flags.dot_matches_new_line = {dot_matches_new_line};
-    lex_flags.multi_line = {multi_line};
-    lex_flags.octal = {octal};
-    lex_flags.posix_escapes = {posix_escapes};
-    lex_flags.case_insensitive = {case_insensitive};
-    lex_flags.unicode = {unicode};
-    lex_flags.swap_greed = {swap_greed};
-    lex_flags.ignore_whitespace = {ignore_whitespace};
-    lex_flags.size_limit = {size_limit};
-    lex_flags.dfa_size_limit = {dfa_size_limit};
-    lex_flags.nest_limit = {nest_limit};
-    let lex_flags = lex_flags;
-",
-            allow_wholeline_comments = quote!(#allow_wholeline_comments),
-            dot_matches_new_line = quote!(#dot_matches_new_line),
-            multi_line = quote!(#multi_line),
-            octal = quote!(#octal),
-            posix_escapes = quote!(#posix_escapes),
-            case_insensitive = quote!(#case_insensitive),
-            unicode = quote!(#unicode),
-            swap_greed = quote!(#swap_greed),
-            ignore_whitespace = quote!(#ignore_whitespace),
-            size_limit = quote!(#size_limit),
-            dfa_size_limit = quote!(#dfa_size_limit),
-            nest_limit = quote!(#nest_limit),
-        ));
-
-        outs.push_str("    let start_states: Vec<StartState> = vec![");
-        for ss in lexerdef.iter_start_states() {
-            let state_name = &ss.name;
-            write!(
-                outs,
-                "
-        StartState::new({}, {}, {}, ::cfgrammar::Span::new({}, {})),",
-                ss.id,
-                quote!(#state_name),
-                ss.exclusive,
-                ss.name_span.start(),
-                ss.name_span.end()
-            )
-            .ok();
-        }
-        outs.push_str("\n    ];\n");
-        outs.push_str("    let rules = vec![");
-
-        // Individual rules
-        for r in lexerdef.iter_rules() {
-            let tok_id = QuoteOption(r.tok_id);
-            let n = QuoteOption(r.name().map(QuoteToString));
-            let target_state = QuoteOption(r.target_state().map(|(x, y)| QuoteTuple((x, y))));
-            let n_span = r.name_span();
-            let regex = QuoteToString(&r.re_str);
-            let start_states = r.start_states();
-            write!(
-                outs,
-                "
-        Rule::new(::lrlex::unstable_api::InternalPublicApi, {}, {}, {}, {}.to_string(), {}.to_vec(), {}, &lex_flags).unwrap(),",
-                quote!(#tok_id),
-                quote!(#n),
-                quote!(#n_span),
-                quote!(#regex),
-                quote!([#(#start_states),*]),
-                quote!(#target_state),
-            )
-            .ok();
+        {
+            let start_states = lexerdef.iter_start_states();
+            let rules = lexerdef.iter_rules().map(|r| {
+                let tok_id = QuoteOption(r.tok_id);
+                let n = QuoteOption(r.name().map(QuoteToString));
+                let target_state =
+                    QuoteOption(r.target_state().map(|(x, y)| QuoteTuple((x, y))));
+                let n_span = r.name_span();
+                let regex = QuoteToString(&r.re_str);
+                let start_states = r.start_states();
+                // Code gen to construct a rule.
+                //
+                // We cannot `impl ToTokens for Rule` because `Rule` never stores `lex_flags`;
+                // thus we reference the local `lex_flags` variable bound earlier.
+                quote! {
+                    Rule::new(::lrlex::unstable_api::InternalPublicApi, #tok_id, #n, #n_span, #regex.to_string(),
+                        vec![#(#start_states),*], #target_state, &lex_flags).unwrap()
+                }
+            });
+            // Code gen for `lexerdef()`'s rules and the stack of `start_states`.
+            lexerdef_func_impl.append_all(quote! {
+                let start_states: Vec<StartState> = vec![#(#start_states),*];
+                let rules = vec![#(#rules),*];
+            });
         }
+        let lexerdef_ty = match self.lexerkind {
+            LexerKind::LRNonStreamingLexer => {
+                quote!(::lrlex::LRNonStreamingLexerDef)
+            }
+        };
+        // Code gen for the lexerdef() return value referencing variables bound earlier.
+        lexerdef_func_impl.append_all(quote! {
+            #lexerdef_ty::from_rules(start_states, rules)
+        });
 
-        // Footer
-        write!(
-            outs,
-            "
-    ];
-    {lexerdef_name}::from_rules(start_states, rules)
-}}
-
-",
-            lexerdef_name = lexerdef_name
-        )
-        .ok();
-
-        // Token IDs
-        if let Some(ref rim) = self.rule_ids_map {
-            for (n, id) in rim {
-                if RE_TOKEN_ID.is_match(n) {
-                    write!(
-                        outs,
-                        "#[allow(dead_code)]\npub const T_{}: {} = {};\n",
-                        n.to_ascii_uppercase(),
-                        type_name::<LexerTypesT::StorageT>(),
-                        quote!(#id)
-                    )
-                    .ok();
+        let mut token_consts = TokenStream::new();
+        if let Some(rim) = self.rule_ids_map {
+            for (name, id) in rim {
+                if RE_TOKEN_ID.is_match(&name) {
+                    let tok_ident = format_ident!("N_{}", name.to_ascii_uppercase());
+                    let storaget =
+                        str::parse::<TokenStream>(type_name::<LexerTypesT::StorageT>()).unwrap();
+                    // Code gen for the constant token values.
+                    let tok_const = quote! {
+                        #[allow(dead_code)]
+                        pub const #tok_ident: #storaget = #id;
+                    };
+                    token_consts.extend(tok_const)
                 }
             }
         }
+        let token_consts = token_consts.into_iter();
+        let out_tokens = {
+            let lexerdef_param = str::parse::<TokenStream>(type_name::<LexerTypesT>()).unwrap();
+            let mod_vis = self.visibility;
+            // Code gen for the generated module.
+            quote! {
+                #mod_vis mod #mod_name {
+                    use ::lrlex::{LexerDef, Rule, StartState};
+                    #[allow(dead_code)]
+                    pub fn lexerdef() -> #lexerdef_ty<#lexerdef_param> {
+                        #lexerdef_func_impl
+                    }
+
+                    #(#token_consts)*
+                }
+            }
+        };
 
-        // Footer
-        outs.push('}');
+        // Pretty print it.
+        let out_file = syn::parse_file(&out_tokens.to_string()).unwrap();
+        let outs = prettyplease::unparse(&out_file);
 
         // If the file we're about to write out already exists with the same contents, then we
         // don't overwrite it (since that will force a recompile of the file, and relinking of the
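
The `QuoteOption`, `QuoteToString`, and `QuoteTuple` helpers above are newtype wrappers defined elsewhere in ctbuilder.rs; their definitions are not part of this diff. Their job is to make a runtime value expand to the expression that reconstructs it, e.g. an `Option` must appear as a literal `Some(..)`/`None` in the generated source. A plausible sketch of the pattern (not necessarily the crate's exact definition):

use proc_macro2::TokenStream;
use quote::{quote, ToTokens};

// Expands an Option<T> to a `Some(..)`/`None` expression in generated code.
struct QuoteOption<T>(Option<T>);

impl<T: ToTokens> ToTokens for QuoteOption<T> {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        tokens.extend(match &self.0 {
            Some(inner) => quote! { ::std::option::Option::Some(#inner) },
            None => quote! { ::std::option::Option::None },
        });
    }
}

fn main() {
    let size_limit = QuoteOption(Some(1usize << 20));
    // roughly: lex_flags.size_limit = ::std::option::Option::Some(1048576usize);
    println!("{}", quote! { lex_flags.size_limit = #size_limit; });
}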

lrlex/src/lib/parser.rs

Lines changed: 14 additions & 2 deletions
@@ -2,6 +2,8 @@ use cfgrammar::Span;
 use lazy_static::lazy_static;
 use lrpar::LexerTypes;
 use num_traits::AsPrimitive;
+use proc_macro2::TokenStream;
+use quote::quote;
 use regex::Regex;
 use std::borrow::{Borrow as _, Cow};
 use std::collections::HashMap;
@@ -71,6 +73,18 @@ impl StartState {
     }
 }
 
+impl quote::ToTokens for StartState {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        let StartState {
+            id,
+            name,
+            name_span,
+            exclusive,
+        } = &self;
+        tokens.extend(quote! {::lrlex::StartState::new(#id, #name, #exclusive, #name_span)})
+    }
+}
+
 #[derive(Clone, Debug, Eq, PartialEq)]
 #[doc(hidden)]
 pub enum StartStateOperation {
@@ -79,8 +93,6 @@ pub enum StartStateOperation {
     Pop,
 }
 
-use proc_macro2::TokenStream;
-use quote::quote;
 impl quote::ToTokens for StartStateOperation {
     fn to_tokens(&self, tokens: &mut TokenStream) {
         tokens.extend(match *self {
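
From a user's perspective the generated interface is unchanged: the module still exposes a `lexerdef()` function, so the usual build-script flow keeps working. A typical consumer, assuming a `calc.l` lexer compiled by `CTLexerBuilder` (names follow lrlex's quickstart convention of appending `_l` to the file stem):

use lrlex::lrlex_mod;
use lrpar::Lexer as _;

// Pulls in the module whose body the quote!-based codegen above produces.
lrlex_mod!("calc.l");

fn main() {
    let lexerdef = calc_l::lexerdef();
    let lexer = lexerdef.lexer("2+3");
    for lexeme in lexer.iter() {
        println!("{:?}", lexeme);
    }
}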
