diff --git a/Cargo.toml b/Cargo.toml
index 569e2a815..a5d96140b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -44,3 +44,4 @@ sparsevec = "0.2"
 static_assertions = "1.1"
 unicode-width = "0.1.11"
 vob = ">=3.0.2"
+proc-macro2 = "1.0"
diff --git a/cfgrammar/Cargo.toml b/cfgrammar/Cargo.toml
index 114b3ab7d..7e5084c7a 100644
--- a/cfgrammar/Cargo.toml
+++ b/cfgrammar/Cargo.toml
@@ -21,3 +21,5 @@ num-traits.workspace = true
 regex.workspace = true
 serde = { workspace = true, features = ["derive"], optional = true }
 vob = { workspace = true, features = ["serde"] }
+quote.workspace = true
+proc-macro2.workspace = true
diff --git a/cfgrammar/src/lib/span.rs b/cfgrammar/src/lib/span.rs
index 8b5613d58..0f63fc10d 100644
--- a/cfgrammar/src/lib/span.rs
+++ b/cfgrammar/src/lib/span.rs
@@ -1,3 +1,5 @@
+use proc_macro2::TokenStream;
+use quote::{quote, ToTokens, TokenStreamExt};
 #[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};
 
@@ -54,3 +56,10 @@ pub trait Spanned: std::fmt::Display {
     /// Returns the `SpansKind` associated with this error.
     fn spanskind(&self) -> crate::yacc::parser::SpansKind;
 }
+
+impl ToTokens for Span {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        let Span { start, end } = self;
+        tokens.append_all(quote! {::cfgrammar::Span::new(#start, #end)});
+    }
+}
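With this impl, a `Span` can be interpolated straight into a `quote!` invocation: the emitted tokens rebuild an identical `Span` when the generated crate is compiled. A minimal sketch of what interpolation now produces (the helper function is hypothetical, not part of the patch):

```rust
use proc_macro2::TokenStream;
use quote::quote;

// Hypothetical helper: embed a `Span` into generated source.
fn emit_span(span: cfgrammar::Span) -> TokenStream {
    // For a span covering bytes 3..7 this yields the tokens
    // `::cfgrammar::Span::new(3usize, 7usize)`.
    quote!(#span)
}
```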
diff --git a/cfgrammar/src/lib/yacc/mod.rs b/cfgrammar/src/lib/yacc/mod.rs
index 7034aaaad..2127aed2d 100644
--- a/cfgrammar/src/lib/yacc/mod.rs
+++ b/cfgrammar/src/lib/yacc/mod.rs
@@ -10,6 +10,8 @@ pub use self::{
     grammar::{AssocKind, Precedence, SentenceGenerator, YaccGrammar},
     parser::{YaccGrammarError, YaccGrammarErrorKind, YaccGrammarWarning, YaccGrammarWarningKind},
 };
+use proc_macro2::TokenStream;
+use quote::quote;
 #[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};
 
@@ -39,6 +41,18 @@ pub enum YaccKind {
     Eco,
 }
 
+impl quote::ToTokens for YaccKind {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        tokens.extend(match *self {
+            YaccKind::Grmtools => quote!(::cfgrammar::yacc::YaccKind::Grmtools),
+            YaccKind::Original(action_kind) => {
+                quote!(::cfgrammar::yacc::YaccKind::Original(#action_kind))
+            }
+            YaccKind::Eco => quote!(::cfgrammar::yacc::YaccKind::Eco),
+        })
+    }
+}
+
 #[derive(Clone, Copy, Debug)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum YaccOriginalActionKind {
@@ -50,3 +64,19 @@ pub enum YaccOriginalActionKind {
     /// Do not execute actions of any sort.
     NoAction,
 }
+
+impl quote::ToTokens for YaccOriginalActionKind {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        tokens.extend(match *self {
+            YaccOriginalActionKind::UserAction => {
+                quote!(::cfgrammar::yacc::YaccOriginalActionKind::UserAction)
+            }
+            YaccOriginalActionKind::GenericParseTree => {
+                quote!(::cfgrammar::yacc::YaccOriginalActionKind::GenericParseTree)
+            }
+            YaccOriginalActionKind::NoAction => {
+                quote!(::cfgrammar::yacc::YaccOriginalActionKind::NoAction)
+            }
+        })
+    }
+}
diff --git a/lrlex/Cargo.toml b/lrlex/Cargo.toml
index d74f33f19..06c1e91d6 100644
--- a/lrlex/Cargo.toml
+++ b/lrlex/Cargo.toml
@@ -33,5 +33,6 @@ lazy_static.workspace = true
 regex.workspace = true
 regex-syntax.workspace = true
 num-traits.workspace = true
+proc-macro2.workspace = true
 quote.workspace = true
 serde.workspace = true
diff --git a/lrlex/src/lib/ctbuilder.rs b/lrlex/src/lib/ctbuilder.rs
index 90ac46b8c..84d684bb1 100644
--- a/lrlex/src/lib/ctbuilder.rs
+++ b/lrlex/src/lib/ctbuilder.rs
@@ -19,7 +19,8 @@ use cfgrammar::{newlinecache::NewlineCache, Spanned};
 use lazy_static::lazy_static;
 use lrpar::{CTParserBuilder, LexerTypes};
 use num_traits::{AsPrimitive, PrimInt, Unsigned};
-use quote::quote;
+use proc_macro2::TokenStream;
+use quote::{quote, ToTokens, TokenStreamExt};
 use regex::Regex;
 use serde::Serialize;
@@ -78,11 +79,48 @@ pub enum RustEdition {
     Rust2021,
 }
 
+/// The quote impl of `ToTokens` for `Option<T>` prints an empty string for `None`
+/// and the inner value for `Some(inner_value)`.
+///
+/// This wrapper instead emits both the `Some` and `None` variants.
+/// See: [quote #20](https://github.com/dtolnay/quote/issues/20)
+struct QuoteOption<T>(Option<T>);
+
+impl<T: ToTokens> ToTokens for QuoteOption<T> {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        tokens.append_all(match self.0 {
+            Some(ref t) => quote! { ::std::option::Option::Some(#t) },
+            None => quote! { ::std::option::Option::None },
+        });
+    }
+}
+
+/// This wrapper adds a missing impl of `ToTokens` for tuples.
+/// For a tuple `(a, b)`, it emits `(a.to_tokens(), b.to_tokens())`.
+struct QuoteTuple<T>(T);
+
+impl<A: ToTokens, B: ToTokens> ToTokens for QuoteTuple<(A, B)> {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        let (a, b) = &self.0;
+        tokens.append_all(quote!((#a, #b)));
+    }
+}
+
+/// The wrapped `&str` value will be emitted with a call to `to_string()`.
+struct QuoteToString<'a>(&'a str);
+
+impl ToTokens for QuoteToString<'_> {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        let x = &self.0;
+        tokens.append_all(quote! { #x.to_string() });
+    }
+}
+
 /// A `CTLexerBuilder` allows one to specify the criteria for building a statically generated
 /// lexer.
 pub struct CTLexerBuilder<'a, LexerTypesT: LexerTypes = DefaultLexerTypes<u32>>
 where
-    LexerTypesT::StorageT: Debug + Eq + Hash,
+    LexerTypesT::StorageT: Debug + Eq + Hash + ToTokens,
     usize: num_traits::AsPrimitive<LexerTypesT::StorageT>,
 {
     lrpar_config: Option<Box<dyn Fn(CTParserBuilder<'a, LexerTypesT>) -> CTParserBuilder<'a, LexerTypesT>>>,
@@ -108,7 +146,7 @@ impl CTLexerBuilder<'_, DefaultLexerTypes<u32>> {
 impl<'a, LexerTypesT: LexerTypes> CTLexerBuilder<'a, LexerTypesT>
 where
     LexerTypesT::StorageT:
-        'static + Debug + Eq + Hash + PrimInt + Serialize + TryFrom<usize> + Unsigned,
+        'static + Debug + Eq + Hash + PrimInt + Serialize + TryFrom<usize> + Unsigned + ToTokens,
     usize: AsPrimitive<LexerTypesT::StorageT>,
 {
     /// Create a new [CTLexerBuilder].
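The three wrapper types added above exist because `quote`'s stock behaviour doesn't fit generating *expressions*: `Option<T>` interpolates `None` as nothing at all (the quote #20 issue linked in the doc comment), tuples have no `ToTokens` impl, and `&str` interpolates as a string literal where the generated code needs an owned `String`. A standalone sketch of the `Option` case, with the wrapper copied out of the patch:

```rust
use proc_macro2::TokenStream;
use quote::{quote, ToTokens, TokenStreamExt};

struct QuoteOption<T>(Option<T>);

impl<T: ToTokens> ToTokens for QuoteOption<T> {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        tokens.append_all(match self.0 {
            Some(ref t) => quote!(::std::option::Option::Some(#t)),
            None => quote!(::std::option::Option::None),
        });
    }
}

fn main() {
    let n: Option<u8> = None;
    // quote's built-in impl interpolates `None` as an empty token stream,
    // which would leave a syntactic hole in the generated source.
    assert_eq!(quote!(#n).to_string(), "");
    // The wrapper always emits a complete, fully-qualified expression.
    let n = QuoteOption(n);
    assert_eq!(quote!(#n).to_string(), ":: std :: option :: Option :: None");
}
```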
@@ -438,31 +476,52 @@ pub fn lexerdef() -> {lexerdef_type} {{
         )
         .ok();
 
+        let RegexOptions {
+            dot_matches_new_line,
+            multi_line,
+            octal,
+            posix_escapes,
+            case_insensitive,
+            unicode,
+            swap_greed,
+            ignore_whitespace,
+            size_limit,
+            dfa_size_limit,
+            nest_limit,
+        } = self.regex_options;
+        let case_insensitive = QuoteOption(case_insensitive);
+        let unicode = QuoteOption(unicode);
+        let swap_greed = QuoteOption(swap_greed);
+        let ignore_whitespace = QuoteOption(ignore_whitespace);
+        let size_limit = QuoteOption(size_limit);
+        let dfa_size_limit = QuoteOption(dfa_size_limit);
+        let nest_limit = QuoteOption(nest_limit);
+
         outs.push_str(&format!(
             "let regex_options = ::lrlex::RegexOptions {{
-    dot_matches_new_line: {dot_matches_new_line:?},
-    multi_line: {multi_line:?},
-    octal: {octal:?},
-    posix_escapes: {posix_escapes:?},
-    case_insensitive: {case_insensitive:?},
-    unicode: {unicode:?},
-    swap_greed: {swap_greed:?},
-    ignore_whitespace: {ignore_whitespace:?},
-    size_limit: {size_limit:?},
-    dfa_size_limit: {dfa_size_limit:?},
-    nest_limit: {nest_limit:?},
+    dot_matches_new_line: {dot_matches_new_line},
+    multi_line: {multi_line},
+    octal: {octal},
+    posix_escapes: {posix_escapes},
+    case_insensitive: {case_insensitive},
+    unicode: {unicode},
+    swap_greed: {swap_greed},
+    ignore_whitespace: {ignore_whitespace},
+    size_limit: {size_limit},
+    dfa_size_limit: {dfa_size_limit},
+    nest_limit: {nest_limit},
 }};",
-            dot_matches_new_line = self.regex_options.dot_matches_new_line,
-            multi_line = self.regex_options.multi_line,
-            octal = self.regex_options.octal,
-            posix_escapes = self.regex_options.posix_escapes,
-            case_insensitive = self.regex_options.case_insensitive,
-            unicode = self.regex_options.unicode,
-            swap_greed = self.regex_options.swap_greed,
-            ignore_whitespace = self.regex_options.ignore_whitespace,
-            size_limit = self.regex_options.size_limit,
-            dfa_size_limit = self.regex_options.dfa_size_limit,
-            nest_limit = self.regex_options.nest_limit,
+            dot_matches_new_line = quote!(#dot_matches_new_line),
+            multi_line = quote!(#multi_line),
+            octal = quote!(#octal),
+            posix_escapes = quote!(#posix_escapes),
+            case_insensitive = quote!(#case_insensitive),
+            unicode = quote!(#unicode),
+            swap_greed = quote!(#swap_greed),
+            ignore_whitespace = quote!(#ignore_whitespace),
+            size_limit = quote!(#size_limit),
+            dfa_size_limit = quote!(#dfa_size_limit),
+            nest_limit = quote!(#nest_limit),
         ));
 
         outs.push_str("\n    let start_states: Vec<StartState> = vec![");
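Destructuring `RegexOptions` wholesale, rather than reading `self.regex_options.field` eleven times, also future-proofs the generator: adding a field to `RegexOptions` turns the `let` pattern above into a compile error instead of silently emitting stale output. A minimal sketch of the idea, using a hypothetical two-field struct:

```rust
struct Opts {
    multi_line: bool,
    size_limit: Option<usize>,
}

// Exhaustive pattern: if a field is ever added to `Opts`, this `let`
// stops compiling until the generator handles the new field too.
fn emit(opts: Opts) -> String {
    let Opts { multi_line, size_limit } = opts;
    format!("multi_line: {multi_line}, size_limit: {size_limit:?}")
}

fn main() {
    println!("{}", emit(Opts { multi_line: true, size_limit: Some(10 << 20) }));
}
```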
@@ -485,35 +544,22 @@ pub fn lexerdef() -> {lexerdef_type} {{
         // Individual rules
         for r in lexerdef.iter_rules() {
-            let tok_id = match r.tok_id {
-                Some(ref t) => format!("Some({:?})", t),
-                None => "None".to_owned(),
-            };
-            let n = match r.name() {
-                Some(ref n) => format!("Some({}.to_string())", quote!(#n)),
-                None => "None".to_owned(),
-            };
-            let target_state = match &r.target_state() {
-                Some((id, op)) => format!("Some(({}, ::lrlex::StartStateOperation::{:?}))", id, op),
-                None => "None".to_owned(),
-            };
-            let n_span = format!(
-                "::cfgrammar::Span::new({}, {})",
-                r.name_span().start(),
-                r.name_span().end()
-            );
-            let regex = &r.re_str;
+            let tok_id = QuoteOption(r.tok_id);
+            let n = QuoteOption(r.name().map(QuoteToString));
+            let target_state = QuoteOption(r.target_state().map(|(x, y)| QuoteTuple((x, y))));
+            let n_span = r.name_span();
+            let regex = QuoteToString(&r.re_str);
             let start_states = r.start_states();
             write!(
                 outs,
                 "
    Rule::new(::lrlex::unstable_api::InternalPublicApi, {}, {}, {}, {}.to_string(), {}.to_vec(), {}, &regex_options).unwrap(),",
-                tok_id,
-                n,
-                n_span,
+                quote!(#tok_id),
+                quote!(#n),
+                quote!(#n_span),
                 quote!(#regex),
                 quote!([#(#start_states),*]),
-                target_state,
+                quote!(#target_state),
             )
             .ok();
         }
@@ -537,10 +583,10 @@ pub fn lexerdef() -> {lexerdef_type} {{
             if RE_TOKEN_ID.is_match(n) {
                 write!(
                     outs,
-                    "#[allow(dead_code)]\npub const T_{}: {} = {:?};\n",
+                    "#[allow(dead_code)]\npub const T_{}: {} = {};\n",
                     n.to_ascii_uppercase(),
                     type_name::<LexerTypesT::StorageT>(),
-                    *id
+                    quote!(#id)
                 )
                 .ok();
             }
diff --git a/lrlex/src/lib/parser.rs b/lrlex/src/lib/parser.rs
index 71b15258f..2026878dc 100644
--- a/lrlex/src/lib/parser.rs
+++ b/lrlex/src/lib/parser.rs
@@ -76,6 +76,18 @@ pub enum StartStateOperation {
     Pop,
 }
 
+use proc_macro2::TokenStream;
+use quote::quote;
+impl quote::ToTokens for StartStateOperation {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        tokens.extend(match *self {
+            StartStateOperation::ReplaceStack => quote!(::lrlex::StartStateOperation::ReplaceStack),
+            StartStateOperation::Push => quote!(::lrlex::StartStateOperation::Push),
+            StartStateOperation::Pop => quote!(::lrlex::StartStateOperation::Pop),
+        })
+    }
+}
+
 pub(super) struct LexParser<LexerTypesT: LexerTypes>
 where
     usize: AsPrimitive<LexerTypesT::StorageT>,
diff --git a/lrpar/Cargo.toml b/lrpar/Cargo.toml
index 39b6a4113..b9b06ddaa 100644
--- a/lrpar/Cargo.toml
+++ b/lrpar/Cargo.toml
@@ -34,10 +34,12 @@ indexmap.workspace = true
 lazy_static.workspace = true
 num-traits.workspace = true
 packedvec.workspace = true
+proc-macro2.workspace = true
+quote.workspace = true
+regex.workspace = true
 serde = { workspace = true, features = ["derive"] }
 static_assertions.workspace = true
 vob.workspace = true
-regex.workspace = true
 
 [dev-dependencies]
 tempfile = "3.0"
diff --git a/lrpar/src/lib/ctbuilder.rs b/lrpar/src/lib/ctbuilder.rs
index d26f734ff..1a4eccbaf 100644
--- a/lrpar/src/lib/ctbuilder.rs
+++ b/lrpar/src/lib/ctbuilder.rs
@@ -27,6 +27,8 @@ use filetime::FileTime;
 use lazy_static::lazy_static;
 use lrtable::{from_yacc, statetable::Conflicts, Minimiser, StateGraph, StateTable};
 use num_traits::{AsPrimitive, PrimInt, Unsigned};
+use proc_macro2::{Literal, TokenStream};
+use quote::{quote, ToTokens, TokenStreamExt};
 use regex::Regex;
 use serde::{de::DeserializeOwned, Serialize};
@@ -52,6 +54,34 @@ struct CTConflictsError<StorageT: Eq + Hash> {
     stable: StateTable<StorageT>,
 }
 
+/// The quote impl of `ToTokens` for `Option<T>` prints an empty string for `None`
+/// and the inner value for `Some(inner_value)`.
+///
+/// This wrapper instead emits both the `Some` and `None` variants.
+/// See: [quote #20](https://github.com/dtolnay/quote/issues/20)
+struct QuoteOption<T>(Option<T>);
+
+impl<T: ToTokens> ToTokens for QuoteOption<T> {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        tokens.append_all(match self.0 {
+            Some(ref t) => quote! { ::std::option::Option::Some(#t) },
+            None => quote! { ::std::option::Option::None },
+        });
+    }
+}
+
+/// The quote impl of `ToTokens` for `usize` prints literal values
+/// with a type suffix, e.g. `0usize`.
+///
+/// This wrapper omits the type suffix, emitting `0` instead.
+struct UnsuffixedUsize(usize);
+
+impl ToTokens for UnsuffixedUsize {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        tokens.append(Literal::usize_unsuffixed(self.0))
+    }
+}
+
 impl<StorageT> fmt::Display for CTConflictsError<StorageT>
 where
     StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
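`UnsuffixedUsize` matters because the interpolated numbers end up inside generated *identifiers* (`wrapper_5` and friends), where `quote`'s default rendering of `usize` (`5usize`) would produce an invalid name. A runnable check of the two renderings, with the wrapper reproduced as defined above:

```rust
use proc_macro2::{Literal, TokenStream};
use quote::{quote, ToTokens, TokenStreamExt};

struct UnsuffixedUsize(usize);

impl ToTokens for UnsuffixedUsize {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        tokens.append(Literal::usize_unsuffixed(self.0))
    }
}

fn main() {
    let n = 5usize;
    // Default rendering carries the type suffix: splicing it into a name
    // would yield `wrapper_5usize`, not the `wrapper_5` the rest of the
    // generated code refers to.
    assert_eq!(quote!(#n).to_string(), "5usize");
    // The unsuffixed literal splices cleanly into names and format strings.
    let n = UnsuffixedUsize(n);
    assert_eq!(quote!(#n).to_string(), "5");
}
```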
@@ -733,17 +763,23 @@ where
         // Record the time that this version of lrpar was built. If the source code changes and
         // rustc forces a recompile, this will change this value, causing anything which depends on
         // this build of lrpar to be recompiled too.
-        writeln!(cache, " Build time: {:?}", env!("VERGEN_BUILD_TIMESTAMP")).ok();
-
-        writeln!(cache, " Grammar path: {:?}", self.grammar_path).ok();
-        writeln!(cache, " Mod name: {:?}", self.mod_name).ok();
-        writeln!(cache, " Recoverer: {:?}", self.recoverer).ok();
-        writeln!(cache, " YaccKind: {:?}", self.yacckind).ok();
-        writeln!(cache, " Visibility: {:?}", self.visibility.cow_str()).ok();
+        let build_time = env!("VERGEN_BUILD_TIMESTAMP");
+        let grammar_path = self.grammar_path.as_ref().unwrap().to_string_lossy();
+        let mod_name = self.mod_name;
+        let recoverer = self.recoverer;
+        let yacckind = self.yacckind;
+        let visibility = self.visibility.cow_str();
+        let error_on_conflicts = self.error_on_conflicts;
+        writeln!(cache, " Build time: {}", quote!(#build_time)).ok();
+        writeln!(cache, " Grammar path: {}", quote!(#grammar_path)).ok();
+        writeln!(cache, " Mod name: {}", quote!(#mod_name)).ok();
+        writeln!(cache, " Recoverer: {}", quote!(#recoverer)).ok();
+        writeln!(cache, " YaccKind: {}", quote!(#yacckind)).ok();
+        writeln!(cache, " Visibility: {}", quote!(#visibility)).ok();
         writeln!(
             cache,
-            " Error on conflicts: {:?}\n",
-            self.error_on_conflicts
+            " Error on conflicts: {}\n",
+            quote!(#error_on_conflicts)
         )
         .ok();
@@ -840,11 +876,8 @@ where
         let wrappers = grm
             .iter_pidxs()
             .map(|pidx| {
-                format!(
-                    "&{prefix}wrapper_{}",
-                    usize::from(pidx),
-                    prefix = ACTION_PREFIX
-                )
+                let pidx = UnsuffixedUsize(usize::from(pidx));
+                format!("&{prefix}wrapper_{}", quote!(#pidx), prefix = ACTION_PREFIX)
             })
            .collect::<Vec<_>>()
            .join(",\n        ");
@@ -867,6 +900,7 @@ where
         wrappers = wrappers,
         edition_lifetime = if self.rust_edition != RustEdition::Rust2015 { "'_, " } else { "" },
     ).ok();
+        let ridx = UnsuffixedUsize(usize::from(self.user_start_ridx(grm)));
         write!(
             outs,
             "
@@ -880,7 +914,7 @@ where
             parse_param = parse_param,
             actionskind = ACTIONS_KIND,
             actionskindprefix = ACTIONS_KIND_PREFIX,
-            ridx = usize::from(self.user_start_ridx(grm)),
+            ridx = quote!(#ridx),
             recoverer = recoverer,
         )
         .ok();
@@ -920,7 +954,7 @@ where
         if !grm.rule_to_prods(ridx).contains(&grm.start_prod()) {
             write!(
                 outs,
-                "    #[allow(dead_code)]\n    pub const R_{}: {} = {:?};\n",
+                "    #[allow(dead_code)]\n    pub const R_{}: {} = {};\n",
                 grm.rule_name_str(ridx).to_ascii_uppercase(),
                 type_name::<StorageT>(),
                 usize::from(ridx)
@@ -934,10 +968,8 @@ where
     fn gen_token_epp(&self, grm: &YaccGrammar<StorageT>) -> String {
         let mut tidxs = Vec::new();
         for tidx in grm.iter_tidxs() {
-            match grm.token_epp(tidx) {
-                Some(n) => tidxs.push(format!("Some(\"{}\")", str_escape(n))),
-                None => tidxs.push("None".to_string()),
-            }
+            let tok_epp = QuoteOption(grm.token_epp(tidx));
+            tidxs.push(format!("{}", quote!(#tok_epp)));
         }
         format!(
             "    const {prefix}EPP: &[::std::option::Option<&str>] = &[{}];
@@ -966,6 +998,7 @@ where
         };
         for pidx in grm.iter_pidxs() {
             let ridx = grm.prod_to_rule(pidx);
+            let pidx_num = UnsuffixedUsize(usize::from(pidx));
 
             // Iterate over all $-arguments and replace them with their respective
             // element from the argument vector (e.g. $1 is replaced by args[0]). At
@@ -977,7 +1010,7 @@ where
                     mut {prefix}args: ::std::vec::Drain<{edition_lifetime} ::lrpar::parser::AStackType<<{lexertypest} as ::lrpar::LexerTypes>::LexemeT, {actionskind}<'input>>>,
                     {parse_paramdef}) -> {actionskind}<'input> {{",
-                usize::from(pidx),
+                quote!(#pidx_num),
                 storaget = type_name::<StorageT>(),
                 lexertypest = type_name::<LexerTypesT>(),
                 prefix = ACTION_PREFIX,
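A side effect of routing the `%epp` strings through `QuoteOption`/`quote!` is that escaping is now handled by `proc-macro2`'s literal printer, leaving the hand-rolled `str_escape` helper dead code; the next hunk deletes it. For typical inputs the two approaches produce identical source text:

```rust
use quote::quote;

// The helper deleted below, reproduced here for comparison.
fn str_escape(s: &str) -> String {
    s.replace('\\', "\\\\").replace('"', "\\\"")
}

fn main() {
    let s = r#"a "quoted" \ backslash"#;
    // proc-macro2 renders `&str` as a correctly escaped string literal,
    // so manual escaping is no longer needed.
    assert_eq!(quote!(#s).to_string(), format!("\"{}\"", str_escape(s)));
}
```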
@@ -1242,11 +1275,6 @@ where
     }
 }
 
-/// Return a version of the string `s` which is safe to embed in source code as a string.
-fn str_escape(s: &str) -> String {
-    s.replace('\\', "\\\\").replace('"', "\\\"")
-}
-
 /// This function is called by generated files; it exists so that generated files don't require a
 /// dependency on serde and rmps.
 #[doc(hidden)]
diff --git a/lrpar/src/lib/parser.rs b/lrpar/src/lib/parser.rs
index 82e969950..736cf5649 100644
--- a/lrpar/src/lib/parser.rs
+++ b/lrpar/src/lib/parser.rs
@@ -12,6 +12,8 @@ use cactus::Cactus;
 use cfgrammar::{yacc::YaccGrammar, RIdx, Span, TIdx};
 use lrtable::{Action, StIdx, StateTable};
 use num_traits::{AsPrimitive, PrimInt, Unsigned};
+use proc_macro2::TokenStream;
+use quote::quote;
 #[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};
@@ -621,6 +623,15 @@ pub enum RecoveryKind {
     None,
 }
 
+impl quote::ToTokens for RecoveryKind {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        tokens.extend(match *self {
+            RecoveryKind::CPCTPlus => quote!(::lrpar::RecoveryKind::CPCTPlus),
+            RecoveryKind::None => quote!(::lrpar::RecoveryKind::None),
+        })
+    }
+}
+
 /// A lexing or parsing error. Although the two are quite distinct in terms of what can be reported
 /// to users, both can (at least conceptually) occur at any point of the intertwined lexing/parsing
 /// process.
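Taken together, the new impls mean every builder setting that lands in generated source or in the cache comment is printed by `quote!` as a fully-qualified Rust expression rather than via `Debug`. The impls also compose across crates; a small sketch (assuming the patched crates as dependencies):

```rust
use cfgrammar::yacc::{YaccKind, YaccOriginalActionKind};
use quote::quote;

fn main() {
    let yk = YaccKind::Original(YaccOriginalActionKind::UserAction);
    // `YaccKind`'s impl interpolates the inner `YaccOriginalActionKind`
    // through its own `ToTokens` impl:
    println!("{}", quote!(#yk));
    // -> :: cfgrammar :: yacc :: YaccKind :: Original
    //    (:: cfgrammar :: yacc :: YaccOriginalActionKind :: UserAction)
}
```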