
Commit 6885eb7

Add a version of ct_token_map that doesn't insert #[allow(dead_code)]

This helps with spotting errors in custom lexers where some of the tokens are never emitted.
1 parent: f7397a7

File tree: 3 files changed (+36 −7 lines)

lrlex/examples/calc_manual_lex/build.rs
lrlex/src/lib/ctbuilder.rs
lrlex/src/lib/mod.rs

lrlex/examples/calc_manual_lex/build.rs

Lines changed: 3 additions & 2 deletions
```diff
@@ -1,4 +1,4 @@
-use lrlex::{DefaultLexerTypes, ct_token_map};
+use lrlex::{DefaultLexerTypes, ct_token_map_require_usage};
 use lrpar::CTParserBuilder;
 
 // Some of the token names in the parser do not lead to valid Rust identifiers, so we map them to
@@ -16,10 +16,11 @@ fn main() {
         .unwrap()
         .build()
         .unwrap();
-    ct_token_map::<u8>(
+    ct_token_map_require_usage::<u8>(
         "token_map",
         ctp.token_map(),
         Some(&TOKENS_MAP.iter().cloned().collect()),
+        true,
     )
     .unwrap();
 }
```
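
For reference, a rough sketch of the module this call writes out is shown below; the token names and ids are illustrative (they mirror the doc-comment examples in ctbuilder.rs below), not the exact output for this example's grammar. Because `true` is passed as the final argument, the module itself is no longer wrapped in `#[allow(dead_code)]`, so any `T_*` constant the hand-written lexer never reads now triggers a warning:

```rust
// Sketch of the generated module (names and ids illustrative). Only TOK_IDS
// keeps a per-item #[allow(dead_code)]; the T_* constants are now subject to
// the dead-code lint.
mod token_map {
    pub const T_PLUS: u8 = 0;
    pub const T_INT: u8 = 1;
    #[allow(dead_code)]
    pub const TOK_IDS: &[u8] = &[0, 1];
}
```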

lrlex/src/lib/ctbuilder.rs

Lines changed: 32 additions & 4 deletions
```diff
@@ -1192,10 +1192,11 @@ impl CTLexer {
 /// `HashMap{"ID": 0, "INT": 1}` the generated module will look roughly as follows:
 ///
 /// ```rust,ignore
+/// #[allow(dead_code)]
 /// mod x {
 ///     pub const T_ID: u8 = 0;
 ///     pub const T_INT: u8 = 1;
-///     pub const TOK_IDS: &[u8] = &[T_ID, T_INT];
+///     pub const TOK_IDS: &[u8] = &[0, 1];
 /// }
 /// ```
 ///
@@ -1205,16 +1206,34 @@ impl CTLexer {
 /// module will look roughly as follows:
 ///
 /// ```rust,ignore
+/// #[allow(dead_code)]
 /// mod x {
 ///     pub const T_PLUS: u8 = 0;
 ///     pub const T_ID: u8 = 1;
-///     pub const TOK_IDS: &[u8] = &[T_PLUS, T_ID];
+///     pub const TOK_IDS: &[u8] = &[0, 1];
 /// }
 /// ```
+///
+/// You can also use [`ct_token_map_require_usage`] to disable the `#[allow(dead_code)]` annotation.
+/// This will help you to ensure that a custom lexer can produce every token used in a parser.
 pub fn ct_token_map<StorageT: Display + ToTokens>(
     mod_name: &str,
     token_map: impl Borrow<HashMap<String, StorageT>>,
     rename_map: Option<&HashMap<&str, &str>>,
+) -> Result<(), Box<dyn Error>> {
+    ct_token_map_require_usage(mod_name, token_map, rename_map, false)
+}
+
+/// Same as [`ct_token_map`], but allows disabling the `#[allow(dead_code)]` annotation
+/// for generated constants.
+///
+/// This function can help you to ensure that a custom lexer can produce
+/// every token used in a parser.
+pub fn ct_token_map_require_usage<StorageT: Display + ToTokens>(
+    mod_name: &str,
+    token_map: impl Borrow<HashMap<String, StorageT>>,
+    rename_map: Option<&HashMap<&str, &str>>,
+    require_usage: bool,
 ) -> Result<(), Box<dyn Error>> {
     // Record the time that this version of lrlex was built. If the source code changes and rustc
     // forces a recompile, this will change this value, causing anything which depends on this
@@ -1237,21 +1256,30 @@ pub fn ct_token_map<StorageT: Display + ToTokens>(
             };
             let tok_ident = format_ident!("T_{}", name.to_ascii_uppercase());
             (
+                // Note: the array of all tokens can't use `tok_ident` because
+                // it will confuse the dead code checker. For this reason,
+                // we use `id` here.
                 quote! {
-                    #tok_ident,
+                    #id,
                 },
                 quote! {
                     pub const #tok_ident: #storaget = #id;
                 },
             )
         })
         .unzip();
+    let unused_annotation = if require_usage {
+        quote! {}
+    } else {
+        quote! {#[allow(dead_code)]}
+    };
     // Since the formatter doesn't preserve comments and we don't want to lose build time,
     // just format the module contents.
     let unformatted = quote! {
+        #unused_annotation
         mod #mod_ident {
-            #![allow(dead_code)]
             #tokens
+            #[allow(dead_code)]
             pub const TOK_IDS: &[#storaget] = &[#token_array];
         }
     }
```
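
To illustrate why dropping the module-wide `#[allow(dead_code)]` helps, here is a minimal, self-contained sketch (not taken from the repository) of a hand-written lexer sitting next to a generated-style token module. The module is written out by hand so the example compiles on its own; in practice it would be produced by `ct_token_map_require_usage` and pulled in with `include!`. Because only `T_ID` is ever referenced, rustc's dead-code lint flags `T_INT`, pointing at a token the lexer never emits:

```rust
// Hypothetical stand-in for the generated module (names and ids are made up).
mod token_map {
    pub const T_ID: u8 = 0;
    pub const T_INT: u8 = 1; // never read below -> "constant `T_INT` is never used" warning
    #[allow(dead_code)]
    pub const TOK_IDS: &[u8] = &[0, 1];
}

// A toy "custom lexer": it only ever produces T_ID, so T_INT above goes unused.
fn lex_word(word: &str) -> Option<u8> {
    if !word.is_empty() && word.chars().all(|c| c.is_ascii_alphabetic()) {
        Some(token_map::T_ID)
    } else {
        None
    }
}

fn main() {
    println!("{:?}", lex_word("abc")); // Some(0)
    println!("{:?}", lex_word("42")); // None: digits are never turned into T_INT
}
```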

lrlex/src/lib/mod.rs

Lines changed: 1 addition & 1 deletion
```diff
@@ -21,7 +21,7 @@ mod lexer;
 mod parser;
 
 pub use crate::{
-    ctbuilder::{CTLexer, CTLexerBuilder, LexerKind, RustEdition, Visibility, ct_token_map},
+    ctbuilder::{CTLexer, CTLexerBuilder, LexerKind, RustEdition, Visibility, ct_token_map, ct_token_map_require_usage},
     defaults::{DefaultLexeme, DefaultLexerTypes},
     lexer::{
         DEFAULT_LEX_FLAGS, LRNonStreamingLexer, LRNonStreamingLexerDef, LexFlags, LexerDef, Rule,
```
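
With this re-export in place, build scripts can import the new function straight from the crate root. A minimal, hypothetical build.rs call is sketched below; the module name, token map contents, and flag value are placeholders, not values from this repository:

```rust
// Hypothetical build.rs sketch: a hand-rolled token map written out under the
// module name "tokens", with require_usage = true so that unreferenced T_*
// constants are reported by the dead-code lint.
use std::collections::HashMap;

use lrlex::ct_token_map_require_usage;

fn main() {
    let token_map: HashMap<String, u8> =
        HashMap::from([("ID".to_string(), 0u8), ("INT".to_string(), 1u8)]);
    ct_token_map_require_usage::<u8>("tokens", &token_map, None, true).unwrap();
}
```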
