7 changes: 7 additions & 0 deletions tiktoken-rs/Cargo.toml
@@ -13,6 +13,10 @@ documentation = "https://docs.rs/crate/tiktoken-rs/"
license = "MIT"
readme = "../README.md"

[[bin]]
name = "tiktoken"
path = "src/main.rs"

[profile.release]
debug = 1

@@ -21,11 +25,14 @@ anyhow = "1.0.76"
async-openai = { version = "0.14.2", optional = true }
base64 = "0.22.0"
bstr = "1.6.2"
clap = { version = "4.4", features = ["derive"] }
dhat = { version = "0.3.2", optional = true }
fancy-regex = "0.13.0"
lazy_static = "1.4.0"
regex = "1.10.3"
rustc-hash = "1.1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"

[features]
async-openai = ["dep:async-openai"]
43 changes: 43 additions & 0 deletions tiktoken-rs/README.md
@@ -18,6 +18,49 @@ This library is built on top of the `tiktoken` library and includes some additio

For full working examples for all supported features, see the [examples](https://github.com/zurawiki/tiktoken-rs/tree/main/tiktoken-rs/examples) directory in the repository.

# CLI Usage

The project includes a command-line interface for token counting.

## Installation

```shell
cargo install tiktoken-rs
```
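
If you are working from a clone of this repository instead of installing from crates.io, the same binary can be run through Cargo — a quick sketch, assuming you invoke it from the `tiktoken-rs` package directory where the `[[bin]]` target is defined:

```shell
cargo run --bin tiktoken -- --help
```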

## Usage

```bash
# Get help
tiktoken --help

# List all available models
tiktoken --list-models

# Count tokens in text from stdin
echo 'Hello, world!' | tiktoken

# Count tokens with a specific model
echo 'Hello, world!' | tiktoken --model gpt-3.5-turbo

# Count tokens with the o1 model
echo 'Hello, world!' | tiktoken --model o1

# Output JSON with usage percentage
echo 'Hello, world!' | tiktoken --json
```

With `--json`, the CLI outputs JSON with the token count, model used, context size, remaining tokens, and usage percentage:

```json
{
  "token_count": 4,
  "model": "gpt-4",
  "context_size": 8192,
  "remaining_tokens": 8188,
  "usage_percentage": 0.049
}
```
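
Without `--json`, the CLI prints only the bare token count (the default model is `gpt-4.1` unless `--model` is given):

```bash
echo 'Hello, world!' | tiktoken --model gpt-4
# prints just the token count, e.g. 4
```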

# Usage

1. Install this tool locally with `cargo`
106 changes: 106 additions & 0 deletions tiktoken-rs/src/main.rs
@@ -0,0 +1,106 @@
use clap::Parser;
use serde::Serialize;
use std::io::{self, Read};
use tiktoken_rs::{get_bpe_from_model, model::get_context_size, tokenizer::list_available_models};

#[derive(Parser)]
#[command(
    name = "tiktoken",
    about = "Count tokens in text using OpenAI's tiktoken library",
    version
)]
struct Args {
    /// Model to use for tokenization (e.g., gpt-4o, gpt-3.5-turbo, o1)
    #[arg(short, long, default_value = "gpt-4.1")]
    model: String,

    /// Output results in JSON format
    #[arg(long)]
    json: bool,

    /// List all available models and exit
    #[arg(long)]
    list_models: bool,

    /// Input text to count tokens for (reads from stdin if not provided)
    #[arg(value_name = "TEXT")]
    text: Vec<String>,
}

#[derive(Serialize)]
struct TokenCountResponse {
    /// Number of tokens in the input text
    token_count: usize,
    /// Model used for tokenization
    model: String,
    /// Context size for the model
    context_size: usize,
    /// Remaining tokens available for completion
    remaining_tokens: usize,
    /// Percentage of context used (rounded to 3 decimal places)
    usage_percentage: f64,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let args = Args::parse();

    // Handle list models command
    if args.list_models {
        println!("Available models:");
        println!();

        // Get all models from the tokenizer module
        let models = list_available_models();

        for model in models.iter() {
            let context_size = get_context_size(model);
            println!(" {:<25} (context: {})", model, context_size);
        }

        println!();
        println!(
            "Note: Many models support version suffixes (e.g., gpt-4-0314, gpt-3.5-turbo-0125)"
        );
        println!(" and fine-tuned models use the ft: prefix (e.g., ft:gpt-3.5-turbo:xxx:2023-11-11)");
        return Ok(());
    }

    // Get input text from argument or stdin
    let input_text = if !args.text.is_empty() {
        args.text.join(" ")
    } else {
        let mut buffer = String::new();
        eprintln!("🔎 Reading from stdin...");
        io::stdin().read_to_string(&mut buffer)?;
        buffer
    };

    // Count tokens using the specified model
    let bpe = get_bpe_from_model(&args.model)?;
    let token_count = bpe.encode_with_special_tokens(&input_text).len();
    let context_size = get_context_size(&args.model);
    let remaining_tokens = context_size.saturating_sub(token_count);

    // Calculate usage percentage rounded to 3 decimal places
    let usage_percentage = if context_size > 0 {
        ((token_count as f64 / context_size as f64) * 100.0 * 1000.0).round() / 1000.0
    } else {
        0.0
    };

    // Output based on the json flag
    if args.json {
        let response = TokenCountResponse {
            token_count,
            model: args.model,
            context_size,
            remaining_tokens,
            usage_percentage,
        };
        println!("{}", serde_json::to_string_pretty(&response)?);
    } else {
        println!("{token_count}");
    }

    Ok(())
}
23 changes: 22 additions & 1 deletion tiktoken-rs/src/tokenizer.rs
@@ -51,7 +51,7 @@ const MODEL_PREFIX_TO_TOKENIZER: &[(&str, Tokenizer)] = &[

// Keep this in sync with:
// https://github.com/openai/tiktoken/blob/63527649963def8c759b0f91f2eb69a40934e468/tiktoken/model.py#L22
const MODEL_TO_TOKENIZER: &[(&str, Tokenizer)] = &[
pub const MODEL_TO_TOKENIZER: &[(&str, Tokenizer)] = &[
// reasoning
("o1", Tokenizer::O200kBase),
("o3", Tokenizer::O200kBase),
@@ -119,6 +119,27 @@ lazy_static! {
};
}

/// Returns a list of all available model names.
///
/// This function returns all the model names that are supported by the tokenizer.
/// The models are returned in the order they are defined in the `MODEL_TO_TOKENIZER` constant.
///
/// # Examples
///
/// ```
/// use tiktoken_rs::tokenizer::list_available_models;
/// let models = list_available_models();
/// assert!(models.contains(&"gpt-4"));
/// assert!(models.contains(&"gpt-3.5-turbo"));
/// ```
///
/// # Returns
///
/// A vector of string slices containing all available model names.
pub fn list_available_models() -> Vec<&'static str> {
    MODEL_TO_TOKENIZER.iter().map(|(model, _)| *model).collect()
}

/// Returns the tokenizer type used by a model.
///
/// This function retrieves the corresponding tokenizer enum variant for the given model name. It first looks