diff --git a/docs/components/frontend/configuration.md b/docs/components/frontend/configuration.md index 65615ae2e5a..cdf9641fa8d 100644 --- a/docs/components/frontend/configuration.md +++ b/docs/components/frontend/configuration.md @@ -91,6 +91,12 @@ See the [Frontend Guide](frontend-guide.md) for KServe message formats and integ | `--metrics-prefix` | `DYN_METRICS_PREFIX` | `dynamo_frontend` | Prefix for frontend Prometheus metrics | | `--dump-config-to` | `DYN_DUMP_CONFIG_TO` | — | Dump resolved config to file path | +## Tokenizer + +| CLI Argument | Env Var | Default | Description | +|-------------|---------|---------|-------------| +| `--tokenizer` | `DYN_TOKENIZER` | `default` | Tokenizer backend: `default` (HuggingFace) or `fastokens` (fastokens crate for high-performance BPE encoding). See [Tokenizer Backends](tokenizer-backends.md) | + ## Experimental | CLI Argument | Env Var | Default | Description | diff --git a/docs/components/frontend/tokenizer-backends.md b/docs/components/frontend/tokenizer-backends.md new file mode 100644 index 00000000000..6becbb070f9 --- /dev/null +++ b/docs/components/frontend/tokenizer-backends.md @@ -0,0 +1,55 @@ +--- +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +title: Tokenizer Backends +--- + +The Dynamo Frontend supports multiple tokenizer backends for BPE-based models. The backend controls how input text is tokenized before being sent to the inference engine. + +## Tokenizer Backends + +#### `default` HuggingFace Tokenizers + +The default backend uses the [HuggingFace `tokenizers`](https://github.com/huggingface/tokenizers) library (Rust). +It supports features in `tokenizer.json` files (normalizers, pre-tokenizers, post-processors, decoders, added tokens with special-token flags, and byte-fallback). 
+ +#### `fastokens` High-Performance BPE Encoding + +The `fastokens` backend uses the [`fastokens`](https://github.com/Atero-ai/fastokens) crate, a purpose-built BPE encoder optimized for throughput. +It is a _hybrid_ backend: encoding uses `fastokens` while decoding falls back to HuggingFace so that incremental detokenization, byte-fallback, and special-token handling work correctly. + +Use this backend when tokenization is a measurable bottleneck, for example on high-concurrency prefill-heavy workloads. + +#### Compatibility notes: + +- Works with standard BPE `tokenizer.json` files (Qwen, LLaMA, GPT-family, Mistral, DeepSeek, etc.). +- If `fastokens` cannot load a particular tokenizer file, the frontend logs a warning and transparently falls back to HuggingFace; requests are never dropped. +- Has no effect on TikToken-format tokenizers (`.model` / `.tiktoken` files), which always use the TikToken backend. + +## Configuration + +Set the backend with a CLI flag or environment variable. The CLI flag takes precedence. + +| CLI Argument | Env Var | Valid values | Default | +|---|---|---|---| +| `--tokenizer` | `DYN_TOKENIZER` | `default`, `fastokens` | `default` | + +**Examples:** + +```bash +# CLI flag +python -m dynamo.frontend --tokenizer fastokens + +# Environment variable +export DYN_TOKENIZER=fastokens +python -m dynamo.frontend +``` + +## Dynamo Frontend Behavior + +When `DYN_TOKENIZER=fastokens` is set: + +1. The frontend passes the environment variable to the Rust runtime. +2. When building the tokenizer for a model, `ModelDeploymentCard::tokenizer()` attempts to load `fastokens::Tokenizer` from the same `tokenizer.json` file. +3. If loading succeeds, a hybrid `FastTokenizer` is created that encodes with `fastokens` and decodes with HuggingFace. +4. If loading fails (unsupported tokenizer features, missing file, etc.), the frontend logs a warning and falls back to the standard HuggingFace backend; no operator intervention is needed. 
diff --git a/docs/index.yml b/docs/index.yml index 4e1df0cff0a..817e2d2b952 100644 --- a/docs/index.yml +++ b/docs/index.yml @@ -200,6 +200,8 @@ navigation: contents: - page: Frontend Guide path: components/frontend/frontend-guide.md + - page: Tokenizer Backends + path: components/frontend/tokenizer-backends.md - section: Router path: components/router/README.md contents: diff --git a/lib/llm/Cargo.toml b/lib/llm/Cargo.toml index b14226b160a..17b1c655915 100644 --- a/lib/llm/Cargo.toml +++ b/lib/llm/Cargo.toml @@ -30,7 +30,11 @@ bench = ["dynamo-kv-router/bench"] kv-router-stress = ["dep:clap", "dep:indicatif", "bench"] [[bench]] -name = "tokenizer" +name = "tokenizer_simple" +harness = false + +[[bench]] +name = "tokenizer_dataset" harness = false [[bench]] diff --git a/lib/llm/benches/tokenizer_dataset.rs b/lib/llm/benches/tokenizer_dataset.rs new file mode 100644 index 00000000000..5baa70dc06d --- /dev/null +++ b/lib/llm/benches/tokenizer_dataset.rs @@ -0,0 +1,319 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Dataset-driven benchmarks +//! +//! Downloads a real dataset (LongBench-v2) from HuggingFace Hub and benchmarks +//! per-sample encode throughput with correctness verification. +//! +//! Run: +//! cargo bench --bench tokenizer_dataset +//! +//! Override tokenizer (default: Qwen/Qwen3-0.6B): +//! TOKENIZER_PATH=deepseek-ai/DeepSeek-V3 cargo bench --bench tokenizer_dataset +//! +//! Override dataset and sample count: +//! DATASET=RyokoAI/ShareGPT52K MAX_SAMPLES=50 cargo bench --bench tokenizer_dataset +//! +//! Batch benchmark (default: sequential): +//! BATCH_SIZE=64 cargo bench --bench tokenizer_dataset + +use std::path::Path; +use std::time::{Duration, Instant}; + +use dynamo_llm::tokenizers::{FastTokenizer, HuggingFaceTokenizer, traits::Encoder}; + +/// Default HuggingFace model for the tokenizer. 
+const DEFAULT_HF_MODEL: &str = "Qwen/Qwen3-0.6B"; + +/// Default dataset on HuggingFace Hub. +const DEFAULT_DATASET: &str = "zai-org/LongBench-v2"; + +/// Resolve tokenizer path: local file, HF model name, or default. +fn resolve_tokenizer_path() -> String { + let input = std::env::var("TOKENIZER_PATH").ok(); + + if let Some(ref p) = input + && Path::new(p).is_file() + { + eprintln!("[setup] Using local tokenizer: {p}"); + return p.clone(); + } + + let model_name = input.as_deref().unwrap_or(DEFAULT_HF_MODEL); + eprintln!("[setup] Downloading tokenizer for {model_name}..."); + + let cache = hf_hub::Cache::default(); + let api = hf_hub::api::sync::ApiBuilder::from_cache(cache) + .with_progress(true) + .build() + .expect("Failed to create HuggingFace API client"); + + let repo = api.model(model_name.to_string()); + let path = repo + .get("tokenizer.json") + .expect("Failed to download tokenizer.json"); + + let path_str = path.display().to_string(); + eprintln!("[setup] Tokenizer: {path_str}"); + path_str +} + +/// Return the JSON filename for a known HuggingFace Hub dataset. +fn dataset_json_file(dataset: &str) -> &'static str { + match dataset { + "RyokoAI/ShareGPT52K" => "sg_90k_part1.json", + "zai-org/LongBench-v2" => "data.json", + _ => panic!( + "Unknown dataset: {dataset}. Supported: zai-org/LongBench-v2, RyokoAI/ShareGPT52K" + ), + } +} + +/// Extract a text sample from a single JSON item. 
+fn extract_text(dataset: &str, item: &serde_json::Value) -> Option<String> {
+    match dataset {
+        "RyokoAI/ShareGPT52K" => {
+            let messages = item.get("conversations")?.as_array()?;
+            let parts: Vec<String> = messages
+                .iter()
+                .filter_map(|msg| {
+                    let role = msg.get("from")?.as_str()?;
+                    let value = msg.get("value")?.as_str()?;
+                    if value.is_empty() {
+                        return None;
+                    }
+                    Some(format!("[{role}]: {value}"))
+                })
+                .collect();
+            if parts.is_empty() {
+                return None;
+            }
+            Some(parts.join("\n\n"))
+        }
+        "zai-org/LongBench-v2" => {
+            let context = item.get("context")?.as_str()?;
+            if context.is_empty() {
+                return None;
+            }
+            Some(context.to_string())
+        }
+        _ => None,
+    }
+}
+
+/// Load text samples from a HuggingFace Hub dataset.
+fn load_dataset(dataset: &str, max_items: usize) -> Vec<String> {
+    let json_file = dataset_json_file(dataset);
+
+    eprintln!("[setup] Downloading dataset {dataset}...");
+    let api = hf_hub::api::sync::Api::new().expect("Failed to create HuggingFace API client");
+    let repo = api.dataset(dataset.to_string());
+    let json_path = repo.get(json_file).expect("Failed to download dataset");
+
+    let text = std::fs::read_to_string(&json_path).expect("Failed to read dataset JSON");
+    let data: Vec<serde_json::Value> =
+        serde_json::from_str(&text).expect("Failed to parse dataset JSON");
+
+    let samples: Vec<String> = data
+        .iter()
+        .take(max_items)
+        .filter_map(|item| extract_text(dataset, item))
+        .collect();
+
+    eprintln!("[setup] Loaded {} samples", samples.len());
+    samples
+}
+
+fn print_summary(
+    label: &str,
+    n: usize,
+    total_chars: u64,
+    total_tokens: u64,
+    total_hf: Duration,
+    total_ft: Duration,
+) {
+    let hf_ms = total_hf.as_secs_f64() * 1000.0;
+    let ft_ms = total_ft.as_secs_f64() * 1000.0;
+    let speedup = hf_ms / ft_ms;
+    let nf = n as f64;
+
+    println!();
+    println!("=== {label} ({n} samples) ===");
+    println!(" Total chars: {total_chars}");
+    println!(" Total tokens: {total_tokens}");
+    println!(" ---");
+    println!(" HF total: {hf_ms:>10.2} ms");
+    println!(" fastokens total: 
{ft_ms:>10.2} ms"); + println!(" Speedup: {speedup:>10.2}x"); + println!(" ---"); + println!(" HF avg/sample: {:>10.3} ms", hf_ms / nf); + println!(" ft avg/sample: {:>10.3} ms", ft_ms / nf); + println!( + " HF throughput: {:>10.2} MB/s", + total_chars as f64 / total_hf.as_secs_f64() / 1_000_000.0 + ); + println!( + " ft throughput: {:>10.2} MB/s", + total_chars as f64 / total_ft.as_secs_f64() / 1_000_000.0 + ); +} + +fn bench_sequential(samples: &[String], hf: &HuggingFaceTokenizer, fast: &FastTokenizer) { + let mut total_hf = Duration::ZERO; + let mut total_ft = Duration::ZERO; + let mut total_tokens: u64 = 0; + let mut total_chars: u64 = 0; + let mut mismatches = 0u64; + + for (i, text) in samples.iter().enumerate() { + let t0 = Instant::now(); + let hf_enc = hf.encode(text).expect("HF encode failed"); + let t1 = Instant::now(); + let ft_enc = fast.encode(text).expect("fastokens encode failed"); + let t2 = Instant::now(); + + let dt_hf = t1 - t0; + let dt_ft = t2 - t1; + + if hf_enc.token_ids() != ft_enc.token_ids() { + mismatches += 1; + if mismatches <= 3 { + eprintln!( + "[MISMATCH] sample {i}: hf={} tokens, ft={} tokens", + hf_enc.token_ids().len(), + ft_enc.token_ids().len() + ); + } + } + + total_hf += dt_hf; + total_ft += dt_ft; + total_tokens += ft_enc.token_ids().len() as u64; + total_chars += text.len() as u64; + + if (i + 1) % 20 == 0 { + eprintln!("[progress] {}/{}", i + 1, samples.len()); + } + } + + if mismatches > 0 { + eprintln!("[WARNING] {mismatches} samples had mismatched token IDs"); + } else { + eprintln!("[OK] All samples produced identical token IDs"); + } + + print_summary( + "Sequential Benchmark", + samples.len(), + total_chars, + total_tokens, + total_hf, + total_ft, + ); +} + +fn bench_batched( + samples: &[String], + hf: &HuggingFaceTokenizer, + fast: &FastTokenizer, + batch_size: usize, +) { + let mut total_hf = Duration::ZERO; + let mut total_ft = Duration::ZERO; + let mut total_tokens: u64 = 0; + let mut total_chars: u64 = 0; + 
let mut mismatches = 0u64;
+
+    let num_batches = samples.len().div_ceil(batch_size);
+
+    for (batch_idx, batch) in samples.chunks(batch_size).enumerate() {
+        let batch_refs: Vec<&str> = batch.iter().map(|s| s.as_str()).collect();
+        let batch_chars: u64 = batch.iter().map(|s| s.len() as u64).sum();
+
+        let t0 = Instant::now();
+        let hf_results = hf
+            .encode_batch(&batch_refs)
+            .expect("HF encode_batch failed");
+        let t1 = Instant::now();
+        let ft_results = fast
+            .encode_batch(&batch_refs)
+            .expect("fastokens encode_batch failed");
+        let t2 = Instant::now();
+
+        // Verify correctness per sample within the batch
+        for (j, (hf_enc, ft_enc)) in hf_results.iter().zip(ft_results.iter()).enumerate() {
+            if hf_enc.token_ids() != ft_enc.token_ids() {
+                mismatches += 1;
+                if mismatches <= 3 {
+                    let global_idx = batch_idx * batch_size + j;
+                    eprintln!(
+                        "[MISMATCH] sample {global_idx}: hf={} tokens, ft={} tokens",
+                        hf_enc.token_ids().len(),
+                        ft_enc.token_ids().len()
+                    );
+                }
+            }
+        }
+
+        let batch_tokens: u64 = ft_results
+            .iter()
+            .map(|enc| enc.token_ids().len() as u64)
+            .sum();
+
+        total_hf += t1 - t0;
+        total_ft += t2 - t1;
+        total_tokens += batch_tokens;
+        total_chars += batch_chars;
+
+        if (batch_idx + 1) % 5 == 0 {
+            eprintln!("[progress] batch {}/{num_batches}", batch_idx + 1);
+        }
+    }
+
+    if mismatches > 0 {
+        eprintln!("[WARNING] {mismatches} samples had mismatched token IDs");
+    } else {
+        eprintln!("[OK] All samples produced identical token IDs");
+    }
+
+    print_summary(
+        &format!("Batched Benchmark (batch_size={batch_size})"),
+        samples.len(),
+        total_chars,
+        total_tokens,
+        total_hf,
+        total_ft,
+    );
+}
+
+fn main() {
+    let tokenizer_path = resolve_tokenizer_path();
+    let dataset = std::env::var("DATASET").unwrap_or_else(|_| DEFAULT_DATASET.to_string());
+    let max_samples: usize = std::env::var("MAX_SAMPLES")
+        .ok()
+        .and_then(|v| v.parse().ok())
+        .unwrap_or(503);
+    let batch_size: Option<usize> = std::env::var("BATCH_SIZE")
+        .ok()
+        .and_then(|v| 
v.parse().ok()); + + let samples = load_dataset(&dataset, max_samples); + + let hf = HuggingFaceTokenizer::from_file(&tokenizer_path) + .expect("Failed to load HuggingFace tokenizer"); + let fast = + FastTokenizer::from_file(&tokenizer_path).expect("Failed to load fastokens tokenizer"); + + // Warmup + if let Some(s) = samples.first() { + let _ = hf.encode(s.as_str()); + let _ = fast.encode(s.as_str()); + } + + if let Some(bs) = batch_size { + bench_batched(&samples, &hf, &fast, bs); + } else { + bench_sequential(&samples, &hf, &fast); + } +} diff --git a/lib/llm/benches/tokenizer.rs b/lib/llm/benches/tokenizer_simple.rs similarity index 60% rename from lib/llm/benches/tokenizer.rs rename to lib/llm/benches/tokenizer_simple.rs index 5a2403e693a..d25c8dc308a 100644 --- a/lib/llm/benches/tokenizer.rs +++ b/lib/llm/benches/tokenizer_simple.rs @@ -9,10 +9,12 @@ use criterion::{Criterion, Throughput, criterion_group, criterion_main}; use dynamo_llm::backend::Decoder; use dynamo_llm::protocols::common::StopConditions; use dynamo_llm::tokenizers::DecodeStream; +use dynamo_llm::tokenizers::FastTokenizer; use dynamo_llm::tokenizers::hf::HuggingFaceTokenizer; use dynamo_llm::tokenizers::tiktoken::TikTokenTokenizer; use dynamo_llm::tokenizers::traits::{Encoder, Tokenizer}; use dynamo_llm::types::TokenIdType; +use std::path::Path; const TEST_TOKENIZER: &str = concat!( env!("CARGO_MANIFEST_DIR"), @@ -137,12 +139,112 @@ pub fn tiktoken_decode(c: &mut Criterion) { group.finish(); } +// --------------------------------------------------------------------------- +// Tokenizer backend benchmarks +// +// By default these use the in-tree TinyLlama tokenizer. 
Override with a +// production-size tokenizer for more realistic numbers: +// TOKENIZER_PATH=/path/to/tokenizer.json cargo bench -- fastokens +// TOKENIZER_PATH=Qwen/Qwen3-0.6B cargo bench -- fastokens +// --------------------------------------------------------------------------- + +/// Default HuggingFace model to download when TOKENIZER_PATH is not set. +const DEFAULT_HF_MODEL: &str = "Qwen/Qwen3-0.6B"; + +/// Resolve a tokenizer.json path from TOKENIZER_PATH env var or download from HF Hub. +fn resolve_tokenizer_path() -> String { + let input = std::env::var("TOKENIZER_PATH").ok(); + + if let Some(ref p) = input + && Path::new(p).is_file() + { + return p.clone(); + } + + let model_name = input.as_deref().unwrap_or(DEFAULT_HF_MODEL); + let cache = hf_hub::Cache::default(); + let api = hf_hub::api::sync::ApiBuilder::from_cache(cache) + .with_progress(true) + .build() + .expect("Failed to create HuggingFace API client"); + + let repo = api.model(model_name.to_string()); + repo.get("tokenizer.json") + .expect("Failed to download tokenizer.json from HuggingFace Hub") + .display() + .to_string() +} + +const FASTOKENS_BATCH_SIZE: usize = 64; + +pub fn fastokens_encode(c: &mut Criterion) { + let tokenizer_path = resolve_tokenizer_path(); + let test_str: &str = &INPUT_STR.repeat(TARGET_ISL / INPUT_STR.len()); + + let hf_encoder = HuggingFaceTokenizer::from_file(&tokenizer_path).unwrap(); + let fast_encoder = FastTokenizer::from_file(&tokenizer_path).unwrap(); + + // Verify parity before benchmarking + let hf_ids = hf_encoder.encode(INPUT_STR).unwrap(); + let fast_ids = fast_encoder.encode(INPUT_STR).unwrap(); + assert_eq!( + hf_ids.token_ids(), + fast_ids.token_ids(), + "fastokens and HuggingFace must produce identical token IDs" + ); + + let mut group = c.benchmark_group("fastokens-encode"); + group.throughput(Throughput::Bytes(test_str.len() as u64)); + + group.bench_function("hf_encode", |b| { + b.iter(|| { + let _ = hf_encoder.encode(black_box(test_str)).unwrap(); + 
}) + }); + + group.bench_function("fastokens_encode", |b| { + b.iter(|| { + let _ = fast_encoder.encode(black_box(test_str)).unwrap(); + }) + }); + + group.finish(); +} + +pub fn fastokens_batch_encode(c: &mut Criterion) { + let tokenizer_path = resolve_tokenizer_path(); + let batch: Vec<&str> = (0..FASTOKENS_BATCH_SIZE).map(|_| INPUT_STR).collect(); + let total_bytes: u64 = batch.iter().map(|s| s.len() as u64).sum(); + + let hf_encoder = HuggingFaceTokenizer::from_file(&tokenizer_path).unwrap(); + let fast_encoder = FastTokenizer::from_file(&tokenizer_path).unwrap(); + + let mut group = c.benchmark_group("fastokens-batch-encode"); + group.throughput(Throughput::Bytes(total_bytes)); + + group.bench_function("hf_batch_encode", |b| { + b.iter(|| { + let _ = hf_encoder.encode_batch(black_box(&batch)).unwrap(); + }) + }); + + group.bench_function("fastokens_batch_encode", |b| { + b.iter(|| { + let _ = fast_encoder.encode_batch(black_box(&batch)).unwrap(); + }) + }); + + group.finish(); +} + criterion_group!( benches, encode, decode, decode_big, tiktoken_encode, - tiktoken_decode + tiktoken_decode, + fastokens_encode, + fastokens_batch_encode ); criterion_main!(benches);