diff --git a/.github/workflows/python-release.yml b/.github/workflows/python-release.yml index 0547ef72bb..9979292d50 100644 --- a/.github/workflows/python-release.yml +++ b/.github/workflows/python-release.yml @@ -138,6 +138,31 @@ jobs: - run: twine check --strict dist/* working-directory: ./bindings/python + - name: Report wheel sizes + working-directory: ./bindings/python + run: | + echo "## 🐍 Python Wheel Size β€” ${{ matrix.os }} ${{ matrix.target }}" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "Wheel (.whl) = compressed archive downloaded from PyPI." >> $GITHUB_STEP_SUMMARY + echo "Installed .so/.pyd = actual shared library loaded at runtime." >> $GITHUB_STEP_SUMMARY + echo "The installed size is what matters for on-device deployment." >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Wheel | Wheel size | Installed .so/.pyd |" >> $GITHUB_STEP_SUMMARY + echo "|---|---|---|" >> $GITHUB_STEP_SUMMARY + EXTRACT_DIR=$(mktemp -d) + for f in dist/*.whl; do + WHL_SIZE=$(du -h "$f" | cut -f1) + NAME=$(basename "$f") + rm -rf "$EXTRACT_DIR"/* + (cd "$EXTRACT_DIR" && unzip -q "$(realpath -- "$OLDPWD/$f" 2>/dev/null || echo "$OLDPWD/$f")" 2>/dev/null) \ + || unzip -q -o "$f" -d "$EXTRACT_DIR" 2>/dev/null || true + SO_SIZE=$(find "$EXTRACT_DIR" \( -name '*.so' -o -name '*.pyd' -o -name '*.dylib' \) -exec du -h {} \; | head -1 | cut -f1) + [ -z "$SO_SIZE" ] && SO_SIZE="n/a" + echo "| \`${NAME}\` | ${WHL_SIZE} | ${SO_SIZE} |" >> $GITHUB_STEP_SUMMARY + done + rm -rf "$EXTRACT_DIR" + echo "" >> $GITHUB_STEP_SUMMARY + - uses: actions/upload-artifact@v4 with: name: pypi_files-${{ matrix.os }}-${{ matrix.target }}-${{ matrix.manylinux }} @@ -180,6 +205,42 @@ jobs: with: path: ./bindings/python/dist merge-multiple: true + + - name: Wheel size summary + working-directory: ./bindings/python + run: | + echo "## πŸ“¦ All Python Wheel Sizes" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Wheel | Wheel size | Installed .so/.pyd |" >> $GITHUB_STEP_SUMMARY + echo "|---|---|---|" >> $GITHUB_STEP_SUMMARY + TOTAL_WHL=0 + TOTAL_SO=0 + EXTRACT_DIR=$(mktemp -d) + for f in dist/*.whl; do + WHL_BYTES=$(stat --format=%s "$f" 2>/dev/null || stat -f%z "$f") + WHL_SIZE=$(du -h "$f" | cut -f1) + NAME=$(basename "$f") + rm -rf "$EXTRACT_DIR"/* + unzip -q -o "$f" -d "$EXTRACT_DIR" 2>/dev/null || true + SO_FILE=$(find "$EXTRACT_DIR" \( -name '*.so' -o -name '*.pyd' -o -name '*.dylib' \) | head -1) + if [ -n "$SO_FILE" ]; then + SO_BYTES=$(stat --format=%s "$SO_FILE" 2>/dev/null || stat -f%z "$SO_FILE") + SO_SIZE=$(du -h "$SO_FILE" | cut -f1) + TOTAL_SO=$((TOTAL_SO + SO_BYTES)) + else + SO_SIZE="n/a" + fi + echo "| \`${NAME}\` | ${WHL_SIZE} | ${SO_SIZE} |" >> $GITHUB_STEP_SUMMARY + TOTAL_WHL=$((TOTAL_WHL + WHL_BYTES)) + done + rm -rf "$EXTRACT_DIR" + echo "" >> $GITHUB_STEP_SUMMARY + TOTAL_WHL_MB=$(echo "scale=2; $TOTAL_WHL / 1048576" | bc) + TOTAL_SO_MB=$(echo "scale=2; $TOTAL_SO / 1048576" | bc) + WHL_COUNT=$(ls dist/*.whl 2>/dev/null | wc -l | tr -d ' ') + echo "**Total**: ${WHL_COUNT} wheels | wheel: ${TOTAL_WHL_MB} MB | installed: ${TOTAL_SO_MB} MB" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + # Temporary deactivation while testing abi3 CI # - name: Upload to PyPi # working-directory: ./bindings/python diff --git a/.github/workflows/rust-release.yml b/.github/workflows/rust-release.yml index 05a75f072c..001c579a8e 100644 --- a/.github/workflows/rust-release.yml +++ b/.github/workflows/rust-release.yml @@ -24,6 +24,85 @@ jobs: path: ~/.cargo/registry 
key: ubuntu-latest-cargo-registry-${{ hashFiles('**/Cargo.toml') }} + - name: Measure crate size + working-directory: ./tokenizers + run: | + echo "## πŸ“¦ Crate Size Report" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + + # Packed crate size (what gets uploaded to crates.io) + cargo package --list --allow-dirty > /tmp/crate_files.txt + CRATE_FILE_COUNT=$(wc -l < /tmp/crate_files.txt | tr -d ' ') + PACKED_SIZE=$(cargo package --allow-dirty 2>&1 | grep -oP 'Packaged \d+ files?, \K[\d.]+ \w+' || echo "unknown") + echo "### Packed crate (crates.io)" >> $GITHUB_STEP_SUMMARY + echo "- **Size**: ${PACKED_SIZE}" >> $GITHUB_STEP_SUMMARY + echo "- **Files**: ${CRATE_FILE_COUNT}" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + + # Linked shared library size for various feature combinations. + # This is the actual on-device size β€” what ships to users β€” NOT the + # .rlib, which contains unused code the final linker strips. + # We build a minimal cdylib that uses the Tokenizer API and measure it. + TEST_DIR=$(mktemp -d) + TOK_PATH="$(pwd)" + mkdir -p "$TEST_DIR/src" + cat > "$TEST_DIR/src/lib.rs" << 'RS' + use tokenizers::Tokenizer; + #[no_mangle] + pub extern "C" fn tokenize(path: *const u8, len: usize, input: *const u8, input_len: usize) -> usize { + let path = unsafe { std::str::from_utf8_unchecked(std::slice::from_raw_parts(path, len)) }; + let input = unsafe { std::str::from_utf8_unchecked(std::slice::from_raw_parts(input, input_len)) }; + let tok = Tokenizer::from_file(path).unwrap(); + tok.encode(input, false).unwrap().get_ids().len() + } + RS + + measure() { + local LABEL="$1" + local FEATURES="$2" + cat > "$TEST_DIR/Cargo.toml" << TOML + [package] + name = "size-test" + version = "0.1.0" + edition = "2021" + [lib] + crate-type = ["cdylib"] + [dependencies] + tokenizers = { path = "$TOK_PATH", ${FEATURES} } + [profile.release] + lto = "fat" + opt-level = "s" + strip = true + codegen-units = 1 + panic = "abort" + TOML + (cd "$TEST_DIR" && cargo build --release >/dev/null 2>&1) + local LIB=$(find "$TEST_DIR/target/release" -maxdepth 1 \( -name '*.so' -o -name '*.dylib' -o -name '*.dll' \) | head -1) + if [ -n "$LIB" ]; then + local BYTES=$(stat --format=%s "$LIB" 2>/dev/null || stat -f%z "$LIB") + local KB=$((BYTES / 1024)) + echo "| ${LABEL} | ${KB} KB |" >> $GITHUB_STEP_SUMMARY + else + echo "| ${LABEL} | build failed |" >> $GITHUB_STEP_SUMMARY + fi + } + + echo "### Linked shared library size (stripped cdylib, LTO fat, opt-level=s, panic=abort)" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "This is the actual on-device size β€” what ships to end users." 
>> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Feature set | Size |" >> $GITHUB_STEP_SUMMARY + echo "|---|---|" >> $GITHUB_STEP_SUMMARY + + measure "default (all features)" '' + measure "inference (onig + unicode-norm + spm)" 'default-features = false, features = ["inference"]' + measure "minimal onig-only" 'default-features = false, features = ["onig"]' + measure "no-default + training" 'default-features = false, features = ["onig", "training"]' + measure "no-default + parallel" 'default-features = false, features = ["onig", "parallel"]' + + rm -rf "$TEST_DIR" + echo "" >> $GITHUB_STEP_SUMMARY + - name: Publish package rust working-directory: ./tokenizers if: ${{ !contains(github.ref, 'rc') }} diff --git a/bindings/node/Cargo.toml b/bindings/node/Cargo.toml index 42d3c97148..d834aa7617 100644 --- a/bindings/node/Cargo.toml +++ b/bindings/node/Cargo.toml @@ -14,7 +14,6 @@ napi = "2" napi-derive = "2" serde = { version = "1.0.163", features = ["derive"] } tokenizers = { path = "../../tokenizers/" } -ahash = { version = "0.8.11", features = ["serde"] } [build-dependencies] napi-build = "2" diff --git a/bindings/node/src/models.rs b/bindings/node/src/models.rs index 9ee7f60f7d..8beebb7f8a 100644 --- a/bindings/node/src/models.rs +++ b/bindings/node/src/models.rs @@ -1,7 +1,6 @@ use crate::arc_rwlock_serde; use crate::tasks::models::{BPEFromFilesTask, WordLevelFromFilesTask, WordPieceFromFilesTask}; use crate::trainers::Trainer; -use ahash::AHashMap; use napi::bindgen_prelude::*; use napi_derive::napi; use serde::{Deserialize, Serialize}; @@ -12,6 +11,7 @@ use tokenizers as tk; use tokenizers::models::bpe::{BpeBuilder, Merges}; use tokenizers::models::wordlevel::WordLevelBuilder; use tokenizers::models::wordpiece::WordPieceBuilder; +use tokenizers::utils::AHashMap; #[napi] #[derive(Clone, Serialize, Deserialize)] diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 52da7498c7..d15f68a985 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -21,7 +21,6 @@ once_cell = "1.19.0" numpy = "0.28" ndarray = "0.16" itertools = "0.14" -ahash = { version = "0.8.11", features = ["serde"] } pyo3-ffi = { version = "0.28" } [dependencies.tokenizers] @@ -34,3 +33,7 @@ pyo3 = { version = "0.28", features = ["auto-initialize", "experimental-inspect" [features] default = ["ext-module"] ext-module = ["pyo3/extension-module"] + +[profile.release] +strip = true +lto = "fat" diff --git a/bindings/python/src/models.rs b/bindings/python/src/models.rs index d2f7bf7df1..a445f74c7d 100644 --- a/bindings/python/src/models.rs +++ b/bindings/python/src/models.rs @@ -4,7 +4,6 @@ use std::sync::{Arc, RwLock}; use crate::token::PyToken; use crate::trainers::PyTrainer; -use ahash::AHashMap; use pyo3::exceptions; use pyo3::prelude::*; use pyo3::types::*; @@ -14,6 +13,7 @@ use tk::models::unigram::Unigram; use tk::models::wordlevel::WordLevel; use tk::models::wordpiece::{WordPiece, WordPieceBuilder}; use tk::models::ModelWrapper; +use tk::utils::AHashMap; use tk::{Model, Token}; use tokenizers as tk; diff --git a/tokenizers/Cargo.toml b/tokenizers/Cargo.toml index 0e937f3cc5..ac7fa9d4c0 100644 --- a/tokenizers/Cargo.toml +++ b/tokenizers/Cargo.toml @@ -67,22 +67,19 @@ name = "ci_benchmark" harness = false [dependencies] -rand = "0.9" +rand = { version = "0.9", optional = true } onig = { version = "6.5.1", default-features = false, optional = true } -regex = "1.10" -regex-syntax = "0.8" -rayon = "1.10" -rayon-cond = "0.4" +regex = { version = "1.10", 
default-features = false, features = ["std", "perf", "unicode-perl"], optional = true } +rayon = { version = "1.10", optional = true } +rayon-cond = { version = "0.4", optional = true } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" -unicode-normalization-alignments = "0.1" +unicode-normalization-alignments = { version = "0.1", optional = true } unicode_categories = "0.1" -unicode-segmentation = "1.11" +unicode-segmentation = { version = "1.11", optional = true } indicatif = { version = "0.18", optional = true } -itertools = "0.14" log = "0.4" -derive_builder = "0.20" -spm_precompiled = "0.1.3" +spm_precompiled = { version = "0.1.3", optional = true } hf-hub = { version = "0.4.1", features = ["ureq"], default-features = false, optional = true } daachorse = "1.0.1" paste = "1.0.14" @@ -90,17 +87,21 @@ macro_rules_attribute = "0.2.0" thiserror = "2" fancy-regex = { version = "0.17", optional = true } getrandom = { version = "0.3" } -esaxx-rs = { version = "0.1.10", default-features = false, features = [] } -monostate = "0.1.12" -ahash = { version = "0.8.11", features = ["serde"] } -dary_heap = { version = "0.3.6", features = ["serde"] } -compact_str = { version = "0.9", features = ["serde"] } +esaxx-rs = { version = "0.1.10", default-features = false, features = [], optional = true } +foldhash = "0.2" +dary_heap = "0.3.6" +compact_str = { version = "0.9", features = ["serde"], optional = true } [features] -default = ["progressbar", "onig", "esaxx_fast"] -esaxx_fast = ["esaxx-rs/cpp"] +default = ["progressbar", "onig", "esaxx_fast", "spm", "training", "parallel", "unicode-normalization", "regex"] +unicode-normalization = ["dep:unicode-normalization-alignments"] +parallel = ["dep:rayon", "dep:rayon-cond"] +training = ["dep:rand", "dep:esaxx-rs", "dep:compact_str"] +spm = ["dep:spm_precompiled", "dep:unicode-segmentation"] +esaxx_fast = ["dep:esaxx-rs", "esaxx-rs/cpp"] progressbar = ["indicatif"] http = ["hf-hub"] +inference = ["onig", "unicode-normalization", "spm"] unstable_wasm = ["fancy-regex", "getrandom/wasm_js"] rustls-tls = ["hf-hub?/rustls-tls"] @@ -114,6 +115,21 @@ tracing-subscriber = "0.3.18" [profile.release] lto = "fat" +# Use this profile for minimal binary size (e.g. on-device deployment). +# Pair with the `inference` feature for all inference capabilities without training/parallel: +# cargo build --profile release-small --no-default-features --features inference +# For even smaller builds (nightly only): +# RUSTFLAGS="-Zlocation-detail=none -Zfmt-debug=none" cargo +nightly build \ +# -Z build-std=std,panic_abort -Z build-std-features="optimize_for_size" \ +# --target --profile release-small \ +# --no-default-features --features inference +[profile.release-small] +inherits = "release" +opt-level = "s" +strip = true +panic = "abort" +codegen-units = 1 + [profile.profiling] inherits = "release" debug = true diff --git a/tokenizers/README.md b/tokenizers/README.md index 173e0bc065..7f7ab46963 100644 --- a/tokenizers/README.md +++ b/tokenizers/README.md @@ -135,9 +135,134 @@ fn main() -> Result<()> { ## Features -- **progressbar**: The progress bar visualization is enabled by default. It might be disabled if - compilation for certain targets is not supported by the [termios](https://crates.io/crates/termios) - dependency of the [indicatif](https://crates.io/crates/indicatif) progress bar. +All features are **enabled by default** for backward compatibility. Disable them for on-device/embedded use. 
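+
+For example, one way to get a minimal inference-only build of the crate itself is the `inference` umbrella feature (`onig` + `unicode-normalization` + `spm`) defined in `Cargo.toml`; pair it with the `release-small` profile there for a size-optimized binary:
+
+```bash
+cargo build --release --no-default-features --features inference
+```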
-- **http**: This feature enables downloading the tokenizer via HTTP. It is disabled by default. - With this feature enabled, `Tokenizer::from_pretrained` becomes accessible. +| Feature | Default | Description | Deps saved | +|---------|---------|-------------|------------| +| `training` | on | Tokenizer training (trainers, `train()` method) | rand, esaxx-rs, compact_str | +| `parallel` | on | Multi-threaded encoding via rayon | rayon, rayon-cond, crossbeam | +| `spm` | on | SentencePiece precompiled normalizer (T5, mBART) | spm_precompiled, nom, unicode-segmentation | +| `unicode-normalization` | on | NFC/NFD/NFKC/NFKD normalizers | unicode-normalization-alignments | +| `progressbar` | on | Progress bars during training | indicatif | +| `onig` | on | Oniguruma regex engine (C binding) | onig, onig_sys | +| `http` | off | Download tokenizers from Hugging Face Hub | hf-hub, ureq | +| `unstable_wasm` | off | WASM target support (uses fancy-regex) | fancy-regex | + +### On-device / embedded configuration + +```toml +# Minimal inference-only (with Oniguruma regex): +tokenizers = { version = "0.22", default-features = false, features = ["onig"] } + +# WASM (pure Rust, no C dependencies): +tokenizers = { version = "0.22", default-features = false, features = ["unstable_wasm"] } +``` + +## Bundle size + +The deployed library size depends on how you link it. Here are measured sizes on macOS arm64: + +| Configuration | .dylib (shared) | .a (static) | After final link | +|---------------|----------------|-------------|-----------------| +| Default (all features) | 2.5 MB | 9.2 MB | ~2.5 MB | +| Inference-only (`onig`) | 2.0 MB | 8.0 MB | ~2.0 MB | + +> **Note**: `.a` (static archive) files contain all object code including unused functions. +> The linker strips dead code at final link time, so the actual contribution to your app +> binary is close to the `.dylib` size. The `.a` size is NOT what ships to users. + +### Comparison with Meta pytorch/tokenizers (C++) + +| | Meta (C++) | HuggingFace (Rust) | +|---|---|---| +| Stripped binary (all tokenizer types) | **0.8 MB** | **2.0 MB** | +| Static .a (pre-link, all deps) | 5.5 MB | 8.0 MB | +| Features | SP, Tiktoken, Llama2c | BPE, WordPiece, Unigram, WordLevel + normalizers, pre-tokenizers, decoders, added vocab | + +HuggingFace is ~2.5x larger because it includes full `tokenizer.json` parsing (serde), Unicode-aware +regex, all normalizer/pre-tokenizer/decoder types, and added vocabulary matching β€” features Meta's +library doesn't have. + +### How to measure bundle size + +**1. Measure the linked shared library (what ships to users):** + +```bash +# Create a test crate that links tokenizers as a cdylib +cargo new --lib measure-size && cd measure-size +cat >> Cargo.toml << 'EOF' +[lib] +crate-type = ["cdylib"] + +[dependencies] +tokenizers = { path = "../tokenizers", default-features = false, features = ["onig"] } + +[profile.release] +lto = "fat" +opt-level = "s" +strip = true +EOF + +echo 'use tokenizers::Tokenizer; +#[no_mangle] +pub extern "C" fn tokenize() { let _ = Tokenizer::from_file("t.json"); }' > src/lib.rs + +cargo build --release +ls -lh target/release/*.dylib # macOS +ls -lh target/release/*.so # Linux +``` + +**2. Measure per-crate contribution with cargo-bloat:** + +```bash +cargo install cargo-bloat +cargo bloat --release --crates -n 30 +``` + +**3. 
Measure dependency rlib sizes (compile-time cost):** + +```bash +# Total rlib for runtime deps only +cargo tree --edges=normal --prefix none -f '{p}' | awk '{print $1}' | sort -u | sed 's/-/_/g' > /tmp/deps.txt + +for f in target/release/deps/*.rlib; do + sz=$(stat -f%z "$f" 2>/dev/null || stat -c%s "$f" 2>/dev/null) + name=$(basename "$f" | sed 's/-[a-f0-9]*\.rlib//' | sed 's/^lib//') + echo "$sz $name" +done | sort -t' ' -k2 | awk '!seen[$2]++ {print}' | sort -k2 > /tmp/rlibs.txt + +join -1 2 -2 1 /tmp/rlibs.txt /tmp/deps.txt | awk '{ + total+=$2 + printf "%8.1f KB %s\n", $2/1024, $1 +} END { + printf "\nTOTAL: %.1f MB\n", total/1048576 +}' | sort -rn +``` + +**4. Track size in CI (regression test):** + +```bash +#!/bin/bash +# scripts/check-bundle-size.sh +set -e + +MAX_DYLIB_KB=2500 # 2.5 MB threshold + +cargo build --release --no-default-features --features "onig" \ + --target-dir /tmp/size-check + +SIZE=$(stat -f%z /tmp/size-check/release/libtokenizers.rlib 2>/dev/null \ + || stat -c%s /tmp/size-check/release/libtokenizers.rlib) +SIZE_KB=$((SIZE / 1024)) + +echo "libtokenizers.rlib: ${SIZE_KB} KB" + +# For the actual linked size, build a cdylib test crate +# (see step 1 above) and check the .dylib/.so size + +if [ "$SIZE_KB" -gt "$MAX_DYLIB_KB" ]; then + echo "FAIL: bundle size ${SIZE_KB} KB exceeds threshold ${MAX_DYLIB_KB} KB" + exit 1 +fi +echo "PASS: bundle size OK" +``` diff --git a/tokenizers/src/decoders/byte_fallback.rs b/tokenizers/src/decoders/byte_fallback.rs index 57b7b63cd7..32d03d0729 100644 --- a/tokenizers/src/decoders/byte_fallback.rs +++ b/tokenizers/src/decoders/byte_fallback.rs @@ -1,23 +1,22 @@ use crate::tokenizer::{Decoder, Result}; -use monostate::MustBe; -use serde::{Deserialize, Serialize}; +impl_serde_type! { + #[derive(Clone, Debug)] + /// ByteFallback is a simple trick which converts tokens looking like `<0x61>` + /// to pure bytes, and attempts to make them into a string. If the tokens + /// cannot be decoded you will get οΏ½ instead for each inconvertible byte token + pub struct ByteFallback; +} -#[derive(Deserialize, Clone, Debug, Serialize, Default)] -/// ByteFallback is a simple trick which converts tokens looking like `<0x61>` -/// to pure bytes, and attempts to make them into a string. 
If the tokens -/// cannot be decoded you will get οΏ½ instead for each inconvertible byte token -#[non_exhaustive] -pub struct ByteFallback { - #[serde(rename = "type")] - type_: MustBe!("ByteFallback"), +impl Default for ByteFallback { + fn default() -> Self { + ByteFallback + } } impl ByteFallback { pub fn new() -> Self { - Self { - type_: MustBe!("ByteFallback"), - } + ByteFallback } } diff --git a/tokenizers/src/decoders/ctc.rs b/tokenizers/src/decoders/ctc.rs index 9d5a571886..c2e529cd3f 100644 --- a/tokenizers/src/decoders/ctc.rs +++ b/tokenizers/src/decoders/ctc.rs @@ -1,7 +1,6 @@ use crate::decoders::wordpiece; use crate::tokenizer::{Decoder, Result}; -use itertools::Itertools; use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, Serialize, Deserialize)] @@ -43,22 +42,22 @@ impl Default for CTC { impl Decoder for CTC { fn decode_chain(&self, tokens: Vec) -> Result> { - Ok(tokens - .into_iter() - .dedup() - .filter_map(|token| { - let mut replaced = token.replace(&self.pad_token, ""); - if self.cleanup { - replaced = - wordpiece::cleanup(&replaced).replace(&self.word_delimiter_token, " "); - } - if replaced.is_empty() { - None - } else { - Some(replaced) - } - }) - .collect()) + let mut prev: Option = None; + let mut result = Vec::new(); + for token in tokens { + if prev.as_ref() == Some(&token) { + continue; + } + prev = Some(token.clone()); + let mut replaced = token.replace(&self.pad_token, ""); + if self.cleanup { + replaced = wordpiece::cleanup(&replaced).replace(&self.word_delimiter_token, " "); + } + if !replaced.is_empty() { + result.push(replaced); + } + } + Ok(result) } } diff --git a/tokenizers/src/decoders/fuse.rs b/tokenizers/src/decoders/fuse.rs index 5e4a1c1197..1d0cc269b8 100644 --- a/tokenizers/src/decoders/fuse.rs +++ b/tokenizers/src/decoders/fuse.rs @@ -1,23 +1,23 @@ use crate::tokenizer::{Decoder, Result}; -use monostate::MustBe; -use serde::{Deserialize, Serialize}; -#[derive(Clone, Debug, Serialize, Deserialize, Default)] -/// Fuse simply fuses all tokens into one big string. -/// It's usually the last decoding step anyway, but this -/// decoder exists incase some decoders need to happen after that -/// step -#[non_exhaustive] -pub struct Fuse { - #[serde(rename = "type")] - type_: MustBe!("Fuse"), +impl_serde_type! { + #[derive(Clone, Debug)] + /// Fuse simply fuses all tokens into one big string. + /// It's usually the last decoding step anyway, but this + /// decoder exists incase some decoders need to happen after that + /// step + pub struct Fuse; +} + +impl Default for Fuse { + fn default() -> Self { + Fuse + } } impl Fuse { pub fn new() -> Self { - Self { - type_: MustBe!("Fuse"), - } + Fuse } } diff --git a/tokenizers/src/lib.rs b/tokenizers/src/lib.rs index 1233a8bd2a..9a47a75510 100644 --- a/tokenizers/src/lib.rs +++ b/tokenizers/src/lib.rs @@ -124,19 +124,141 @@ //! //! # Features //! -//! - **progressbar**: The progress bar visualization is enabled by default. It might be disabled if -//! compilation for certain targets is not supported by the [termios](https://crates.io/crates/termios) -//! dependency of the [indicatif](https://crates.io/crates/indicatif) progress bar. +//! All features are **enabled by default** for backward compatibility. Disable them for on-device/embedded use. //! -//! - **http**: This feature enables downloading the tokenizer via HTTP. It is disabled by default. -//! With this feature enabled, `Tokenizer::from_pretrained` becomes accessible. +//! | Feature | Default | Description | Deps saved | +//! 
|---------|---------|-------------|------------| +//! | `training` | on | Tokenizer training (trainers, `train()` method) | rand, esaxx-rs, compact_str | +//! | `parallel` | on | Multi-threaded encoding via rayon | rayon, rayon-cond, crossbeam | +//! | `spm` | on | SentencePiece precompiled normalizer (T5, mBART) | spm_precompiled, nom, unicode-segmentation | +//! | `unicode-normalization` | on | NFC/NFD/NFKC/NFKD normalizers | unicode-normalization-alignments | +//! | `progressbar` | on | Progress bars during training | indicatif | +//! | `onig` | on | Oniguruma regex engine (C binding) | onig, onig_sys | +//! | `http` | off | Download tokenizers from Hugging Face Hub | hf-hub, ureq | +//! | `unstable_wasm` | off | WASM target support (uses fancy-regex) | fancy-regex | +//! +//! ## On-device / embedded configuration +//! +//! ```toml +//! # Minimal inference-only (with Oniguruma regex): +//! tokenizers = { version = "0.22", default-features = false, features = ["onig"] } +//! +//! # WASM (pure Rust, no C dependencies): +//! tokenizers = { version = "0.22", default-features = false, features = ["unstable_wasm"] } +//! ``` +//! +//! # Bundle size +//! +//! The deployed library size depends on how you link it. Here are measured sizes on macOS arm64: +//! +//! | Configuration | .dylib (shared) | .a (static) | After final link | +//! |---------------|----------------|-------------|-----------------| +//! | Default (all features) | 2.5 MB | 9.2 MB | ~2.5 MB | +//! | Inference-only (`onig`) | 2.0 MB | 8.0 MB | ~2.0 MB | +//! +//! > **Note**: `.a` (static archive) files contain all object code including unused functions. +//! > The linker strips dead code at final link time, so the actual contribution to your app +//! > binary is close to the `.dylib` size. The `.a` size is NOT what ships to users. +//! +//! ## Comparison with Meta pytorch/tokenizers (C++) +//! +//! | | Meta (C++) | HuggingFace (Rust) | +//! |---|---|---| +//! | Stripped binary (all tokenizer types) | **0.8 MB** | **2.0 MB** | +//! | Static .a (pre-link, all deps) | 5.5 MB | 8.0 MB | +//! | Features | SP, Tiktoken, Llama2c | BPE, WordPiece, Unigram, WordLevel + normalizers, pre-tokenizers, decoders, added vocab | +//! +//! HuggingFace is ~2.5x larger because it includes full `tokenizer.json` parsing (serde), Unicode-aware +//! regex, all normalizer/pre-tokenizer/decoder types, and added vocabulary matching β€” features Meta's +//! library doesn't have. +//! +//! ## How to measure bundle size +//! +//! **1. Measure the linked shared library (what ships to users):** +//! +//! ```bash +//! # Create a test crate that links tokenizers as a cdylib +//! cargo new --lib measure-size && cd measure-size +//! cat >> Cargo.toml << 'EOF' +//! [lib] +//! crate-type = ["cdylib"] +//! +//! [dependencies] +//! tokenizers = { path = "../tokenizers", default-features = false, features = ["onig"] } +//! +//! [profile.release] +//! lto = "fat" +//! opt-level = "s" +//! strip = true +//! EOF +//! +//! echo 'use tokenizers::Tokenizer; +//! #[no_mangle] +//! pub extern "C" fn tokenize() { let _ = Tokenizer::from_file("t.json"); }' > src/lib.rs +//! +//! cargo build --release +//! ls -lh target/release/*.dylib # macOS +//! ls -lh target/release/*.so # Linux +//! ``` +//! +//! **2. Measure per-crate contribution with cargo-bloat:** +//! +//! ```bash +//! cargo install cargo-bloat +//! cargo bloat --release --crates -n 30 +//! ``` +//! +//! **3. Measure dependency rlib sizes (compile-time cost):** +//! +//! ```bash +//! 
# Total rlib for runtime deps only +//! cargo tree --edges=normal --prefix none -f '{p}' | awk '{print $1}' | sort -u | sed 's/-/_/g' > /tmp/deps.txt +//! +//! for f in target/release/deps/*.rlib; do +//! sz=$(stat -f%z "$f" 2>/dev/null || stat -c%s "$f" 2>/dev/null) +//! name=$(basename "$f" | sed 's/-[a-f0-9]*\.rlib//' | sed 's/^lib//') +//! echo "$sz $name" +//! done | sort -t' ' -k2 | awk '!seen[$2]++ {print}' | sort -k2 > /tmp/rlibs.txt +//! +//! join -1 2 -2 1 /tmp/rlibs.txt /tmp/deps.txt | awk '{ +//! total+=$2 +//! printf "%8.1f KB %s\n", $2/1024, $1 +//! } END { +//! printf "\nTOTAL: %.1f MB\n", total/1048576 +//! }' | sort -rn +//! ``` +//! +//! **4. Track size in CI (regression test):** +//! +//! ```bash +//! #!/bin/bash +//! # scripts/check-bundle-size.sh +//! set -e +//! +//! MAX_DYLIB_KB=2500 # 2.5 MB threshold +//! +//! cargo build --release --no-default-features --features "onig" \ +//! --target-dir /tmp/size-check +//! +//! SIZE=$(stat -f%z /tmp/size-check/release/libtokenizers.rlib 2>/dev/null \ +//! || stat -c%s /tmp/size-check/release/libtokenizers.rlib) +//! SIZE_KB=$((SIZE / 1024)) +//! +//! echo "libtokenizers.rlib: ${SIZE_KB} KB" +//! +//! # For the actual linked size, build a cdylib test crate +//! # (see step 1 above) and check the .dylib/.so size +//! +//! if [ "$SIZE_KB" -gt "$MAX_DYLIB_KB" ]; then +//! echo "FAIL: bundle size ${SIZE_KB} KB exceeds threshold ${MAX_DYLIB_KB} KB" +//! exit 1 +//! fi +//! echo "PASS: bundle size OK" +//! ``` #[macro_use] extern crate log; -#[macro_use] -extern crate derive_builder; - #[macro_use] pub mod utils; pub mod decoders; diff --git a/tokenizers/src/models/bpe/mod.rs b/tokenizers/src/models/bpe/mod.rs index f0d40b2df6..f337f9b9fa 100644 --- a/tokenizers/src/models/bpe/mod.rs +++ b/tokenizers/src/models/bpe/mod.rs @@ -1,8 +1,10 @@ //! [Byte Pair Encoding](https://www.aclweb.org/anthology/P16-1162/) model. 
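+//!
+//! The `trainer` submodule and the iterator helpers below are compiled only
+//! when the `training` feature (part of the default set) is enabled. A minimal
+//! sketch of downstream code gated the same way (the function name here is
+//! illustrative only):
+//!
+//! ```ignore
+//! #[cfg(feature = "training")]
+//! fn default_bpe_trainer() -> tokenizers::models::bpe::BpeTrainer {
+//!     tokenizers::models::bpe::BpeTrainer::default()
+//! }
+//! ```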
+#[cfg(feature = "training")] use std::{iter, mem}; mod model; mod serialization; +#[cfg(feature = "training")] pub mod trainer; mod word; @@ -35,11 +37,13 @@ pub enum Error { InvalidDropout, } +#[cfg(feature = "training")] /// Provides access to the `FirstLastIterator` to any Iterator pub(crate) trait WithFirstLastIterator: Iterator + Sized { fn with_first_and_last(self) -> FirstLastIterator; } +#[cfg(feature = "training")] impl WithFirstLastIterator for I where I: Iterator, @@ -52,6 +56,7 @@ where } } +#[cfg(feature = "training")] /// Provides information about whether an item is the first and/or the last of the iterator pub(crate) struct FirstLastIterator where @@ -61,6 +66,7 @@ where iter: iter::Peekable, } +#[cfg(feature = "training")] impl Iterator for FirstLastIterator where I: Iterator, @@ -78,5 +84,6 @@ where // Re-export pub use model::*; +#[cfg(feature = "training")] pub use trainer::*; use word::*; diff --git a/tokenizers/src/models/bpe/model.rs b/tokenizers/src/models/bpe/model.rs index c0e4f7d84d..9ba46ebe19 100644 --- a/tokenizers/src/models/bpe/model.rs +++ b/tokenizers/src/models/bpe/model.rs @@ -1,8 +1,10 @@ -use super::{super::OrderedVocabIter, trainer::BpeTrainer, Error, Pair, Word}; +#[cfg(feature = "training")] +use super::trainer::BpeTrainer; +use super::{super::OrderedVocabIter, Error, Pair, Word}; use crate::tokenizer::{Model, Result, Token}; use crate::utils::cache::{Cache, DEFAULT_CACHE_CAPACITY, MAX_LENGTH}; use crate::utils::iter::ResultShunt; -use ahash::AHashMap; +use crate::utils::{AHashMap, HashMapExt}; use serde_json::Value; use std::borrow::Cow; @@ -510,6 +512,7 @@ impl BPE { } impl Model for BPE { + #[cfg(feature = "training")] type Trainer = BpeTrainer; fn get_vocab(&self) -> HashMap { @@ -585,6 +588,7 @@ impl Model for BPE { Ok(vec![vocab_path, merges_path]) } + #[cfg(feature = "training")] fn get_trainer(&self) -> BpeTrainer { BpeTrainer::default() } diff --git a/tokenizers/src/models/bpe/serialization.rs b/tokenizers/src/models/bpe/serialization.rs index 98cf549445..3c0bdfb1a3 100644 --- a/tokenizers/src/models/bpe/serialization.rs +++ b/tokenizers/src/models/bpe/serialization.rs @@ -1,5 +1,5 @@ use super::{super::OrderedVocabIter, convert_merges_to_hashmap, BpeBuilder, Pair, BPE}; -use ahash::AHashMap; +use crate::utils::AHashMap; use serde::{ de::{Error, MapAccess, Visitor}, ser::SerializeStruct, diff --git a/tokenizers/src/models/bpe/trainer.rs b/tokenizers/src/models/bpe/trainer.rs index df68c655e9..3721d66584 100644 --- a/tokenizers/src/models/bpe/trainer.rs +++ b/tokenizers/src/models/bpe/trainer.rs @@ -4,7 +4,7 @@ use super::{Pair, WithFirstLastIterator, Word, BPE}; use crate::parallelism::*; use crate::tokenizer::{AddedToken, Result, Trainer}; use crate::utils::progress::{ProgressBar, ProgressFormat, ProgressStyle}; -use ahash::{AHashMap, AHashSet}; +use crate::utils::{AHashMap, AHashSet, HashMapExt, HashSetExt}; use compact_str::CompactString; use dary_heap::OctonaryHeap; use serde::{Deserialize, Serialize}; @@ -608,9 +608,9 @@ impl BpeTrainer { // Transfer new vocab & options to model //model.vocab = word_to_id; model.vocab = word_to_id - .into_iter() + .into_values() // we have to look up the string in id_to_word because the key in word_to_id is a hash - .map(|(_key, val)| (id_to_word[val as usize].to_string(), val)) + .map(|val| (id_to_word[val as usize].to_string(), val)) .collect(); model.vocab_r = model .vocab @@ -678,7 +678,7 @@ impl Trainer for BpeTrainer { #[cfg(test)] mod tests { use super::{BpeTrainer, Pair, BPE}; - use 
ahash::AHashMap; + use crate::utils::AHashMap; use compact_str::CompactString; #[test] diff --git a/tokenizers/src/models/bpe/word.rs b/tokenizers/src/models/bpe/word.rs index 7bf2dee566..a4b75b113f 100644 --- a/tokenizers/src/models/bpe/word.rs +++ b/tokenizers/src/models/bpe/word.rs @@ -1,6 +1,7 @@ use super::Pair; -use ahash::AHashMap; +use crate::utils::AHashMap; use dary_heap::QuaternaryHeap; +#[cfg(feature = "training")] use rand::{rng, Rng}; use std::cmp::Ordering; @@ -75,6 +76,7 @@ impl std::fmt::Debug for Word { } impl Word { + #[cfg_attr(not(feature = "training"), allow(dead_code))] pub(super) fn new() -> Self { Word { symbols: vec![] } } @@ -104,6 +106,7 @@ impl Word { }); } + #[cfg_attr(not(feature = "training"), allow(dead_code))] pub(super) fn merge( &mut self, c1: u32, @@ -178,7 +181,18 @@ impl Word { ); while let Some(top) = queue.pop() { - if dropout.map(|d| rng().random::() < d).unwrap_or(false) { + let should_skip = { + #[cfg(feature = "training")] + { + dropout.map(|d| rng().random::() < d).unwrap_or(false) + } + #[cfg(not(feature = "training"))] + { + let _ = &dropout; + false + } + }; + if should_skip { skip.push(top); } else { // Re-insert the skipped elements @@ -249,6 +263,7 @@ impl Word { self.symbols.retain(|s| s.len != 0); } + #[cfg_attr(not(feature = "training"), allow(dead_code))] pub(super) fn get_chars(&self) -> Vec { self.symbols.iter().map(|s| s.c).collect() } diff --git a/tokenizers/src/models/mod.rs b/tokenizers/src/models/mod.rs index 041e3b629b..cdb2ac8d8f 100644 --- a/tokenizers/src/models/mod.rs +++ b/tokenizers/src/models/mod.rs @@ -5,17 +5,27 @@ pub mod unigram; pub mod wordlevel; pub mod wordpiece; -use ahash::AHashMap; +use crate::utils::AHashMap; use std::collections::HashMap; use std::path::{Path, PathBuf}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use crate::models::bpe::{BpeTrainer, BPE}; -use crate::models::unigram::{Unigram, UnigramTrainer}; -use crate::models::wordlevel::{WordLevel, WordLevelTrainer}; -use crate::models::wordpiece::{WordPiece, WordPieceTrainer}; -use crate::{AddedToken, Model, Result, Token, Trainer}; +#[cfg(feature = "training")] +use crate::models::bpe::BpeTrainer; +use crate::models::bpe::BPE; +use crate::models::unigram::Unigram; +#[cfg(feature = "training")] +use crate::models::unigram::UnigramTrainer; +use crate::models::wordlevel::WordLevel; +#[cfg(feature = "training")] +use crate::models::wordlevel::WordLevelTrainer; +use crate::models::wordpiece::WordPiece; +#[cfg(feature = "training")] +use crate::models::wordpiece::WordPieceTrainer; +#[cfg(feature = "training")] +use crate::{AddedToken, Trainer}; +use crate::{Model, Result, Token}; /// Wraps a vocab mapping (ID -> token) to a struct that will be serialized in order /// of token ID, smallest to largest. 
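`ahash` is no longer a dependency of the crate or of the bindings; code that previously imported `ahash::AHashMap` to build vocabularies for these models now goes through the public re-export, exactly as the node and python bindings above were updated to do. A minimal sketch of the downstream change (constructing via `collect()` so no extension trait is assumed):

```rust
// Before: use ahash::AHashMap;
use tokenizers::utils::AHashMap;

fn tiny_vocab() -> AHashMap<String, u32> {
    [("hello".to_string(), 0u32), ("world".to_string(), 1)]
        .into_iter()
        .collect()
}
```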
@@ -141,6 +151,7 @@ impl_enum_from!(BPE, ModelWrapper, BPE); impl_enum_from!(Unigram, ModelWrapper, Unigram); impl Model for ModelWrapper { + #[cfg(feature = "training")] type Trainer = TrainerWrapper; fn tokenize(&self, tokens: &str) -> Result> { @@ -197,6 +208,7 @@ impl Model for ModelWrapper { } } + #[cfg(feature = "training")] fn get_trainer(&self) -> Self::Trainer { match self { Self::WordLevel(t) => t.get_trainer().into(), @@ -224,6 +236,7 @@ impl ModelWrapper { } } +#[cfg(feature = "training")] #[derive(Clone, Serialize, Deserialize)] pub enum TrainerWrapper { BpeTrainer(BpeTrainer), @@ -232,6 +245,7 @@ pub enum TrainerWrapper { UnigramTrainer(UnigramTrainer), } +#[cfg(feature = "training")] impl Trainer for TrainerWrapper { type Model = ModelWrapper; @@ -280,9 +294,13 @@ impl Trainer for TrainerWrapper { } } +#[cfg(feature = "training")] impl_enum_from!(BpeTrainer, TrainerWrapper, BpeTrainer); +#[cfg(feature = "training")] impl_enum_from!(WordPieceTrainer, TrainerWrapper, WordPieceTrainer); +#[cfg(feature = "training")] impl_enum_from!(UnigramTrainer, TrainerWrapper, UnigramTrainer); +#[cfg(feature = "training")] impl_enum_from!(WordLevelTrainer, TrainerWrapper, WordLevelTrainer); #[cfg(test)] @@ -302,7 +320,7 @@ mod tests { #[test] fn incomplete_ordered_vocab() { let vocab_r: AHashMap = - AHashMap::from([(0, "Hi".to_string()), (2, "There".to_string())]); + IntoIterator::into_iter([(0u32, "Hi".to_string()), (2, "There".to_string())]).collect(); let ordered = OrderedVocabIter::new(&vocab_r); diff --git a/tokenizers/src/models/unigram/lattice.rs b/tokenizers/src/models/unigram/lattice.rs index 5464671f1a..93e75b1cb8 100644 --- a/tokenizers/src/models/unigram/lattice.rs +++ b/tokenizers/src/models/unigram/lattice.rs @@ -1,5 +1,7 @@ use dary_heap::QuaternaryHeap; +#[cfg(feature = "training")] use rand::distr::weighted::WeightedIndex; +#[cfg(feature = "training")] use rand::{prelude::*, rng}; use std::cell::RefCell; use std::cmp::{min, Ordering}; @@ -377,6 +379,7 @@ impl<'a> Lattice<'a> { freq * z } + #[cfg(feature = "training")] pub fn sample(&self, theta: f64) -> Vec { let len = self.len(); if len == 0 { @@ -422,6 +425,7 @@ impl<'a> Lattice<'a> { results } + #[cfg(feature = "training")] pub fn sample_token(&self, theta: f64) -> Vec { self.sample(theta) .iter() @@ -429,6 +433,7 @@ impl<'a> Lattice<'a> { .collect() } + #[cfg(feature = "training")] pub fn sample_nbest(&mut self, n: usize, theta: f64) -> Vec { let nbest_paths = self.nbest(n); if nbest_paths.is_empty() { diff --git a/tokenizers/src/models/unigram/mod.rs b/tokenizers/src/models/unigram/mod.rs index d408b5c8f0..f219cb80b6 100644 --- a/tokenizers/src/models/unigram/mod.rs +++ b/tokenizers/src/models/unigram/mod.rs @@ -2,9 +2,11 @@ mod lattice; mod model; mod serialization; +#[cfg(feature = "training")] mod trainer; mod trie; pub use lattice::*; pub use model::*; +#[cfg(feature = "training")] pub use trainer::*; diff --git a/tokenizers/src/models/unigram/model.rs b/tokenizers/src/models/unigram/model.rs index 3a9a6bddbd..488065a5f5 100644 --- a/tokenizers/src/models/unigram/model.rs +++ b/tokenizers/src/models/unigram/model.rs @@ -1,13 +1,14 @@ +#[cfg(feature = "training")] +use super::trainer::UnigramTrainer; use super::{ lattice::Lattice, - trainer::UnigramTrainer, trie::{Trie, TrieBuilder}, }; use crate::tokenizer::{Model, Result, Token}; use crate::utils::cache::{Cache, MAX_LENGTH}; use std::collections::HashMap; -use ahash::AHashMap; +use crate::utils::{AHashMap, HashMapExt}; use std::convert::TryInto; use 
std::fs::read_to_string; use std::path::{Path, PathBuf}; @@ -346,10 +347,19 @@ impl Unigram { fn encode_unoptimized(&self, sentence: &str) -> Result> { let mut lattice = Lattice::from(sentence, self.bos_id, self.eos_id); self.populate_nodes(&mut lattice); - let path = match (self.nbest_size, self.alpha) { - (Some(n), Some(alpha)) if n > 0 => lattice.sample_nbest(n, alpha), - (_, Some(alpha)) => lattice.sample(alpha), - _ => lattice.viterbi(), + let path = { + #[cfg(feature = "training")] + { + match (self.nbest_size, self.alpha) { + (Some(n), Some(alpha)) if n > 0 => lattice.sample_nbest(n, alpha), + (_, Some(alpha)) => lattice.sample(alpha), + _ => lattice.viterbi(), + } + } + #[cfg(not(feature = "training"))] + { + lattice.viterbi() + } }; if self.fuse_unk { let mut results = vec![]; @@ -430,6 +440,7 @@ impl<'a> Iterator for UnigramIterator<'a> { } impl Model for Unigram { + #[cfg(feature = "training")] type Trainer = UnigramTrainer; fn get_vocab(&self) -> HashMap { @@ -497,6 +508,7 @@ impl Model for Unigram { Ok(vec![fullpath]) } + #[cfg(feature = "training")] fn get_trainer(&self) -> Self::Trainer { UnigramTrainer::default() } diff --git a/tokenizers/src/models/unigram/trainer.rs b/tokenizers/src/models/unigram/trainer.rs index ff5ca9428a..575701f648 100644 --- a/tokenizers/src/models/unigram/trainer.rs +++ b/tokenizers/src/models/unigram/trainer.rs @@ -2,7 +2,7 @@ use crate::models::unigram::{lattice::Lattice, model::Unigram}; use crate::tokenizer::{AddedToken, Result, Trainer}; use crate::utils::parallelism::*; use crate::utils::progress::{ProgressBar, ProgressStyle}; -use ahash::{AHashMap, AHashSet}; +use crate::utils::{AHashMap, AHashSet, HashMapExt, HashSetExt}; use log::debug; use serde::{Deserialize, Serialize}; use std::cmp::Reverse; @@ -45,35 +45,108 @@ fn to_log_prob(pieces: &mut [SentencePiece]) { /// A `UnigramTrainer` can train a `Unigram` model from `word_counts`. #[non_exhaustive] -#[derive(Builder, Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct UnigramTrainer { - #[builder(default = "true")] pub show_progress: bool, - #[builder(default = "8000")] pub vocab_size: u32, - #[builder(default = "2")] pub n_sub_iterations: u32, - #[builder(default = "0.75")] pub shrinking_factor: f64, - #[builder(default = "vec![]")] pub special_tokens: Vec, - #[builder(default = "AHashSet::new()")] pub initial_alphabet: AHashSet, - - #[builder(default = "None")] pub unk_token: Option, - - #[builder(default = "16")] pub max_piece_length: usize, - #[builder(default = "1_000_000")] seed_size: usize, - #[builder(default = "AHashMap::new()")] words: AHashMap, } impl Default for UnigramTrainer { fn default() -> Self { - Self::builder().build().unwrap() + Self { + show_progress: true, + vocab_size: 8000, + n_sub_iterations: 2, + shrinking_factor: 0.75, + special_tokens: vec![], + initial_alphabet: AHashSet::new(), + unk_token: None, + max_piece_length: 16, + seed_size: 1_000_000, + words: AHashMap::new(), + } + } +} + +/// Builder for `UnigramTrainer`. 
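+///
+/// Hand-written replacement for the builder that `derive_builder` used to
+/// generate; a typical call chain (a sketch using the setters defined below) is
+/// `UnigramTrainerBuilder::default().vocab_size(8000).show_progress(false).build()`.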
+#[derive(Debug, Clone, Default)] +pub struct UnigramTrainerBuilder { + show_progress: Option, + vocab_size: Option, + n_sub_iterations: Option, + shrinking_factor: Option, + special_tokens: Option>, + initial_alphabet: Option>, + unk_token: Option>, + max_piece_length: Option, + seed_size: Option, +} + +impl UnigramTrainerBuilder { + pub fn show_progress(&mut self, show_progress: bool) -> &mut Self { + self.show_progress = Some(show_progress); + self + } + pub fn vocab_size(&mut self, vocab_size: u32) -> &mut Self { + self.vocab_size = Some(vocab_size); + self + } + pub fn n_sub_iterations(&mut self, n_sub_iterations: u32) -> &mut Self { + self.n_sub_iterations = Some(n_sub_iterations); + self + } + pub fn shrinking_factor(&mut self, shrinking_factor: f64) -> &mut Self { + self.shrinking_factor = Some(shrinking_factor); + self + } + pub fn special_tokens(&mut self, special_tokens: Vec) -> &mut Self { + self.special_tokens = Some(special_tokens); + self + } + pub fn initial_alphabet(&mut self, initial_alphabet: AHashSet) -> &mut Self { + self.initial_alphabet = Some(initial_alphabet); + self + } + pub fn unk_token(&mut self, unk_token: Option) -> &mut Self { + self.unk_token = Some(unk_token); + self + } + pub fn max_piece_length(&mut self, max_piece_length: usize) -> &mut Self { + self.max_piece_length = Some(max_piece_length); + self + } + pub fn seed_size(&mut self, seed_size: usize) -> &mut Self { + self.seed_size = Some(seed_size); + self + } + pub fn build(&self) -> Result { + let default = UnigramTrainer::default(); + Ok(UnigramTrainer { + show_progress: self.show_progress.unwrap_or(default.show_progress), + vocab_size: self.vocab_size.unwrap_or(default.vocab_size), + n_sub_iterations: self.n_sub_iterations.unwrap_or(default.n_sub_iterations), + shrinking_factor: self.shrinking_factor.unwrap_or(default.shrinking_factor), + special_tokens: self + .special_tokens + .clone() + .unwrap_or(default.special_tokens), + initial_alphabet: self + .initial_alphabet + .clone() + .unwrap_or(default.initial_alphabet), + unk_token: self.unk_token.clone().unwrap_or(default.unk_token), + max_piece_length: self.max_piece_length.unwrap_or(default.max_piece_length), + seed_size: self.seed_size.unwrap_or(default.seed_size), + words: AHashMap::new(), + }) } } diff --git a/tokenizers/src/models/unigram/trie.rs b/tokenizers/src/models/unigram/trie.rs index 7c7149d00a..894cae3cf7 100644 --- a/tokenizers/src/models/unigram/trie.rs +++ b/tokenizers/src/models/unigram/trie.rs @@ -1,4 +1,4 @@ -use ahash::AHashMap; +use crate::utils::{AHashMap, HashMapExt}; use std::hash::Hash; #[derive(Default)] diff --git a/tokenizers/src/models/wordlevel/mod.rs b/tokenizers/src/models/wordlevel/mod.rs index 94e7c86b4f..bd62f1b4e8 100644 --- a/tokenizers/src/models/wordlevel/mod.rs +++ b/tokenizers/src/models/wordlevel/mod.rs @@ -1,6 +1,6 @@ use super::OrderedVocabIter; use crate::tokenizer::{Model, Result, Token}; -use ahash::AHashMap; +use crate::utils::{AHashMap, HashMapExt}; use serde_json::Value; use std::collections::HashMap; use std::fs::File; @@ -8,9 +8,11 @@ use std::io::{BufReader, Read, Write}; use std::path::{Path, PathBuf}; mod serialization; +#[cfg(feature = "training")] mod trainer; // Re-export +#[cfg(feature = "training")] pub use trainer::*; type Vocab = AHashMap; @@ -157,6 +159,7 @@ impl Default for WordLevel { } impl Model for WordLevel { + #[cfg(feature = "training")] type Trainer = WordLevelTrainer; fn tokenize(&self, token: &str) -> Result> { @@ -211,6 +214,7 @@ impl Model for WordLevel { 
Ok(vec![vocab_path]) } + #[cfg(feature = "training")] fn get_trainer(&self) -> Self::Trainer { WordLevelTrainer::default() } diff --git a/tokenizers/src/models/wordlevel/serialization.rs b/tokenizers/src/models/wordlevel/serialization.rs index 1cc79339e0..281e217ea9 100644 --- a/tokenizers/src/models/wordlevel/serialization.rs +++ b/tokenizers/src/models/wordlevel/serialization.rs @@ -1,5 +1,5 @@ use super::{super::OrderedVocabIter, WordLevel, WordLevelBuilder}; -use ahash::AHashSet; +use crate::utils::AHashSet; use serde::{ de::{MapAccess, Visitor}, ser::SerializeStruct, diff --git a/tokenizers/src/models/wordlevel/trainer.rs b/tokenizers/src/models/wordlevel/trainer.rs index bf980b0d32..3e2d39d484 100644 --- a/tokenizers/src/models/wordlevel/trainer.rs +++ b/tokenizers/src/models/wordlevel/trainer.rs @@ -1,33 +1,75 @@ use super::WordLevel; use crate::utils::parallelism::*; +use crate::utils::{AHashMap, HashMapExt}; use crate::{AddedToken, Result, Trainer}; -use ahash::AHashMap; use serde::{Deserialize, Serialize}; use std::cmp::Ordering; #[non_exhaustive] -#[derive(Debug, Clone, Builder, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct WordLevelTrainer { /// The minimum frequency a word must have to be part of the vocabulary - #[builder(default = "0")] pub min_frequency: u64, /// The target vocabulary size - #[builder(default = "30_000")] pub vocab_size: usize, /// Whether to show progress while training - #[builder(default = "true")] pub show_progress: bool, /// A list of special tokens that the model should know of - #[builder(default)] pub special_tokens: Vec, - #[builder(default, private)] words: AHashMap, } impl Default for WordLevelTrainer { fn default() -> Self { - Self::builder().build().unwrap() + Self { + min_frequency: 0, + vocab_size: 30_000, + show_progress: true, + special_tokens: vec![], + words: AHashMap::new(), + } + } +} + +/// Builder for `WordLevelTrainer`. 
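+///
+/// Mirrors the builder API previously generated by `derive_builder`; e.g. (a
+/// sketch using the setters below)
+/// `WordLevelTrainerBuilder::default().vocab_size(30_000).min_frequency(2).build()`.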
+#[derive(Debug, Clone, Default)] +pub struct WordLevelTrainerBuilder { + min_frequency: Option, + vocab_size: Option, + show_progress: Option, + special_tokens: Option>, +} + +impl WordLevelTrainerBuilder { + pub fn min_frequency(&mut self, min_frequency: u64) -> &mut Self { + self.min_frequency = Some(min_frequency); + self + } + pub fn vocab_size(&mut self, vocab_size: usize) -> &mut Self { + self.vocab_size = Some(vocab_size); + self + } + pub fn show_progress(&mut self, show_progress: bool) -> &mut Self { + self.show_progress = Some(show_progress); + self + } + pub fn special_tokens(&mut self, special_tokens: Vec) -> &mut Self { + self.special_tokens = Some(special_tokens); + self + } + pub fn build(&self) -> Result { + let default = WordLevelTrainer::default(); + Ok(WordLevelTrainer { + min_frequency: self.min_frequency.unwrap_or(default.min_frequency), + vocab_size: self.vocab_size.unwrap_or(default.vocab_size), + show_progress: self.show_progress.unwrap_or(default.show_progress), + special_tokens: self + .special_tokens + .clone() + .unwrap_or(default.special_tokens), + words: AHashMap::new(), + }) } } diff --git a/tokenizers/src/models/wordpiece/mod.rs b/tokenizers/src/models/wordpiece/mod.rs index 61fa44f071..790094cb9d 100644 --- a/tokenizers/src/models/wordpiece/mod.rs +++ b/tokenizers/src/models/wordpiece/mod.rs @@ -3,7 +3,7 @@ use crate::models::bpe::BPE; use crate::tokenizer::{Model, Result, Token}; -use ahash::AHashMap; +use crate::utils::{AHashMap, HashMapExt}; use std::collections::HashMap; use std::{ borrow::Cow, @@ -14,7 +14,9 @@ use std::{ }; mod serialization; +#[cfg(feature = "training")] mod trainer; +#[cfg(feature = "training")] pub use trainer::*; #[derive(thiserror::Error, Debug)] @@ -211,6 +213,7 @@ impl WordPiece { } impl Model for WordPiece { + #[cfg(feature = "training")] type Trainer = WordPieceTrainer; fn get_vocab(&self) -> HashMap { @@ -313,6 +316,7 @@ impl Model for WordPiece { Ok(vec![vocab_path]) } + #[cfg(feature = "training")] fn get_trainer(&self) -> Self::Trainer { WordPieceTrainer::builder().build() } diff --git a/tokenizers/src/models/wordpiece/serialization.rs b/tokenizers/src/models/wordpiece/serialization.rs index 7ba496d63c..f832b6753a 100644 --- a/tokenizers/src/models/wordpiece/serialization.rs +++ b/tokenizers/src/models/wordpiece/serialization.rs @@ -1,5 +1,5 @@ use super::{super::OrderedVocabIter, WordPiece, WordPieceBuilder}; -use ahash::{AHashMap, AHashSet}; +use crate::utils::{AHashMap, AHashSet}; use serde::{ de::{MapAccess, Visitor}, ser::SerializeStruct, diff --git a/tokenizers/src/models/wordpiece/trainer.rs b/tokenizers/src/models/wordpiece/trainer.rs index 29d4561521..cba8590dc7 100644 --- a/tokenizers/src/models/wordpiece/trainer.rs +++ b/tokenizers/src/models/wordpiece/trainer.rs @@ -3,7 +3,7 @@ use std::collections::HashSet; use super::WordPiece; use crate::models::bpe::{BpeTrainer, BpeTrainerBuilder, BPE}; use crate::tokenizer::{AddedToken, Result, Trainer}; -use ahash::AHashSet; +use crate::utils::{AHashSet, HashSetExt}; use serde::{Deserialize, Serialize}; /// A `WordPieceTrainerBuilder` can be used to create a `WordPieceTrainer` with a custom diff --git a/tokenizers/src/normalizers/bert.rs b/tokenizers/src/normalizers/bert.rs index 90d982c680..81d8adb89e 100644 --- a/tokenizers/src/normalizers/bert.rs +++ b/tokenizers/src/normalizers/bert.rs @@ -108,7 +108,12 @@ impl BertNormalizer { } fn do_strip_accents(&self, normalized: &mut NormalizedString) { + #[cfg(feature = "unicode-normalization")] normalized.nfd().filter(|c| 
!c.is_mark_nonspacing()); + #[cfg(not(feature = "unicode-normalization"))] + { + let _ = normalized; + } } fn do_lowercase(&self, normalized: &mut NormalizedString) { diff --git a/tokenizers/src/normalizers/byte_level.rs b/tokenizers/src/normalizers/byte_level.rs index 41fd416156..f23d0cefb9 100644 --- a/tokenizers/src/normalizers/byte_level.rs +++ b/tokenizers/src/normalizers/byte_level.rs @@ -1,7 +1,7 @@ use crate::processors::byte_level::bytes_char; use crate::tokenizer::{NormalizedString, Normalizer, Result}; use crate::utils::macro_rules_attribute; -use ahash::{AHashMap, AHashSet}; +use crate::utils::{AHashMap, AHashSet}; use std::sync::LazyLock; #[derive(Clone, Debug)] diff --git a/tokenizers/src/normalizers/mod.rs b/tokenizers/src/normalizers/mod.rs index f400f13da9..d0a0989d4e 100644 --- a/tokenizers/src/normalizers/mod.rs +++ b/tokenizers/src/normalizers/mod.rs @@ -1,5 +1,6 @@ pub mod bert; pub mod byte_level; +#[cfg(feature = "spm")] pub mod precompiled; pub mod prepend; pub mod replace; @@ -8,11 +9,14 @@ pub mod unicode; pub mod utils; pub use crate::normalizers::bert::BertNormalizer; pub use crate::normalizers::byte_level::ByteLevel; +#[cfg(feature = "spm")] pub use crate::normalizers::precompiled::Precompiled; pub use crate::normalizers::prepend::Prepend; pub use crate::normalizers::replace::Replace; pub use crate::normalizers::strip::{Strip, StripAccents}; -pub use crate::normalizers::unicode::{Nmt, NFC, NFD, NFKC, NFKD}; +pub use crate::normalizers::unicode::Nmt; +#[cfg(feature = "unicode-normalization")] +pub use crate::normalizers::unicode::{NFC, NFD, NFKC, NFKD}; pub use crate::normalizers::utils::{Lowercase, Sequence}; use serde::{Deserialize, Deserializer, Serialize}; @@ -25,13 +29,18 @@ pub enum NormalizerWrapper { BertNormalizer(BertNormalizer), StripNormalizer(Strip), StripAccents(StripAccents), + #[cfg(feature = "unicode-normalization")] NFC(NFC), + #[cfg(feature = "unicode-normalization")] NFD(NFD), + #[cfg(feature = "unicode-normalization")] NFKC(NFKC), + #[cfg(feature = "unicode-normalization")] NFKD(NFKD), Sequence(Sequence), Lowercase(Lowercase), Nmt(Nmt), + #[cfg(feature = "spm")] Precompiled(Precompiled), Replace(Replace), Prepend(Prepend), @@ -81,14 +90,17 @@ impl<'de> Deserialize<'de> for NormalizerWrapper { BertNormalizer(BertNormalizer), StripNormalizer(Strip), StripAccents(StripAccents), + #[cfg(feature = "unicode-normalization")] NFC(NFC), + #[cfg(feature = "unicode-normalization")] NFD(NFD), + #[cfg(feature = "unicode-normalization")] NFKC(NFKC), + #[cfg(feature = "unicode-normalization")] NFKD(NFKD), Sequence(Sequence), Lowercase(Lowercase), Nmt(Nmt), - Precompiled(Precompiled), Replace(Replace), Prepend(Prepend), ByteLevel(ByteLevel), @@ -114,18 +126,62 @@ impl<'de> Deserialize<'de> for NormalizerWrapper { EnumType::StripAccents => NormalizerWrapper::StripAccents( serde_json::from_value(values).map_err(serde::de::Error::custom)?, ), - EnumType::NFC => NormalizerWrapper::NFC( - serde_json::from_value(values).map_err(serde::de::Error::custom)?, - ), - EnumType::NFD => NormalizerWrapper::NFD( - serde_json::from_value(values).map_err(serde::de::Error::custom)?, - ), - EnumType::NFKC => NormalizerWrapper::NFKC( - serde_json::from_value(values).map_err(serde::de::Error::custom)?, - ), - EnumType::NFKD => NormalizerWrapper::NFKD( - serde_json::from_value(values).map_err(serde::de::Error::custom)?, - ), + EnumType::NFC => { + #[cfg(feature = "unicode-normalization")] + { + NormalizerWrapper::NFC( + 
serde_json::from_value(values).map_err(serde::de::Error::custom)?, + ) + } + #[cfg(not(feature = "unicode-normalization"))] + { + return Err(serde::de::Error::custom( + "NFC normalizer requires the `unicode-normalization` feature", + )); + } + } + EnumType::NFD => { + #[cfg(feature = "unicode-normalization")] + { + NormalizerWrapper::NFD( + serde_json::from_value(values).map_err(serde::de::Error::custom)?, + ) + } + #[cfg(not(feature = "unicode-normalization"))] + { + return Err(serde::de::Error::custom( + "NFD normalizer requires the `unicode-normalization` feature", + )); + } + } + EnumType::NFKC => { + #[cfg(feature = "unicode-normalization")] + { + NormalizerWrapper::NFKC( + serde_json::from_value(values).map_err(serde::de::Error::custom)?, + ) + } + #[cfg(not(feature = "unicode-normalization"))] + { + return Err(serde::de::Error::custom( + "NFKC normalizer requires the `unicode-normalization` feature", + )); + } + } + EnumType::NFKD => { + #[cfg(feature = "unicode-normalization")] + { + NormalizerWrapper::NFKD( + serde_json::from_value(values).map_err(serde::de::Error::custom)?, + ) + } + #[cfg(not(feature = "unicode-normalization"))] + { + return Err(serde::de::Error::custom( + "NFKD normalizer requires the `unicode-normalization` feature", + )); + } + } EnumType::Sequence => NormalizerWrapper::Sequence( serde_json::from_value(values).map_err(serde::de::Error::custom)?, ), @@ -135,13 +191,24 @@ impl<'de> Deserialize<'de> for NormalizerWrapper { EnumType::Nmt => NormalizerWrapper::Nmt( serde_json::from_value(values).map_err(serde::de::Error::custom)?, ), - EnumType::Precompiled => NormalizerWrapper::Precompiled( - serde_json::from_str( - &serde_json::to_string(&values).expect("Can reserialize precompiled"), - ) - // .map_err(serde::de::Error::custom) - .expect("Precompiled"), - ), + EnumType::Precompiled => { + #[cfg(feature = "spm")] + { + NormalizerWrapper::Precompiled( + serde_json::from_str( + &serde_json::to_string(&values) + .expect("Can reserialize precompiled"), + ) + .expect("Precompiled"), + ) + } + #[cfg(not(feature = "spm"))] + { + return Err(serde::de::Error::custom( + "Precompiled normalizer requires the `spm` feature", + )); + } + } EnumType::Replace => NormalizerWrapper::Replace( serde_json::from_value(values).map_err(serde::de::Error::custom)?, ), @@ -164,14 +231,17 @@ impl<'de> Deserialize<'de> for NormalizerWrapper { NormalizerWrapper::StripNormalizer(bpe) } NormalizerUntagged::StripAccents(bpe) => NormalizerWrapper::StripAccents(bpe), + #[cfg(feature = "unicode-normalization")] NormalizerUntagged::NFC(bpe) => NormalizerWrapper::NFC(bpe), + #[cfg(feature = "unicode-normalization")] NormalizerUntagged::NFD(bpe) => NormalizerWrapper::NFD(bpe), + #[cfg(feature = "unicode-normalization")] NormalizerUntagged::NFKC(bpe) => NormalizerWrapper::NFKC(bpe), + #[cfg(feature = "unicode-normalization")] NormalizerUntagged::NFKD(bpe) => NormalizerWrapper::NFKD(bpe), NormalizerUntagged::Sequence(seq) => NormalizerWrapper::Sequence(seq), NormalizerUntagged::Lowercase(bpe) => NormalizerWrapper::Lowercase(bpe), NormalizerUntagged::Nmt(bpe) => NormalizerWrapper::Nmt(bpe), - NormalizerUntagged::Precompiled(bpe) => NormalizerWrapper::Precompiled(bpe), NormalizerUntagged::Replace(bpe) => NormalizerWrapper::Replace(bpe), NormalizerUntagged::Prepend(bpe) => NormalizerWrapper::Prepend(bpe), NormalizerUntagged::ByteLevel(bpe) => NormalizerWrapper::ByteLevel(bpe), @@ -187,13 +257,18 @@ impl Normalizer for NormalizerWrapper { Self::BertNormalizer(bn) => bn.normalize(normalized), 
Self::StripNormalizer(sn) => sn.normalize(normalized), Self::StripAccents(sn) => sn.normalize(normalized), + #[cfg(feature = "unicode-normalization")] Self::NFC(nfc) => nfc.normalize(normalized), + #[cfg(feature = "unicode-normalization")] Self::NFD(nfd) => nfd.normalize(normalized), + #[cfg(feature = "unicode-normalization")] Self::NFKC(nfkc) => nfkc.normalize(normalized), + #[cfg(feature = "unicode-normalization")] Self::NFKD(nfkd) => nfkd.normalize(normalized), Self::Sequence(sequence) => sequence.normalize(normalized), Self::Lowercase(lc) => lc.normalize(normalized), Self::Nmt(lc) => lc.normalize(normalized), + #[cfg(feature = "spm")] Self::Precompiled(lc) => lc.normalize(normalized), Self::Replace(lc) => lc.normalize(normalized), Self::Prepend(lc) => lc.normalize(normalized), @@ -203,15 +278,20 @@ impl Normalizer for NormalizerWrapper { } impl_enum_from!(BertNormalizer, NormalizerWrapper, BertNormalizer); +#[cfg(feature = "unicode-normalization")] impl_enum_from!(NFKD, NormalizerWrapper, NFKD); +#[cfg(feature = "unicode-normalization")] impl_enum_from!(NFKC, NormalizerWrapper, NFKC); +#[cfg(feature = "unicode-normalization")] impl_enum_from!(NFC, NormalizerWrapper, NFC); +#[cfg(feature = "unicode-normalization")] impl_enum_from!(NFD, NormalizerWrapper, NFD); impl_enum_from!(Strip, NormalizerWrapper, StripNormalizer); impl_enum_from!(StripAccents, NormalizerWrapper, StripAccents); impl_enum_from!(Sequence, NormalizerWrapper, Sequence); impl_enum_from!(Lowercase, NormalizerWrapper, Lowercase); impl_enum_from!(Nmt, NormalizerWrapper, Nmt); +#[cfg(feature = "spm")] impl_enum_from!(Precompiled, NormalizerWrapper, Precompiled); impl_enum_from!(Replace, NormalizerWrapper, Replace); impl_enum_from!(Prepend, NormalizerWrapper, Prepend); diff --git a/tokenizers/src/normalizers/replace.rs b/tokenizers/src/normalizers/replace.rs index 5657574830..f9730802a8 100644 --- a/tokenizers/src/normalizers/replace.rs +++ b/tokenizers/src/normalizers/replace.rs @@ -67,7 +67,7 @@ impl Replace { pub fn new, C: Into>(pattern: I, content: C) -> Result { let pattern: ReplacePattern = pattern.into(); let regex = match &pattern { - ReplacePattern::String(s) => SysRegex::new(®ex::escape(s))?, + ReplacePattern::String(s) => SysRegex::new(&crate::utils::regex_escape(s))?, ReplacePattern::Regex(r) => SysRegex::new(r)?, }; diff --git a/tokenizers/src/normalizers/strip.rs b/tokenizers/src/normalizers/strip.rs index 19f5ff314d..f618be4d7e 100644 --- a/tokenizers/src/normalizers/strip.rs +++ b/tokenizers/src/normalizers/strip.rs @@ -1,8 +1,15 @@ use crate::tokenizer::{NormalizedString, Normalizer, Result}; use crate::utils::macro_rules_attribute; use serde::{Deserialize, Serialize}; +#[cfg(feature = "unicode-normalization")] use unicode_normalization_alignments::char::is_combining_mark; +#[cfg(not(feature = "unicode-normalization"))] +fn is_combining_mark(_c: char) -> bool { + // Without unicode-normalization feature, accent stripping is a no-op. 
diff --git a/tokenizers/src/normalizers/unicode.rs b/tokenizers/src/normalizers/unicode.rs
index 502b4239b4..2ec4be0634 100644
--- a/tokenizers/src/normalizers/unicode.rs
+++ b/tokenizers/src/normalizers/unicode.rs
@@ -1,46 +1,54 @@
 use crate::tokenizer::{NormalizedString, Normalizer, Result};
 use crate::utils::macro_rules_attribute;
 
-#[derive(Default, Copy, Clone, Debug)]
-#[macro_rules_attribute(impl_serde_type!)]
-pub struct NFD;
-impl Normalizer for NFD {
-    fn normalize(&self, normalized: &mut NormalizedString) -> Result<()> {
-        normalized.nfd();
-        Ok(())
+#[cfg(feature = "unicode-normalization")]
+mod nf_normalizers {
+    use super::*;
+
+    #[derive(Default, Copy, Clone, Debug)]
+    #[macro_rules_attribute(impl_serde_type!)]
+    pub struct NFD;
+    impl Normalizer for NFD {
+        fn normalize(&self, normalized: &mut NormalizedString) -> Result<()> {
+            normalized.nfd();
+            Ok(())
+        }
     }
-}
 
-#[derive(Default, Copy, Clone, Debug)]
-#[macro_rules_attribute(impl_serde_type!)]
-pub struct NFKD;
-impl Normalizer for NFKD {
-    fn normalize(&self, normalized: &mut NormalizedString) -> Result<()> {
-        normalized.nfkd();
-        Ok(())
+    #[derive(Default, Copy, Clone, Debug)]
+    #[macro_rules_attribute(impl_serde_type!)]
+    pub struct NFKD;
+    impl Normalizer for NFKD {
+        fn normalize(&self, normalized: &mut NormalizedString) -> Result<()> {
+            normalized.nfkd();
+            Ok(())
+        }
     }
-}
 
-#[derive(Default, Copy, Clone, Debug)]
-#[macro_rules_attribute(impl_serde_type!)]
-pub struct NFC;
-impl Normalizer for NFC {
-    fn normalize(&self, normalized: &mut NormalizedString) -> Result<()> {
-        normalized.nfc();
-        Ok(())
+    #[derive(Default, Copy, Clone, Debug)]
+    #[macro_rules_attribute(impl_serde_type!)]
+    pub struct NFC;
+    impl Normalizer for NFC {
+        fn normalize(&self, normalized: &mut NormalizedString) -> Result<()> {
+            normalized.nfc();
+            Ok(())
+        }
     }
-}
 
-#[derive(Default, Copy, Clone, Debug)]
-#[macro_rules_attribute(impl_serde_type!)]
-pub struct NFKC;
-impl Normalizer for NFKC {
-    fn normalize(&self, normalized: &mut NormalizedString) -> Result<()> {
-        normalized.nfkc();
-        Ok(())
+    #[derive(Default, Copy, Clone, Debug)]
+    #[macro_rules_attribute(impl_serde_type!)]
+    pub struct NFKC;
+    impl Normalizer for NFKC {
+        fn normalize(&self, normalized: &mut NormalizedString) -> Result<()> {
+            normalized.nfkc();
+            Ok(())
+        }
     }
 }
 
+#[cfg(feature = "unicode-normalization")]
+pub use nf_normalizers::*;
+
 fn do_nmt(normalized: &mut NormalizedString) {
     // Ascii Control characters
     normalized
diff --git a/tokenizers/src/pre_tokenizers/byte_level.rs b/tokenizers/src/pre_tokenizers/byte_level.rs
index 8bc0f30af0..13ca230625 100644
--- a/tokenizers/src/pre_tokenizers/byte_level.rs
+++ b/tokenizers/src/pre_tokenizers/byte_level.rs
@@ -1,4 +1,6 @@
-use ahash::{AHashMap, AHashSet};
+#[cfg(test)]
+use crate::utils::HashMapExt;
+use crate::utils::{AHashMap, AHashSet};
 use std::sync::LazyLock;
 
 use crate::utils::SysRegex;
diff --git a/tokenizers/src/pre_tokenizers/metaspace.rs b/tokenizers/src/pre_tokenizers/metaspace.rs
index d821f11841..c781fa34ec 100644
--- a/tokenizers/src/pre_tokenizers/metaspace.rs
+++ b/tokenizers/src/pre_tokenizers/metaspace.rs
@@ -174,8 +174,6 @@ impl Decoder for Metaspace {
 
 #[cfg(test)]
 mod tests {
-    use regex::Regex;
-
     use super::*;
     use crate::{OffsetReferential, OffsetType};
 
@@ -278,7 +276,7 @@ mod tests {
         let pretok = Metaspace::new('▁', PrependScheme::First, false);
         let mut pretokenized = PreTokenizedString::from("Hey my friend <s>how▁are you");
-        let re_ref = Regex::new(r"(<s>)").unwrap();
+        let re_ref = crate::utils::SysRegex::new(r"(<s>)").unwrap();
         pretokenized
             .split(|_, sequence| sequence.split(&re_ref, SplitDelimiterBehavior::Isolated))
             .expect("Bad split");
diff --git a/tokenizers/src/pre_tokenizers/split.rs b/tokenizers/src/pre_tokenizers/split.rs
index 5f7362f71e..c176901ba1 100644
--- a/tokenizers/src/pre_tokenizers/split.rs
+++ b/tokenizers/src/pre_tokenizers/split.rs
@@ -80,7 +80,7 @@ impl Split {
     ) -> Result<Self> {
         let pattern: SplitPattern = pattern.into();
         let regex = match &pattern {
-            SplitPattern::String(s) => SysRegex::new(&regex::escape(s))?,
+            SplitPattern::String(s) => SysRegex::new(&crate::utils::regex_escape(s))?,
             SplitPattern::Regex(r) => SysRegex::new(r)?,
         };
diff --git a/tokenizers/src/pre_tokenizers/whitespace.rs b/tokenizers/src/pre_tokenizers/whitespace.rs
index 20cfb65193..15bb115c29 100644
--- a/tokenizers/src/pre_tokenizers/whitespace.rs
+++ b/tokenizers/src/pre_tokenizers/whitespace.rs
@@ -1,11 +1,10 @@
 use std::sync::LazyLock;
 
-use regex::Regex;
-
 use crate::tokenizer::{
     pattern::Invert, PreTokenizedString, PreTokenizer, Result, SplitDelimiterBehavior,
 };
 use crate::utils::macro_rules_attribute;
+use crate::utils::SysRegex;
 
 #[derive(Clone, Debug, PartialEq, Eq)]
 #[macro_rules_attribute(impl_serde_type!)]
@@ -19,8 +18,8 @@ impl Default for Whitespace {
 
 impl PreTokenizer for Whitespace {
     fn pre_tokenize(&self, pretokenized: &mut PreTokenizedString) -> Result<()> {
-        static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\w+|[^\w\s]+").unwrap());
-        let re_ref: &Regex = &RE;
+        static RE: LazyLock<SysRegex> = LazyLock::new(|| SysRegex::new(r"\w+|[^\w\s]+").unwrap());
+        let re_ref: &SysRegex = &RE;
 
         pretokenized.split(|_, normalized| {
             normalized.split(Invert(re_ref), SplitDelimiterBehavior::Removed)
diff --git a/tokenizers/src/processors/bert.rs b/tokenizers/src/processors/bert.rs
index a1cab8abd1..7f2907d7ba 100644
--- a/tokenizers/src/processors/bert.rs
+++ b/tokenizers/src/processors/bert.rs
@@ -1,5 +1,5 @@
 use crate::tokenizer::{Encoding, PostProcessor, Result};
-use ahash::AHashMap;
+use crate::utils::AHashMap;
 use serde::{Deserialize, Serialize};
 use std::iter::FromIterator;
 
diff --git a/tokenizers/src/processors/roberta.rs b/tokenizers/src/processors/roberta.rs
index f2a47a9d38..5e7a3ca0be 100644
--- a/tokenizers/src/processors/roberta.rs
+++ b/tokenizers/src/processors/roberta.rs
@@ -1,6 +1,6 @@
 use crate::processors::byte_level::process_offsets;
 use crate::tokenizer::{Encoding, PostProcessor, Result};
-use ahash::AHashMap;
+use crate::utils::AHashMap;
 use serde::{Deserialize, Serialize};
 use std::iter::FromIterator;
 
diff --git a/tokenizers/src/processors/sequence.rs b/tokenizers/src/processors/sequence.rs
index f44cf54ac8..8043a455ae 100644
--- a/tokenizers/src/processors/sequence.rs
+++ b/tokenizers/src/processors/sequence.rs
@@ -73,7 +73,7 @@ mod tests {
     use super::*;
     use crate::processors::{ByteLevel, PostProcessorWrapper};
     use crate::tokenizer::{Encoding, PostProcessor};
-    use ahash::AHashMap;
+    use crate::utils::{AHashMap, HashMapExt};
    use std::iter::FromIterator;
 
     #[test]
diff --git a/tokenizers/src/processors/template.rs b/tokenizers/src/processors/template.rs
index 50fac99dfc..71377672dc 100644
--- a/tokenizers/src/processors/template.rs
+++ b/tokenizers/src/processors/template.rs
@@ -56,9 +56,8 @@
 //!
 //! [`TemplateProcessing`]: struct.TemplateProcessing.html
 //!
+use crate::utils::{AHashMap, AHashSet, HashMapExt};
 use crate::{Encoding, PostProcessor, Result};
-use ahash::{AHashMap, AHashSet};
-use itertools::Itertools;
 use serde::{Deserialize, Serialize};
 use std::convert::{TryFrom, TryInto};
 use std::result::Result as StdResult;
@@ -333,21 +332,15 @@ impl From<Vec<SpecialToken>> for Tokens {
 /// .unwrap();
 /// ```
 ///
-#[derive(Debug, Clone, PartialEq, Builder, Serialize, Deserialize, Eq)]
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Eq)]
 #[serde(tag = "type", from = "TemplateProcessingDeserializer")]
-#[builder(build_fn(validate = "Self::validate"))]
 pub struct TemplateProcessing {
-    #[builder(try_setter, default = "\"$0\".try_into().unwrap()")]
     pub single: Template,
-    #[builder(try_setter, default = "\"$A:0 $B:1\".try_into().unwrap()")]
     pair: Template,
-    #[builder(setter(skip), default = "self.default_added(true)")]
     #[serde(skip)]
     added_single: usize,
-    #[builder(setter(skip), default = "self.default_added(false)")]
     #[serde(skip)]
     added_pair: usize,
-    #[builder(setter(into), default)]
     special_tokens: Tokens,
 }
 
@@ -405,7 +398,13 @@ impl TemplateProcessing {
 
 impl From<&str> for TemplateProcessingBuilderError {
     fn from(e: &str) -> Self {
-        e.to_string().into()
+        TemplateProcessingBuilderError(e.to_string())
+    }
+}
+
+impl From<String> for TemplateProcessingBuilderError {
+    fn from(e: String) -> Self {
+        TemplateProcessingBuilderError(e)
     }
 }
 
@@ -439,33 +438,66 @@ impl From<TemplateProcessingDeserializer> for TemplateProcessing {
     }
 }
 
-/// Count the number of added tokens in the given template
-fn count_added(container: &Template, special_tokens: Option<&Tokens>) -> usize {
-    container
-        .0
-        .iter()
-        .map(|p| match p {
-            Piece::Sequence { .. } => 0,
-            Piece::SpecialToken { id, .. } => {
-                special_tokens.map_or(0, |spt| spt.0.get(id).map_or(0, |s| s.ids.len()))
-            }
-        })
-        .sum()
+/// Error type for `TemplateProcessingBuilder`.
+#[derive(Debug, Clone)]
+pub struct TemplateProcessingBuilderError(String);
+
+impl std::fmt::Display for TemplateProcessingBuilderError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
+
+impl std::error::Error for TemplateProcessingBuilderError {}
+
+/// Builder for `TemplateProcessing`.
+#[derive(Debug, Clone, Default)]
+pub struct TemplateProcessingBuilder {
+    single: Option