From 13eccdd7a52d990eb3f830f1a0d9f58e0092ccd8 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Sat, 14 Mar 2026 14:33:59 -0700 Subject: [PATCH 01/14] feat: integrate fasttokens high-performance BPE tokenizer backend Add the fastokens crate (v0.1.0 from github.com/Atero-ai/fastokens) as an always-on workspace dependency for high-performance BPE encoding. Core integration: - lib/llm/src/tokenizers/fast.rs: hybrid FastTokenizer that encodes with fastokens and decodes with HuggingFace, with 4 unit tests - lib/llm/src/model_card.rs: tokenizer() checks DYN_TOKENIZER_BACKEND=fasttokens env var, falls back to HuggingFace on load failure Frontend CLI: - --dyn-tokenizer-backend flag / DYN_TOKENIZER_BACKEND env var with values "default" (HuggingFace) or "fasttokens" --- Cargo.lock | 174 ++++++++++++++++ Cargo.toml | 1 + .../src/dynamo/frontend/frontend_args.py | 14 ++ components/src/dynamo/frontend/main.py | 2 + lib/bindings/python/Cargo.lock | 197 +++++++++++++++++- lib/bindings/python/pyproject.toml | 4 +- lib/llm/Cargo.toml | 1 + lib/llm/src/model_card.rs | 26 +++ lib/llm/src/tokenizers.rs | 2 + lib/llm/src/tokenizers/fast.rs | 144 +++++++++++++ .../sample-models/minimal-bpe/tokenizer.json | 52 +++++ 11 files changed, 611 insertions(+), 6 deletions(-) create mode 100644 lib/llm/src/tokenizers/fast.rs create mode 100644 lib/llm/tests/data/sample-models/minimal-bpe/tokenizer.json diff --git a/Cargo.lock b/Cargo.lock index b678dcc3e79..1934a4bd718 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1482,6 +1482,12 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "daachorse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63b7ef7a4be509357f4804d0a22e830daddb48f19fd604e4ad32ddce04a94c36" + [[package]] name = "darling" version = "0.20.11" @@ -1957,6 +1963,7 @@ dependencies = [ "dynamo-runtime", "dynamo-tokens", "either", + "fastokens", "ffmpeg-next", "flate2", "futures", @@ -2389,6 +2396,35 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "fancy-regex" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72cf461f865c862bb7dc573f643dd6a2b6842f7c30b07882b56bd148cc2761b8" +dependencies = [ + "bit-set 0.8.0", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "fastokens" +version = "0.1.0" +source = "git+https://github.com/Atero-ai/fastokens#b8e895bdec4173d27e37758b1893f39557adf733" +dependencies = [ + "daachorse", + "fancy-regex 0.17.0", + "hf-hub", + "icu_normalizer", + "memchr", + "pcre2", + "rayon", + "serde", + "serde_json", + "strum", + "thiserror 2.0.18", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -2539,6 +2575,21 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -2977,6 +3028,7 @@ dependencies = [ "indicatif 0.17.11", "libc", "log", + "native-tls", "num_cpus", "rand 0.9.2", "reqwest 0.12.28", @@ -3160,6 +3212,22 @@ dependencies = [ "tower-service", ] +[[package]] +name = "hyper-tls" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" +dependencies = [ + "bytes", + "http-body-util", + "hyper 1.8.1", + "hyper-util", + "native-tls", + "tokio", + "tokio-native-tls", + "tower-service", +] + [[package]] name = "hyper-util" version = "0.1.20" @@ -3246,6 +3314,9 @@ dependencies = [ "icu_properties", "icu_provider", "smallvec", + "utf16_iter", + "utf8_iter", + "write16", "zerovec", ] @@ -4523,6 +4594,23 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" +[[package]] +name = "native-tls" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe 0.2.1", + "openssl-sys", + "schannel", + "security-framework 3.7.0", + "security-framework-sys", + "tempfile", +] + [[package]] name = "ndarray" version = "0.16.1" @@ -5148,6 +5236,32 @@ dependencies = [ "thiserror 2.0.18", ] +[[package]] +name = "openssl" +version = "0.10.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "951c002c75e16ea2c65b8c7e4d3d51d5530d8dfa7d060b4776828c88cfb18ecf" +dependencies = [ + "bitflags 2.11.0", + "cfg-if 1.0.4", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "openssl-probe" version = "0.1.6" @@ -5160,6 +5274,18 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" +[[package]] +name = "openssl-sys" +version = "0.9.112" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d55af3b3e226502be1526dfdba67ab0e9c96fc293004e79576b2b9edb0dbdb" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "opentelemetry" version = "0.31.0" @@ -5345,6 +5471,28 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" +[[package]] +name = "pcre2" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e970b0fcce0c7ee6ef662744ff711f21ccd6f11b7cf03cd187a80e89797fc67" +dependencies = [ + "libc", + "log", + "pcre2-sys", +] + +[[package]] +name = "pcre2-sys" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18b9073c1a2549bd409bf4a32c94d903bb1a09bf845bc306ae148897fa0760a4" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "pear" version = "0.2.9" @@ -6319,11 +6467,13 @@ dependencies = [ "http-body-util", "hyper 1.8.1", "hyper-rustls", + "hyper-tls", "hyper-util", "js-sys", "log", "mime", "mime_guess", + "native-tls", "percent-encoding", "pin-project-lite", "quinn", @@ -6335,6 +6485,7 @@ dependencies = [ "serde_urlencoded", "sync_wrapper 1.0.2", "tokio", + "tokio-native-tls", "tokio-rustls", "tokio-util", "tower 0.5.3", @@ -7774,6 +7925,16 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + [[package]] name = "tokio-rayon" version = "2.1.0" @@ -8501,6 +8662,7 @@ dependencies = [ "base64 0.22.1", "flate2", "log", + "native-tls", "once_cell", "rustls", "rustls-pki-types", @@ -8530,6 +8692,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -9399,6 +9567,12 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + [[package]] name = "writeable" version = "0.6.2" diff --git a/Cargo.toml b/Cargo.toml index 3aa4e78ed5c..02df9a50f98 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,6 +46,7 @@ dynamo-mocker = { path = "lib/mocker", version = "1.0.0" } dynamo-kv-router = { path = "lib/kv-router", version = "1.0.0", features = ["metrics"] } dynamo-async-openai = { path = "lib/async-openai", version = "1.0.0", features = ["byot"] } dynamo-parsers = { path = "lib/parsers", version = "1.0.0" } +fastokens = { git = "https://github.com/Atero-ai/fastokens", version = "0.1.0" } # kvbm kvbm-common = { path = "lib/kvbm-common", version = "0.1.0" } diff --git a/components/src/dynamo/frontend/frontend_args.py b/components/src/dynamo/frontend/frontend_args.py index 738279328be..0ee2ca581d5 100644 --- a/components/src/dynamo/frontend/frontend_args.py +++ b/components/src/dynamo/frontend/frontend_args.py @@ -76,6 +76,7 @@ class FrontendConfig(KvRouterConfigBase): enable_streaming_tool_dispatch: bool enable_streaming_reasoning_dispatch: bool preprocess_workers: int + tokenizer_backend: str def validate(self) -> None: if bool(self.tls_cert_path) ^ bool(self.tls_key_path): # ^ is XOR @@ -424,3 +425,16 @@ def add_arguments(self, parser) -> None: ), arg_type=int, ) + + add_argument( + g, + flag_name="--dyn-tokenizer-backend", + env_var="DYN_TOKENIZER_BACKEND", + default="default", + dest="tokenizer_backend", + help=( + "Tokenizer backend for BPE models: 'default' (HuggingFace tokenizers library) " + "or 'fasttokens' (fastokens crate for high-performance BPE encoding). " + "Decoding always uses HuggingFace. Has no effect on TikToken models." + ), + ) diff --git a/components/src/dynamo/frontend/main.py b/components/src/dynamo/frontend/main.py index 86d4c9d55fb..6203f2bb1f7 100644 --- a/components/src/dynamo/frontend/main.py +++ b/components/src/dynamo/frontend/main.py @@ -165,6 +165,8 @@ async def async_main(): config, vllm_flags, sglang_flags = parse_args() dump_config(config.dump_config_to, config) os.environ["DYN_EVENT_PLANE"] = config.event_plane + if config.tokenizer_backend == "fasttokens": + os.environ["DYN_TOKENIZER_BACKEND"] = "fasttokens" logger.info( f"Request migration {'enabled' if config.migration_limit > 0 else 'disabled'} " f"(limit: {config.migration_limit})" diff --git a/lib/bindings/python/Cargo.lock b/lib/bindings/python/Cargo.lock index 756b0acff11..e3051ef0e56 100644 --- a/lib/bindings/python/Cargo.lock +++ b/lib/bindings/python/Cargo.lock @@ -603,7 +603,16 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" dependencies = [ - "bit-vec", + "bit-vec 0.6.3", +] + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec 0.8.0", ] [[package]] @@ -612,6 +621,12 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bit_field" version = "0.10.3" @@ -1157,6 +1172,12 @@ dependencies = [ "syn", ] +[[package]] +name = "daachorse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63b7ef7a4be509357f4804d0a22e830daddb48f19fd604e4ad32ddce04a94c36" + [[package]] name = "darling" version = "0.20.11" @@ -1587,6 +1608,7 @@ dependencies = [ "dynamo-runtime", "dynamo-tokens", "either", + "fastokens", "ffmpeg-next", "flate2", "futures", @@ -2014,11 +2036,40 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2" dependencies = [ - "bit-set", + "bit-set 0.5.3", "regex-automata", "regex-syntax", ] +[[package]] +name = "fancy-regex" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72cf461f865c862bb7dc573f643dd6a2b6842f7c30b07882b56bd148cc2761b8" +dependencies = [ + "bit-set 0.8.0", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "fastokens" +version = "0.1.0" +source = "git+https://github.com/Atero-ai/fastokens#b8e895bdec4173d27e37758b1893f39557adf733" +dependencies = [ + "daachorse", + "fancy-regex 0.17.0", + "hf-hub", + "icu_normalizer", + "memchr", + "pcre2", + "rayon", + "serde", + "serde_json", + "strum", + "thiserror 2.0.18", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -2163,6 +2214,21 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -2517,6 +2583,7 @@ dependencies = [ "indicatif", "libc", "log", + "native-tls", "num_cpus", "rand 0.9.2", "reqwest", @@ -2654,6 +2721,22 @@ dependencies = [ "tower-service", ] +[[package]] +name = "hyper-tls" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" +dependencies = [ + "bytes", + "http-body-util", + "hyper", + "hyper-util", + "native-tls", + "tokio", + "tokio-native-tls", + "tower-service", +] + [[package]] name = "hyper-util" version = "0.1.20" @@ -2740,6 +2823,9 @@ dependencies = [ "icu_properties", "icu_provider", "smallvec", + "utf16_iter", + "utf8_iter", + "write16", "zerovec", ] @@ -3816,6 +3902,23 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" +[[package]] +name = "native-tls" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe 0.2.1", + "openssl-sys", + "schannel", + "security-framework 3.7.0", + "security-framework-sys", + "tempfile", +] + [[package]] name = "ndarray" version = "0.16.1" @@ -4374,7 +4477,7 @@ dependencies = [ "base64 0.22.1", "bstr", "clap", - "fancy-regex", + "fancy-regex 0.13.0", "futures", "image", "regex", @@ -4388,6 +4491,32 @@ dependencies = [ "thiserror 2.0.18", ] +[[package]] +name = "openssl" +version = "0.10.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "951c002c75e16ea2c65b8c7e4d3d51d5530d8dfa7d060b4776828c88cfb18ecf" +dependencies = [ + "bitflags 2.11.0", + "cfg-if 1.0.4", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "openssl-probe" version = "0.1.6" @@ -4400,6 +4529,18 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" +[[package]] +name = "openssl-sys" +version = "0.9.112" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d55af3b3e226502be1526dfdba67ab0e9c96fc293004e79576b2b9edb0dbdb" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "opentelemetry" version = "0.31.0" @@ -4585,6 +4726,28 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" +[[package]] +name = "pcre2" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e970b0fcce0c7ee6ef662744ff711f21ccd6f11b7cf03cd187a80e89797fc67" +dependencies = [ + "libc", + "log", + "pcre2-sys", +] + +[[package]] +name = "pcre2-sys" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18b9073c1a2549bd409bf4a32c94d903bb1a09bf845bc306ae148897fa0760a4" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "pear" version = "0.2.9" @@ -5537,11 +5700,13 @@ dependencies = [ "http-body-util", "hyper", "hyper-rustls", + "hyper-tls", "hyper-util", "js-sys", "log", "mime", "mime_guess", + "native-tls", "percent-encoding", "pin-project-lite", "quinn", @@ -5553,6 +5718,7 @@ dependencies = [ "serde_urlencoded", "sync_wrapper", "tokio", + "tokio-native-tls", "tokio-rustls", "tokio-util", "tower", @@ -6549,7 +6715,7 @@ dependencies = [ "anyhow", "base64 0.22.1", "bstr", - "fancy-regex", + "fancy-regex 0.13.0", "lazy_static", "regex", "rustc-hash 1.1.0", @@ -6706,6 +6872,16 @@ dependencies = [ "syn", ] +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + [[package]] name = "tokio-rayon" version = "2.1.0" @@ -7322,6 +7498,7 @@ dependencies = [ "base64 0.22.1", "flate2", "log", + "native-tls", "once_cell", "rustls", "rustls-pki-types", @@ -7345,6 +7522,12 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -8129,6 +8312,12 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + [[package]] name = "writeable" version = "0.6.2" diff --git a/lib/bindings/python/pyproject.toml b/lib/bindings/python/pyproject.toml index 1ca4646e33f..7acb044ec05 100644 --- a/lib/bindings/python/pyproject.toml +++ b/lib/bindings/python/pyproject.toml @@ -22,8 +22,8 @@ readme = "README.md" authors = [ { name = "NVIDIA Inc.", email = "sw-dl-dynamo@nvidia.com" }, ] -license = { text = "Apache-2.0" } -license-files = ["LICENSE"] +# license = { text = "Apache-2.0" } +# license-files = ["LICENSE"] requires-python = ">=3.10" dependencies = [ "pydantic>=2.10.6,<=2.13", diff --git a/lib/llm/Cargo.toml b/lib/llm/Cargo.toml index 2a9351075e4..b14226b160a 100644 --- a/lib/llm/Cargo.toml +++ b/lib/llm/Cargo.toml @@ -143,6 +143,7 @@ tokenizers = { version = "0.21.4", default-features = false, features = [ ] } tiktoken-rs = { version = "0.9", default-features = false } rustc-hash = "1.1" +fastokens = { workspace = true } # backend galil-seiferas = { version = "0.1" } diff --git a/lib/llm/src/model_card.rs b/lib/llm/src/model_card.rs index 38edb196ffb..5d0d37c846a 100644 --- a/lib/llm/src/model_card.rs +++ b/lib/llm/src/model_card.rs @@ -378,12 +378,38 @@ impl ModelDeploymentCard { /// Load the tokenizer as a generic, backend-agnostic `Tokenizer` trait object. /// This supports both HuggingFace `tokenizer.json` and tiktoken `.model`/`.tiktoken` files. + /// + /// When the `DYN_TOKENIZER_BACKEND=fasttokens` env var is set, uses `fastokens` for encoding pub fn tokenizer(&self) -> anyhow::Result { + let use_fast = std::env::var("DYN_TOKENIZER_BACKEND") + .map(|v| v == "fasttokens") + .unwrap_or(false); + match &self.tokenizer { Some(TokenizerKind::HfTokenizerJson(checked_file)) => { let p = checked_file.path().ok_or_else(|| { anyhow::anyhow!("Tokenizer is URL-backed ({:?})", checked_file.url()) })?; + + // Try fasttokens backend if requested + if use_fast { + let path_str = p.to_str().ok_or_else(|| { + anyhow::anyhow!("Tokenizer path contains invalid UTF-8: {}", p.display()) + })?; + match crate::tokenizers::FastTokenizer::from_file(path_str) { + Ok(fast) => { + tracing::info!("Using fasttokens tokenizer backend"); + return Ok(crate::tokenizers::Tokenizer::from(Arc::new(fast))); + } + Err(e) => { + tracing::warn!( + %e, + "Failed to load fasttokens, falling back to HuggingFace" + ); + } + } + } + let hf = HfTokenizer::from_file(p) .inspect_err(|err| { if let Some(serde_err) = err.downcast_ref::() diff --git a/lib/llm/src/tokenizers.rs b/lib/llm/src/tokenizers.rs index 554f112e67a..a036957a880 100644 --- a/lib/llm/src/tokenizers.rs +++ b/lib/llm/src/tokenizers.rs @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 +pub mod fast; pub mod hf; pub mod tiktoken; @@ -15,6 +16,7 @@ use std::{ops::Deref, path::Path}; use crate::protocols::TokenIdType; pub use anyhow::{Error, Result}; +pub use fast::FastTokenizer; pub use hf::HuggingFaceTokenizer; pub use tiktoken::TikTokenTokenizer; diff --git a/lib/llm/src/tokenizers/fast.rs b/lib/llm/src/tokenizers/fast.rs new file mode 100644 index 00000000000..36dd60a0c10 --- /dev/null +++ b/lib/llm/src/tokenizers/fast.rs @@ -0,0 +1,144 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Fasttokens backend using the `fastokens` crate for high-performance BPE encoding. +//! +//! `fastokens` only supports encoding, so this module provides a hybrid tokenizer that +//! uses `fastokens` for encoding and falls back to `HuggingFaceTokenizer` for decoding. +//! Both are loaded from the same `tokenizer.json` file. + +use std::path::Path; + +use rayon::prelude::*; + +use super::{ + Encoding, Error, Result, TokenIdType, + hf::HuggingFaceTokenizer, + traits::{Decoder, Encoder, Tokenizer}, +}; + +/// Hybrid tokenizer: fast BPE encoding via `fastokens`, decoding via HuggingFace. +/// +/// Both backends are loaded from the same `tokenizer.json` file. +pub struct FastTokenizer { + fast_encoder: fastokens::Tokenizer, + hf_decoder: HuggingFaceTokenizer, +} + +impl FastTokenizer { + pub fn from_file(path: &str) -> Result { + let fast_encoder = fastokens::Tokenizer::from_file(Path::new(path)) + .map_err(|e| Error::msg(format!("Error loading fasttokens tokenizer: {e}")))?; + let hf_decoder = HuggingFaceTokenizer::from_file(path)?; + Ok(Self { + fast_encoder, + hf_decoder, + }) + } +} + +impl Encoder for FastTokenizer { + fn encode(&self, input: &str) -> Result { + let ids = self + .fast_encoder + .encode(input) + .map_err(|e| Error::msg(format!("Fasttokens encode error: {e}")))?; + Ok(Encoding::Sp(ids)) + } + + fn encode_batch(&self, inputs: &[&str]) -> Result> { + inputs.par_iter().map(|input| self.encode(input)).collect() + } +} + +impl Decoder for FastTokenizer { + fn decode(&self, token_ids: &[TokenIdType], skip_special_tokens: bool) -> Result { + self.hf_decoder.decode(token_ids, skip_special_tokens) + } +} + +impl Tokenizer for FastTokenizer {} + +#[cfg(test)] +mod tests { + use super::*; + use crate::tokenizers::HuggingFaceTokenizer; + + // Minimal synthetic BPE tokenizer with no normalizer or post-processor -- + // compatible with fastokens. Vocab covers: H,T,a,d,e,h,i,l,o,r,s,t,w + punctuation. + const TOKENIZER_PATH: &str = concat!( + env!("CARGO_MANIFEST_DIR"), + "/tests/data/sample-models/minimal-bpe/tokenizer.json" + ); + + #[test] + fn test_fast_encode_decode_roundtrip() { + let tokenizer = FastTokenizer::from_file(TOKENIZER_PATH).unwrap(); + // Encode then decode: verifies both paths execute without error. + // With a null decoder, HF inserts spaces between tokens so exact equality + // is not expected here -- we just verify the operations succeed and produce + // non-empty results. + let text = "Hello, world!"; + let encoding = tokenizer.encode(text).unwrap(); + assert!(!encoding.token_ids().is_empty()); + let decoded = tokenizer.decode(encoding.token_ids(), true).unwrap(); + assert!(!decoded.is_empty()); + // The decoded text should contain the same non-space characters + let enc_chars: String = text.chars().filter(|c| !c.is_whitespace()).collect(); + let dec_chars: String = decoded.chars().filter(|c| !c.is_whitespace()).collect(); + assert_eq!( + enc_chars, dec_chars, + "non-space characters must be preserved" + ); + } + + #[test] + fn test_fast_matches_hf_encoding() { + let fast = FastTokenizer::from_file(TOKENIZER_PATH).unwrap(); + let hf = HuggingFaceTokenizer::from_file(TOKENIZER_PATH).unwrap(); + + for text in &["Hello, world!", "Hello", " world", "He llo"] { + let fast_ids = fast.encode(text).unwrap(); + let hf_ids = hf.encode(text).unwrap(); + assert_eq!( + fast_ids.token_ids(), + hf_ids.token_ids(), + "fasttokens and HuggingFace must produce identical token IDs for '{text}'" + ); + } + } + + #[test] + fn test_fast_batch_encode() { + let tokenizer = FastTokenizer::from_file(TOKENIZER_PATH).unwrap(); + let inputs = &["Hello", " world", "Hello, world!"]; + let encodings = tokenizer.encode_batch(inputs).unwrap(); + assert_eq!(encodings.len(), inputs.len()); + for (enc, input) in encodings.iter().zip(inputs.iter()) { + assert!( + !enc.token_ids().is_empty(), + "encoding for '{input}' must be non-empty" + ); + } + } + + #[test] + fn test_fast_with_decode_stream() { + use crate::tokenizers::Tokenizer as TokenizerWrapper; + use std::sync::Arc; + + let tokenizer = Arc::new(FastTokenizer::from_file(TOKENIZER_PATH).unwrap()); + let wrapper = TokenizerWrapper::from(tokenizer); + + // Encode a prompt and a continuation, then step through the decode stream + let prompt_ids = wrapper.encode("Hello").unwrap().token_ids().to_vec(); + let continuation = ", world!"; + let cont_ids = wrapper.encode(continuation).unwrap().token_ids().to_vec(); + + let mut stream = wrapper.decode_stream(&prompt_ids, true); + // DecodeStream produces output incrementally; just verify it runs without error. + for id in cont_ids { + stream.step(id).unwrap(); + } + } +} diff --git a/lib/llm/tests/data/sample-models/minimal-bpe/tokenizer.json b/lib/llm/tests/data/sample-models/minimal-bpe/tokenizer.json new file mode 100644 index 00000000000..057c81dca20 --- /dev/null +++ b/lib/llm/tests/data/sample-models/minimal-bpe/tokenizer.json @@ -0,0 +1,52 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [], + "normalizer": null, + "pre_tokenizer": null, + "post_processor": null, + "decoder": null, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "": 0, + " ": 1, + "!": 2, + ",": 3, + ".": 4, + "H": 5, + "T": 6, + "a": 7, + "d": 8, + "e": 9, + "h": 10, + "i": 11, + "l": 12, + "o": 13, + "r": 14, + "s": 15, + "t": 16, + "w": 17, + "He": 18, + "ll": 19, + "llo": 20, + "or": 21, + "ld": 22 + }, + "merges": [ + "H e", + "l l", + "ll o", + "o r", + "l d" + ] + } +} From ec223876f3d47bc366b686574d3f82d1f75ffd92 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Sat, 14 Mar 2026 16:13:03 -0700 Subject: [PATCH 02/14] update --- lib/bindings/python/pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/bindings/python/pyproject.toml b/lib/bindings/python/pyproject.toml index 7acb044ec05..1ca4646e33f 100644 --- a/lib/bindings/python/pyproject.toml +++ b/lib/bindings/python/pyproject.toml @@ -22,8 +22,8 @@ readme = "README.md" authors = [ { name = "NVIDIA Inc.", email = "sw-dl-dynamo@nvidia.com" }, ] -# license = { text = "Apache-2.0" } -# license-files = ["LICENSE"] +license = { text = "Apache-2.0" } +license-files = ["LICENSE"] requires-python = ">=3.10" dependencies = [ "pydantic>=2.10.6,<=2.13", From 07c9f3cf015b1dc07e9c0945d613743b7aeec3b3 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Sat, 14 Mar 2026 16:58:40 -0700 Subject: [PATCH 03/14] fix --- .../src/dynamo/frontend/frontend_args.py | 1 + components/src/dynamo/frontend/main.py | 2 + lib/llm/src/model_card.rs | 45 ++++++++++++------- lib/llm/src/tokenizers/fast.rs | 16 +++++-- 4 files changed, 45 insertions(+), 19 deletions(-) diff --git a/components/src/dynamo/frontend/frontend_args.py b/components/src/dynamo/frontend/frontend_args.py index 0ee2ca581d5..1a17d1f9a6f 100644 --- a/components/src/dynamo/frontend/frontend_args.py +++ b/components/src/dynamo/frontend/frontend_args.py @@ -437,4 +437,5 @@ def add_arguments(self, parser) -> None: "or 'fasttokens' (fastokens crate for high-performance BPE encoding). " "Decoding always uses HuggingFace. Has no effect on TikToken models." ), + choices=["default", "fasttokens"], ) diff --git a/components/src/dynamo/frontend/main.py b/components/src/dynamo/frontend/main.py index 6203f2bb1f7..ad367e7c8f1 100644 --- a/components/src/dynamo/frontend/main.py +++ b/components/src/dynamo/frontend/main.py @@ -167,6 +167,8 @@ async def async_main(): os.environ["DYN_EVENT_PLANE"] = config.event_plane if config.tokenizer_backend == "fasttokens": os.environ["DYN_TOKENIZER_BACKEND"] = "fasttokens" + else: + os.environ.pop("DYN_TOKENIZER_BACKEND", None) logger.info( f"Request migration {'enabled' if config.migration_limit > 0 else 'disabled'} " f"(limit: {config.migration_limit})" diff --git a/lib/llm/src/model_card.rs b/lib/llm/src/model_card.rs index 5d0d37c846a..9f2c07a4d0a 100644 --- a/lib/llm/src/model_card.rs +++ b/lib/llm/src/model_card.rs @@ -381,9 +381,18 @@ impl ModelDeploymentCard { /// /// When the `DYN_TOKENIZER_BACKEND=fasttokens` env var is set, uses `fastokens` for encoding pub fn tokenizer(&self) -> anyhow::Result { - let use_fast = std::env::var("DYN_TOKENIZER_BACKEND") - .map(|v| v == "fasttokens") - .unwrap_or(false); + let use_fast = match std::env::var("DYN_TOKENIZER_BACKEND") { + Ok(v) if v == "fasttokens" => true, + Ok(v) if v == "default" || v.is_empty() => false, + Ok(v) => { + tracing::warn!( + value = %v, + "Unrecognized DYN_TOKENIZER_BACKEND value, expected 'fasttokens' or 'default'; falling back to default" + ); + false + } + Err(_) => false, + }; match &self.tokenizer { Some(TokenizerKind::HfTokenizerJson(checked_file)) => { @@ -393,20 +402,24 @@ impl ModelDeploymentCard { // Try fasttokens backend if requested if use_fast { - let path_str = p.to_str().ok_or_else(|| { - anyhow::anyhow!("Tokenizer path contains invalid UTF-8: {}", p.display()) - })?; - match crate::tokenizers::FastTokenizer::from_file(path_str) { - Ok(fast) => { - tracing::info!("Using fasttokens tokenizer backend"); - return Ok(crate::tokenizers::Tokenizer::from(Arc::new(fast))); - } - Err(e) => { - tracing::warn!( - %e, - "Failed to load fasttokens, falling back to HuggingFace" - ); + if let Some(path_str) = p.to_str() { + match crate::tokenizers::FastTokenizer::from_file(path_str) { + Ok(fast) => { + tracing::info!("Using fasttokens tokenizer backend"); + return Ok(crate::tokenizers::Tokenizer::from(Arc::new(fast))); + } + Err(e) => { + tracing::warn!( + %e, + "Failed to load fasttokens, falling back to HuggingFace" + ); + } } + } else { + tracing::warn!( + path = %p.display(), + "Tokenizer path contains non-UTF-8 characters, skipping fasttokens; falling back to HuggingFace" + ); } } diff --git a/lib/llm/src/tokenizers/fast.rs b/lib/llm/src/tokenizers/fast.rs index 36dd60a0c10..1bcf726f0fc 100644 --- a/lib/llm/src/tokenizers/fast.rs +++ b/lib/llm/src/tokenizers/fast.rs @@ -136,9 +136,19 @@ mod tests { let cont_ids = wrapper.encode(continuation).unwrap().token_ids().to_vec(); let mut stream = wrapper.decode_stream(&prompt_ids, true); - // DecodeStream produces output incrementally; just verify it runs without error. - for id in cont_ids { - stream.step(id).unwrap(); + // Accumulate incremental chunks from decode_stream + let mut accumulated = String::new(); + for id in &cont_ids { + if let Some(chunk) = stream.step(*id).unwrap() { + accumulated.push_str(&chunk); + } } + + // The accumulated streamed text should match the decoded continuation + let expected = wrapper.decode(&cont_ids, true).unwrap(); + assert_eq!( + accumulated, expected, + "streamed chunks must equal batch-decoded continuation" + ); } } From 7af421e6796664834dc9404aa6081980be931bfa Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Sat, 14 Mar 2026 17:13:57 -0700 Subject: [PATCH 04/14] rename fasttokens to fastokens --- components/src/dynamo/frontend/frontend_args.py | 4 ++-- components/src/dynamo/frontend/main.py | 4 ++-- lib/llm/src/model_card.rs | 14 +++++++------- lib/llm/src/tokenizers/fast.rs | 8 ++++---- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/components/src/dynamo/frontend/frontend_args.py b/components/src/dynamo/frontend/frontend_args.py index 1a17d1f9a6f..0e32edb820c 100644 --- a/components/src/dynamo/frontend/frontend_args.py +++ b/components/src/dynamo/frontend/frontend_args.py @@ -434,8 +434,8 @@ def add_arguments(self, parser) -> None: dest="tokenizer_backend", help=( "Tokenizer backend for BPE models: 'default' (HuggingFace tokenizers library) " - "or 'fasttokens' (fastokens crate for high-performance BPE encoding). " + "or 'fastokens' (fastokens crate for high-performance BPE encoding). " "Decoding always uses HuggingFace. Has no effect on TikToken models." ), - choices=["default", "fasttokens"], + choices=["default", "fastokens"], ) diff --git a/components/src/dynamo/frontend/main.py b/components/src/dynamo/frontend/main.py index ad367e7c8f1..a8374b5d44f 100644 --- a/components/src/dynamo/frontend/main.py +++ b/components/src/dynamo/frontend/main.py @@ -165,8 +165,8 @@ async def async_main(): config, vllm_flags, sglang_flags = parse_args() dump_config(config.dump_config_to, config) os.environ["DYN_EVENT_PLANE"] = config.event_plane - if config.tokenizer_backend == "fasttokens": - os.environ["DYN_TOKENIZER_BACKEND"] = "fasttokens" + if config.tokenizer_backend == "fastokens": + os.environ["DYN_TOKENIZER_BACKEND"] = "fastokens" else: os.environ.pop("DYN_TOKENIZER_BACKEND", None) logger.info( diff --git a/lib/llm/src/model_card.rs b/lib/llm/src/model_card.rs index 9f2c07a4d0a..a800f2197b5 100644 --- a/lib/llm/src/model_card.rs +++ b/lib/llm/src/model_card.rs @@ -379,15 +379,15 @@ impl ModelDeploymentCard { /// Load the tokenizer as a generic, backend-agnostic `Tokenizer` trait object. /// This supports both HuggingFace `tokenizer.json` and tiktoken `.model`/`.tiktoken` files. /// - /// When the `DYN_TOKENIZER_BACKEND=fasttokens` env var is set, uses `fastokens` for encoding + /// When the `DYN_TOKENIZER_BACKEND=fastokens` env var is set, uses `fastokens` for encoding pub fn tokenizer(&self) -> anyhow::Result { let use_fast = match std::env::var("DYN_TOKENIZER_BACKEND") { - Ok(v) if v == "fasttokens" => true, + Ok(v) if v == "fastokens" => true, Ok(v) if v == "default" || v.is_empty() => false, Ok(v) => { tracing::warn!( value = %v, - "Unrecognized DYN_TOKENIZER_BACKEND value, expected 'fasttokens' or 'default'; falling back to default" + "Unrecognized DYN_TOKENIZER_BACKEND value, expected 'fastokens' or 'default'; falling back to default" ); false } @@ -400,25 +400,25 @@ impl ModelDeploymentCard { anyhow::anyhow!("Tokenizer is URL-backed ({:?})", checked_file.url()) })?; - // Try fasttokens backend if requested + // Try fastokens backend if requested if use_fast { if let Some(path_str) = p.to_str() { match crate::tokenizers::FastTokenizer::from_file(path_str) { Ok(fast) => { - tracing::info!("Using fasttokens tokenizer backend"); + tracing::info!("Using fastokens tokenizer backend"); return Ok(crate::tokenizers::Tokenizer::from(Arc::new(fast))); } Err(e) => { tracing::warn!( %e, - "Failed to load fasttokens, falling back to HuggingFace" + "Failed to load fastokens, falling back to HuggingFace" ); } } } else { tracing::warn!( path = %p.display(), - "Tokenizer path contains non-UTF-8 characters, skipping fasttokens; falling back to HuggingFace" + "Tokenizer path contains non-UTF-8 characters, skipping fastokens; falling back to HuggingFace" ); } } diff --git a/lib/llm/src/tokenizers/fast.rs b/lib/llm/src/tokenizers/fast.rs index 1bcf726f0fc..d7efee49e5c 100644 --- a/lib/llm/src/tokenizers/fast.rs +++ b/lib/llm/src/tokenizers/fast.rs @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -//! Fasttokens backend using the `fastokens` crate for high-performance BPE encoding. +//! Fastokens backend using the `fastokens` crate for high-performance BPE encoding. //! //! `fastokens` only supports encoding, so this module provides a hybrid tokenizer that //! uses `fastokens` for encoding and falls back to `HuggingFaceTokenizer` for decoding. @@ -28,7 +28,7 @@ pub struct FastTokenizer { impl FastTokenizer { pub fn from_file(path: &str) -> Result { let fast_encoder = fastokens::Tokenizer::from_file(Path::new(path)) - .map_err(|e| Error::msg(format!("Error loading fasttokens tokenizer: {e}")))?; + .map_err(|e| Error::msg(format!("Error loading fastokens tokenizer: {e}")))?; let hf_decoder = HuggingFaceTokenizer::from_file(path)?; Ok(Self { fast_encoder, @@ -42,7 +42,7 @@ impl Encoder for FastTokenizer { let ids = self .fast_encoder .encode(input) - .map_err(|e| Error::msg(format!("Fasttokens encode error: {e}")))?; + .map_err(|e| Error::msg(format!("Fastokens encode error: {e}")))?; Ok(Encoding::Sp(ids)) } @@ -103,7 +103,7 @@ mod tests { assert_eq!( fast_ids.token_ids(), hf_ids.token_ids(), - "fasttokens and HuggingFace must produce identical token IDs for '{text}'" + "fastokens and HuggingFace must produce identical token IDs for '{text}'" ); } } From 70d90650d4afc3812c47124147256f8b08f5e5d2 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Sat, 14 Mar 2026 17:18:17 -0700 Subject: [PATCH 05/14] fix CI issue. point to HEAD of https://github.com/Atero-ai/fastokens/pull/5 until PR is merged --- Cargo.lock | 117 ++++------------------------------------------------- Cargo.toml | 2 +- 2 files changed, 9 insertions(+), 110 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1934a4bd718..05f4afe0da0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1063,7 +1063,7 @@ version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.48.0", ] [[package]] @@ -2410,7 +2410,7 @@ dependencies = [ [[package]] name = "fastokens" version = "0.1.0" -source = "git+https://github.com/Atero-ai/fastokens#b8e895bdec4173d27e37758b1893f39557adf733" +source = "git+https://github.com/biswapanda/fastokens?rev=aed8b9f3ba024c689c72985d62506dfa79daec25#aed8b9f3ba024c689c72985d62506dfa79daec25" dependencies = [ "daachorse", "fancy-regex 0.17.0", @@ -2575,21 +2575,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" -[[package]] -name = "foreign-types" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" -dependencies = [ - "foreign-types-shared", -] - -[[package]] -name = "foreign-types-shared" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" - [[package]] name = "form_urlencoded" version = "1.2.2" @@ -3028,7 +3013,6 @@ dependencies = [ "indicatif 0.17.11", "libc", "log", - "native-tls", "num_cpus", "rand 0.9.2", "reqwest 0.12.28", @@ -3212,22 +3196,6 @@ dependencies = [ "tower-service", ] -[[package]] -name = "hyper-tls" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" -dependencies = [ - "bytes", - "http-body-util", - "hyper 1.8.1", - "hyper-util", - "native-tls", - "tokio", - "tokio-native-tls", - "tower-service", -] - [[package]] name = "hyper-util" version = "0.1.20" @@ -3245,7 +3213,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.3", + "socket2 0.5.10", "system-configuration 0.7.0", "tokio", "tower-service", @@ -4594,23 +4562,6 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" -[[package]] -name = "native-tls" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" -dependencies = [ - "libc", - "log", - "openssl", - "openssl-probe 0.2.1", - "openssl-sys", - "schannel", - "security-framework 3.7.0", - "security-framework-sys", - "tempfile", -] - [[package]] name = "ndarray" version = "0.16.1" @@ -5236,32 +5187,6 @@ dependencies = [ "thiserror 2.0.18", ] -[[package]] -name = "openssl" -version = "0.10.76" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "951c002c75e16ea2c65b8c7e4d3d51d5530d8dfa7d060b4776828c88cfb18ecf" -dependencies = [ - "bitflags 2.11.0", - "cfg-if 1.0.4", - "foreign-types", - "libc", - "once_cell", - "openssl-macros", - "openssl-sys", -] - -[[package]] -name = "openssl-macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - [[package]] name = "openssl-probe" version = "0.1.6" @@ -5274,18 +5199,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" -[[package]] -name = "openssl-sys" -version = "0.9.112" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57d55af3b3e226502be1526dfdba67ab0e9c96fc293004e79576b2b9edb0dbdb" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - [[package]] name = "opentelemetry" version = "0.31.0" @@ -5919,7 +5832,7 @@ version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ - "heck 0.5.0", + "heck 0.4.1", "itertools 0.14.0", "log", "multimap", @@ -5939,7 +5852,7 @@ version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ - "heck 0.5.0", + "heck 0.4.1", "itertools 0.14.0", "log", "multimap", @@ -6101,7 +6014,7 @@ dependencies = [ "quinn-udp", "rustc-hash 2.1.1", "rustls", - "socket2 0.6.3", + "socket2 0.5.10", "thiserror 2.0.18", "tokio", "tracing", @@ -6138,7 +6051,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.3", + "socket2 0.5.10", "tracing", "windows-sys 0.60.2", ] @@ -6467,13 +6380,11 @@ dependencies = [ "http-body-util", "hyper 1.8.1", "hyper-rustls", - "hyper-tls", "hyper-util", "js-sys", "log", "mime", "mime_guess", - "native-tls", "percent-encoding", "pin-project-lite", "quinn", @@ -6485,7 +6396,6 @@ dependencies = [ "serde_urlencoded", "sync_wrapper 1.0.2", "tokio", - "tokio-native-tls", "tokio-rustls", "tokio-util", "tower 0.5.3", @@ -7925,16 +7835,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "tokio-native-tls" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" -dependencies = [ - "native-tls", - "tokio", -] - [[package]] name = "tokio-rayon" version = "2.1.0" @@ -8662,7 +8562,6 @@ dependencies = [ "base64 0.22.1", "flate2", "log", - "native-tls", "once_cell", "rustls", "rustls-pki-types", @@ -9150,7 +9049,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.48.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 02df9a50f98..e70746c1909 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,7 +46,7 @@ dynamo-mocker = { path = "lib/mocker", version = "1.0.0" } dynamo-kv-router = { path = "lib/kv-router", version = "1.0.0", features = ["metrics"] } dynamo-async-openai = { path = "lib/async-openai", version = "1.0.0", features = ["byot"] } dynamo-parsers = { path = "lib/parsers", version = "1.0.0" } -fastokens = { git = "https://github.com/Atero-ai/fastokens", version = "0.1.0" } +fastokens = { git = "https://github.com/biswapanda/fastokens", rev = "aed8b9f3ba024c689c72985d62506dfa79daec25", version = "0.1.0" } # kvbm kvbm-common = { path = "lib/kvbm-common", version = "0.1.0" } From 45c13628385eeecf656cb6d107107c64017e47b2 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Sat, 14 Mar 2026 18:09:50 -0700 Subject: [PATCH 06/14] address comment --- components/src/dynamo/frontend/frontend_args.py | 4 ++-- components/src/dynamo/frontend/main.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/components/src/dynamo/frontend/frontend_args.py b/components/src/dynamo/frontend/frontend_args.py index 0e32edb820c..adc60be3949 100644 --- a/components/src/dynamo/frontend/frontend_args.py +++ b/components/src/dynamo/frontend/frontend_args.py @@ -428,8 +428,8 @@ def add_arguments(self, parser) -> None: add_argument( g, - flag_name="--dyn-tokenizer-backend", - env_var="DYN_TOKENIZER_BACKEND", + flag_name="--tokenizer", + env_var="DYN_TOKENIZER", default="default", dest="tokenizer_backend", help=( diff --git a/components/src/dynamo/frontend/main.py b/components/src/dynamo/frontend/main.py index a8374b5d44f..b382fca8b46 100644 --- a/components/src/dynamo/frontend/main.py +++ b/components/src/dynamo/frontend/main.py @@ -166,9 +166,9 @@ async def async_main(): dump_config(config.dump_config_to, config) os.environ["DYN_EVENT_PLANE"] = config.event_plane if config.tokenizer_backend == "fastokens": - os.environ["DYN_TOKENIZER_BACKEND"] = "fastokens" + os.environ["DYN_TOKENIZER"] = "fastokens" else: - os.environ.pop("DYN_TOKENIZER_BACKEND", None) + os.environ.pop("DYN_TOKENIZER", None) logger.info( f"Request migration {'enabled' if config.migration_limit > 0 else 'disabled'} " f"(limit: {config.migration_limit})" From 516cc2e5bf6f72136f854419d137d5b4b7587a24 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Sat, 14 Mar 2026 18:10:38 -0700 Subject: [PATCH 07/14] address comment --- lib/llm/src/model_card.rs | 6 +++--- lib/llm/src/tokenizers/fast.rs | 12 +++++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/lib/llm/src/model_card.rs b/lib/llm/src/model_card.rs index a800f2197b5..c3f2fe942e9 100644 --- a/lib/llm/src/model_card.rs +++ b/lib/llm/src/model_card.rs @@ -379,15 +379,15 @@ impl ModelDeploymentCard { /// Load the tokenizer as a generic, backend-agnostic `Tokenizer` trait object. /// This supports both HuggingFace `tokenizer.json` and tiktoken `.model`/`.tiktoken` files. /// - /// When the `DYN_TOKENIZER_BACKEND=fastokens` env var is set, uses `fastokens` for encoding + /// When the `DYN_TOKENIZER=fastokens` env var is set, uses `fastokens` for encoding pub fn tokenizer(&self) -> anyhow::Result { - let use_fast = match std::env::var("DYN_TOKENIZER_BACKEND") { + let use_fast = match std::env::var("DYN_TOKENIZER") { Ok(v) if v == "fastokens" => true, Ok(v) if v == "default" || v.is_empty() => false, Ok(v) => { tracing::warn!( value = %v, - "Unrecognized DYN_TOKENIZER_BACKEND value, expected 'fastokens' or 'default'; falling back to default" + "Unrecognized DYN_TOKENIZER value, expected 'fastokens' or 'default'; falling back to default" ); false } diff --git a/lib/llm/src/tokenizers/fast.rs b/lib/llm/src/tokenizers/fast.rs index d7efee49e5c..83c5c7e3bad 100644 --- a/lib/llm/src/tokenizers/fast.rs +++ b/lib/llm/src/tokenizers/fast.rs @@ -144,11 +144,17 @@ mod tests { } } - // The accumulated streamed text should match the decoded continuation - let expected = wrapper.decode(&cont_ids, true).unwrap(); + // DecodeStream uses prompt tokens as context, so the expected text is + // decode(prompt + continuation) minus decode(prompt) -- not a bare + // decode(continuation) which lacks the surrounding context. + let mut all_ids = prompt_ids.clone(); + all_ids.extend_from_slice(&cont_ids); + let full_text = wrapper.decode(&all_ids, true).unwrap(); + let prompt_text = wrapper.decode(&prompt_ids, true).unwrap(); + let expected = &full_text[prompt_text.len()..]; assert_eq!( accumulated, expected, - "streamed chunks must equal batch-decoded continuation" + "streamed chunks must equal context-aware decoded continuation" ); } } From 076f94d3931f54954079353af26dc3a8b0b4c618 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Sat, 14 Mar 2026 18:10:51 -0700 Subject: [PATCH 08/14] update --- lib/bindings/python/Cargo.lock | 103 +-------------------------------- 1 file changed, 1 insertion(+), 102 deletions(-) diff --git a/lib/bindings/python/Cargo.lock b/lib/bindings/python/Cargo.lock index e3051ef0e56..90a199de767 100644 --- a/lib/bindings/python/Cargo.lock +++ b/lib/bindings/python/Cargo.lock @@ -2055,7 +2055,7 @@ dependencies = [ [[package]] name = "fastokens" version = "0.1.0" -source = "git+https://github.com/Atero-ai/fastokens#b8e895bdec4173d27e37758b1893f39557adf733" +source = "git+https://github.com/biswapanda/fastokens?rev=aed8b9f3ba024c689c72985d62506dfa79daec25#aed8b9f3ba024c689c72985d62506dfa79daec25" dependencies = [ "daachorse", "fancy-regex 0.17.0", @@ -2214,21 +2214,6 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" -[[package]] -name = "foreign-types" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" -dependencies = [ - "foreign-types-shared", -] - -[[package]] -name = "foreign-types-shared" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" - [[package]] name = "form_urlencoded" version = "1.2.2" @@ -2583,7 +2568,6 @@ dependencies = [ "indicatif", "libc", "log", - "native-tls", "num_cpus", "rand 0.9.2", "reqwest", @@ -2721,22 +2705,6 @@ dependencies = [ "tower-service", ] -[[package]] -name = "hyper-tls" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" -dependencies = [ - "bytes", - "http-body-util", - "hyper", - "hyper-util", - "native-tls", - "tokio", - "tokio-native-tls", - "tower-service", -] - [[package]] name = "hyper-util" version = "0.1.20" @@ -3902,23 +3870,6 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" -[[package]] -name = "native-tls" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" -dependencies = [ - "libc", - "log", - "openssl", - "openssl-probe 0.2.1", - "openssl-sys", - "schannel", - "security-framework 3.7.0", - "security-framework-sys", - "tempfile", -] - [[package]] name = "ndarray" version = "0.16.1" @@ -4491,32 +4442,6 @@ dependencies = [ "thiserror 2.0.18", ] -[[package]] -name = "openssl" -version = "0.10.76" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "951c002c75e16ea2c65b8c7e4d3d51d5530d8dfa7d060b4776828c88cfb18ecf" -dependencies = [ - "bitflags 2.11.0", - "cfg-if 1.0.4", - "foreign-types", - "libc", - "once_cell", - "openssl-macros", - "openssl-sys", -] - -[[package]] -name = "openssl-macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "openssl-probe" version = "0.1.6" @@ -4529,18 +4454,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" -[[package]] -name = "openssl-sys" -version = "0.9.112" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57d55af3b3e226502be1526dfdba67ab0e9c96fc293004e79576b2b9edb0dbdb" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - [[package]] name = "opentelemetry" version = "0.31.0" @@ -5700,13 +5613,11 @@ dependencies = [ "http-body-util", "hyper", "hyper-rustls", - "hyper-tls", "hyper-util", "js-sys", "log", "mime", "mime_guess", - "native-tls", "percent-encoding", "pin-project-lite", "quinn", @@ -5718,7 +5629,6 @@ dependencies = [ "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-native-tls", "tokio-rustls", "tokio-util", "tower", @@ -6872,16 +6782,6 @@ dependencies = [ "syn", ] -[[package]] -name = "tokio-native-tls" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" -dependencies = [ - "native-tls", - "tokio", -] - [[package]] name = "tokio-rayon" version = "2.1.0" @@ -7498,7 +7398,6 @@ dependencies = [ "base64 0.22.1", "flate2", "log", - "native-tls", "once_cell", "rustls", "rustls-pki-types", From ae96da0a3fc410bfc65f9a444175d3f5799a390e Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Sat, 14 Mar 2026 18:20:08 -0700 Subject: [PATCH 09/14] update --- .../src/dynamo/frontend/frontend_args.py | 7 ++ lib/bindings/kvbm/Cargo.lock | 96 ++++++++++++++++++- 2 files changed, 99 insertions(+), 4 deletions(-) diff --git a/components/src/dynamo/frontend/frontend_args.py b/components/src/dynamo/frontend/frontend_args.py index adc60be3949..d51543e6f75 100644 --- a/components/src/dynamo/frontend/frontend_args.py +++ b/components/src/dynamo/frontend/frontend_args.py @@ -78,6 +78,8 @@ class FrontendConfig(KvRouterConfigBase): preprocess_workers: int tokenizer_backend: str + _VALID_TOKENIZER_BACKENDS = {"default", "fastokens"} + def validate(self) -> None: if bool(self.tls_cert_path) ^ bool(self.tls_key_path): # ^ is XOR raise ValueError( @@ -89,6 +91,11 @@ def validate(self) -> None: ) if self.router_enable_cache_control and self.router_mode != "kv": raise ValueError("--enable-cache-control requires --router-mode=kv") + if self.tokenizer_backend not in self._VALID_TOKENIZER_BACKENDS: + raise ValueError( + f"--tokenizer: invalid value '{self.tokenizer_backend}' " + f"(choose from {sorted(self._VALID_TOKENIZER_BACKENDS)})" + ) @register_encoder(FrontendConfig) diff --git a/lib/bindings/kvbm/Cargo.lock b/lib/bindings/kvbm/Cargo.lock index ff9417611ec..d7f195a7eae 100644 --- a/lib/bindings/kvbm/Cargo.lock +++ b/lib/bindings/kvbm/Cargo.lock @@ -585,7 +585,16 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" dependencies = [ - "bit-vec", + "bit-vec 0.6.3", +] + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec 0.8.0", ] [[package]] @@ -594,6 +603,12 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bit_field" version = "0.10.3" @@ -1139,6 +1154,12 @@ dependencies = [ "syn", ] +[[package]] +name = "daachorse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63b7ef7a4be509357f4804d0a22e830daddb48f19fd604e4ad32ddce04a94c36" + [[package]] name = "darling" version = "0.20.11" @@ -1574,6 +1595,7 @@ dependencies = [ "dynamo-runtime", "dynamo-tokens", "either", + "fastokens", "flate2", "futures", "futures-util", @@ -1970,11 +1992,40 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2" dependencies = [ - "bit-set", + "bit-set 0.5.3", "regex-automata", "regex-syntax", ] +[[package]] +name = "fancy-regex" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72cf461f865c862bb7dc573f643dd6a2b6842f7c30b07882b56bd148cc2761b8" +dependencies = [ + "bit-set 0.8.0", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "fastokens" +version = "0.1.0" +source = "git+https://github.com/biswapanda/fastokens?rev=aed8b9f3ba024c689c72985d62506dfa79daec25#aed8b9f3ba024c689c72985d62506dfa79daec25" +dependencies = [ + "daachorse", + "fancy-regex 0.17.0", + "hf-hub", + "icu_normalizer", + "memchr", + "pcre2", + "rayon", + "serde", + "serde_json", + "strum", + "thiserror 2.0.18", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -2671,6 +2722,9 @@ dependencies = [ "icu_properties", "icu_provider", "smallvec", + "utf16_iter", + "utf8_iter", + "write16", "zerovec", ] @@ -4317,7 +4371,7 @@ dependencies = [ "base64 0.22.1", "bstr", "clap", - "fancy-regex", + "fancy-regex 0.13.0", "futures", "image", "regex", @@ -4528,6 +4582,28 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" +[[package]] +name = "pcre2" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e970b0fcce0c7ee6ef662744ff711f21ccd6f11b7cf03cd187a80e89797fc67" +dependencies = [ + "libc", + "log", + "pcre2-sys", +] + +[[package]] +name = "pcre2-sys" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18b9073c1a2549bd409bf4a32c94d903bb1a09bf845bc306ae148897fa0760a4" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "pear" version = "0.2.9" @@ -6482,7 +6558,7 @@ dependencies = [ "anyhow", "base64 0.22.1", "bstr", - "fancy-regex", + "fancy-regex 0.13.0", "lazy_static", "regex", "rustc-hash 1.1.0", @@ -7278,6 +7354,12 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -8045,6 +8127,12 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + [[package]] name = "writeable" version = "0.6.2" From 2293fc5130fb6158e0fe0474838d4f8ef218b7f1 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Sat, 14 Mar 2026 18:48:50 -0700 Subject: [PATCH 10/14] add BSD licensed libpcre2-dev for fastokens --- deploy/inference-gateway/epp/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deploy/inference-gateway/epp/Dockerfile b/deploy/inference-gateway/epp/Dockerfile index 86d4944a616..206cd2cdda1 100644 --- a/deploy/inference-gateway/epp/Dockerfile +++ b/deploy/inference-gateway/epp/Dockerfile @@ -87,10 +87,12 @@ ARG BUILD_REF WORKDIR /workspace # Install build dependencies for CGO +# libpcre2-dev: required by the pcre2 crate (transitive dep of fastokens) RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ g++ \ libc-dev \ + libpcre2-dev \ && rm -rf /var/lib/apt/lists/* # Copy go mod files first for better caching (from default context = epp/) From a9de37b58a74050537a7685ac84876151285a9f6 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Sat, 14 Mar 2026 19:41:55 -0700 Subject: [PATCH 11/14] fix --- .../epp/pkg/plugins/dynamo_kv_scorer/plugin.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/inference-gateway/epp/pkg/plugins/dynamo_kv_scorer/plugin.go b/deploy/inference-gateway/epp/pkg/plugins/dynamo_kv_scorer/plugin.go index 3228b3e0c27..48ac8b5d8d0 100644 --- a/deploy/inference-gateway/epp/pkg/plugins/dynamo_kv_scorer/plugin.go +++ b/deploy/inference-gateway/epp/pkg/plugins/dynamo_kv_scorer/plugin.go @@ -24,7 +24,7 @@ package dynamo_kv_scorer /* #cgo CPPFLAGS: -I${SRCDIR}/include #cgo CXXFLAGS: -std=c++17 -#cgo LDFLAGS: ${SRCDIR}/lib/libdynamo_llm_capi.a -lstdc++ -ldl -lpthread -lm +#cgo LDFLAGS: ${SRCDIR}/lib/libdynamo_llm_capi.a -lstdc++ -ldl -lpthread -lm -lpcre2-8 #include #include From d9a1ffc148af010d7e09d5efccba95cac26946f8 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Sat, 14 Mar 2026 21:46:31 -0700 Subject: [PATCH 12/14] make disable pcre2 feature by default --- Cargo.lock | 31 +++---------------- Cargo.toml | 2 +- deploy/inference-gateway/epp/Dockerfile | 2 -- .../pkg/plugins/dynamo_kv_scorer/plugin.go | 2 +- 4 files changed, 6 insertions(+), 31 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 05f4afe0da0..cdfb9cc01b6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2410,14 +2410,13 @@ dependencies = [ [[package]] name = "fastokens" version = "0.1.0" -source = "git+https://github.com/biswapanda/fastokens?rev=aed8b9f3ba024c689c72985d62506dfa79daec25#aed8b9f3ba024c689c72985d62506dfa79daec25" +source = "git+https://github.com/biswapanda/fastokens?rev=e79e7bd2a4b2e1bc3f372aa29a268954c95f43c7#e79e7bd2a4b2e1bc3f372aa29a268954c95f43c7" dependencies = [ "daachorse", "fancy-regex 0.17.0", "hf-hub", "icu_normalizer", "memchr", - "pcre2", "rayon", "serde", "serde_json", @@ -3213,7 +3212,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.5.10", + "socket2 0.6.3", "system-configuration 0.7.0", "tokio", "tower-service", @@ -5384,28 +5383,6 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" -[[package]] -name = "pcre2" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e970b0fcce0c7ee6ef662744ff711f21ccd6f11b7cf03cd187a80e89797fc67" -dependencies = [ - "libc", - "log", - "pcre2-sys", -] - -[[package]] -name = "pcre2-sys" -version = "0.2.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18b9073c1a2549bd409bf4a32c94d903bb1a09bf845bc306ae148897fa0760a4" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - [[package]] name = "pear" version = "0.2.9" @@ -6014,7 +5991,7 @@ dependencies = [ "quinn-udp", "rustc-hash 2.1.1", "rustls", - "socket2 0.5.10", + "socket2 0.6.3", "thiserror 2.0.18", "tokio", "tracing", @@ -6051,7 +6028,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.5.10", + "socket2 0.6.3", "tracing", "windows-sys 0.60.2", ] diff --git a/Cargo.toml b/Cargo.toml index e70746c1909..c09405c62bc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,7 +46,7 @@ dynamo-mocker = { path = "lib/mocker", version = "1.0.0" } dynamo-kv-router = { path = "lib/kv-router", version = "1.0.0", features = ["metrics"] } dynamo-async-openai = { path = "lib/async-openai", version = "1.0.0", features = ["byot"] } dynamo-parsers = { path = "lib/parsers", version = "1.0.0" } -fastokens = { git = "https://github.com/biswapanda/fastokens", rev = "aed8b9f3ba024c689c72985d62506dfa79daec25", version = "0.1.0" } +fastokens = { git = "https://github.com/biswapanda/fastokens", rev = "e79e7bd2a4b2e1bc3f372aa29a268954c95f43c7", version = "0.1.0", default-features = false } # kvbm kvbm-common = { path = "lib/kvbm-common", version = "0.1.0" } diff --git a/deploy/inference-gateway/epp/Dockerfile b/deploy/inference-gateway/epp/Dockerfile index 206cd2cdda1..86d4944a616 100644 --- a/deploy/inference-gateway/epp/Dockerfile +++ b/deploy/inference-gateway/epp/Dockerfile @@ -87,12 +87,10 @@ ARG BUILD_REF WORKDIR /workspace # Install build dependencies for CGO -# libpcre2-dev: required by the pcre2 crate (transitive dep of fastokens) RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ g++ \ libc-dev \ - libpcre2-dev \ && rm -rf /var/lib/apt/lists/* # Copy go mod files first for better caching (from default context = epp/) diff --git a/deploy/inference-gateway/epp/pkg/plugins/dynamo_kv_scorer/plugin.go b/deploy/inference-gateway/epp/pkg/plugins/dynamo_kv_scorer/plugin.go index 48ac8b5d8d0..3228b3e0c27 100644 --- a/deploy/inference-gateway/epp/pkg/plugins/dynamo_kv_scorer/plugin.go +++ b/deploy/inference-gateway/epp/pkg/plugins/dynamo_kv_scorer/plugin.go @@ -24,7 +24,7 @@ package dynamo_kv_scorer /* #cgo CPPFLAGS: -I${SRCDIR}/include #cgo CXXFLAGS: -std=c++17 -#cgo LDFLAGS: ${SRCDIR}/lib/libdynamo_llm_capi.a -lstdc++ -ldl -lpthread -lm -lpcre2-8 +#cgo LDFLAGS: ${SRCDIR}/lib/libdynamo_llm_capi.a -lstdc++ -ldl -lpthread -lm #include #include From dbc6a55e351bae45fa1604b1b5fbbb6279f6ee40 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Sat, 14 Mar 2026 22:05:24 -0700 Subject: [PATCH 13/14] cargo toml/lock update --- lib/bindings/kvbm/Cargo.lock | 25 +------------------------ lib/bindings/python/Cargo.lock | 25 +------------------------ 2 files changed, 2 insertions(+), 48 deletions(-) diff --git a/lib/bindings/kvbm/Cargo.lock b/lib/bindings/kvbm/Cargo.lock index d7f195a7eae..82567dcba94 100644 --- a/lib/bindings/kvbm/Cargo.lock +++ b/lib/bindings/kvbm/Cargo.lock @@ -2011,14 +2011,13 @@ dependencies = [ [[package]] name = "fastokens" version = "0.1.0" -source = "git+https://github.com/biswapanda/fastokens?rev=aed8b9f3ba024c689c72985d62506dfa79daec25#aed8b9f3ba024c689c72985d62506dfa79daec25" +source = "git+https://github.com/biswapanda/fastokens?rev=e79e7bd2a4b2e1bc3f372aa29a268954c95f43c7#e79e7bd2a4b2e1bc3f372aa29a268954c95f43c7" dependencies = [ "daachorse", "fancy-regex 0.17.0", "hf-hub", "icu_normalizer", "memchr", - "pcre2", "rayon", "serde", "serde_json", @@ -4582,28 +4581,6 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" -[[package]] -name = "pcre2" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e970b0fcce0c7ee6ef662744ff711f21ccd6f11b7cf03cd187a80e89797fc67" -dependencies = [ - "libc", - "log", - "pcre2-sys", -] - -[[package]] -name = "pcre2-sys" -version = "0.2.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18b9073c1a2549bd409bf4a32c94d903bb1a09bf845bc306ae148897fa0760a4" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - [[package]] name = "pear" version = "0.2.9" diff --git a/lib/bindings/python/Cargo.lock b/lib/bindings/python/Cargo.lock index 90a199de767..190691a4852 100644 --- a/lib/bindings/python/Cargo.lock +++ b/lib/bindings/python/Cargo.lock @@ -2055,14 +2055,13 @@ dependencies = [ [[package]] name = "fastokens" version = "0.1.0" -source = "git+https://github.com/biswapanda/fastokens?rev=aed8b9f3ba024c689c72985d62506dfa79daec25#aed8b9f3ba024c689c72985d62506dfa79daec25" +source = "git+https://github.com/biswapanda/fastokens?rev=e79e7bd2a4b2e1bc3f372aa29a268954c95f43c7#e79e7bd2a4b2e1bc3f372aa29a268954c95f43c7" dependencies = [ "daachorse", "fancy-regex 0.17.0", "hf-hub", "icu_normalizer", "memchr", - "pcre2", "rayon", "serde", "serde_json", @@ -4639,28 +4638,6 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" -[[package]] -name = "pcre2" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e970b0fcce0c7ee6ef662744ff711f21ccd6f11b7cf03cd187a80e89797fc67" -dependencies = [ - "libc", - "log", - "pcre2-sys", -] - -[[package]] -name = "pcre2-sys" -version = "0.2.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18b9073c1a2549bd409bf4a32c94d903bb1a09bf845bc306ae148897fa0760a4" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - [[package]] name = "pear" version = "0.2.9" From 9e21cf5551ac48f36ed101d88b060f918f8e3cc3 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Sat, 14 Mar 2026 22:29:23 -0700 Subject: [PATCH 14/14] rename to fastokens --- lib/llm/src/tokenizers.rs | 4 ++-- lib/llm/src/tokenizers/{fast.rs => fastokens.rs} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename lib/llm/src/tokenizers/{fast.rs => fastokens.rs} (100%) diff --git a/lib/llm/src/tokenizers.rs b/lib/llm/src/tokenizers.rs index a036957a880..21b411cc9de 100644 --- a/lib/llm/src/tokenizers.rs +++ b/lib/llm/src/tokenizers.rs @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -pub mod fast; +pub mod fastokens; pub mod hf; pub mod tiktoken; @@ -16,7 +16,7 @@ use std::{ops::Deref, path::Path}; use crate::protocols::TokenIdType; pub use anyhow::{Error, Result}; -pub use fast::FastTokenizer; +pub use fastokens::FastTokenizer; pub use hf::HuggingFaceTokenizer; pub use tiktoken::TikTokenTokenizer; diff --git a/lib/llm/src/tokenizers/fast.rs b/lib/llm/src/tokenizers/fastokens.rs similarity index 100% rename from lib/llm/src/tokenizers/fast.rs rename to lib/llm/src/tokenizers/fastokens.rs