diff --git a/benchmarks/benchmark_hash.py b/benchmarks/benchmark_hash.py
new file mode 100644
index 000000000000..08cdc012d652
--- /dev/null
+++ b/benchmarks/benchmark_hash.py
@@ -0,0 +1,120 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Micro benchmark comparing built-in hash(), SHA-256, and xxHash.
+
+This focuses on a single test payload shaped like the prefix-cache hash input:
+ (32-byte bytes object, 32-int tuple)
+
+Usage:
+    python benchmarks/benchmark_hash.py --iterations 20000
+"""
+
+from __future__ import annotations
+
+import argparse
+import random
+import statistics
+import time
+from collections.abc import Callable, Iterable
+
+from vllm.utils.hashing import sha256, xxhash
+
+
+def _generate_test_data(seed: int) -> tuple[bytes, tuple[int, ...]]:
+ """Generate a deterministic test payload."""
+    rng = random.Random(seed)
+    bytes_data = bytes(rng.getrandbits(8) for _ in range(32))
+    int_tuple = tuple(rng.randint(1, 1_000_000) for _ in range(32))
+ return (bytes_data, int_tuple)
+
+
+def _benchmark_func(func: Callable[[tuple], object], data: tuple, iterations: int):
+ """Return (avg_seconds, std_seconds) for hashing `data` `iterations` times."""
+ times: list[float] = []
+
+ # Warm-up to avoid first-run noise.
+ for _ in range(200):
+ func(data)
+
+ for _ in range(iterations):
+ start = time.perf_counter()
+ func(data)
+ end = time.perf_counter()
+ times.append(end - start)
+
+ avg = statistics.mean(times)
+ std = statistics.stdev(times) if len(times) > 1 else 0.0
+ return avg, std
+
+
+def _run_benchmarks(
+ benchmarks: Iterable[tuple[str, Callable[[tuple], object]]],
+ data: tuple,
+ iterations: int,
+):
+ """Yield (name, avg, std) for each benchmark, skipping unavailable ones."""
+ for name, func in benchmarks:
+ try:
+ avg, std = _benchmark_func(func, data, iterations)
+ except ModuleNotFoundError as exc:
+ print(f"Skipping {name}: {exc}")
+ continue
+ yield name, avg, std
+
+
+def builtin_hash(data: tuple) -> int:
+ """Wrapper for Python's built-in hash()."""
+ return hash(data)
+
+
+def main() -> None:
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument(
+ "--iterations",
+ type=int,
+ default=10_000,
+ help="Number of measured iterations per hash function.",
+ )
+ parser.add_argument(
+ "--seed", type=int, default=42, help="Random seed for test payload."
+ )
+ args = parser.parse_args()
+
+ data = _generate_test_data(args.seed)
+ benchmarks = (
+ ("SHA256 (pickle)", sha256),
+ ("xxHash (pickle)", xxhash),
+ ("built-in hash()", builtin_hash),
+ )
+
+ print("=" * 60)
+ print("HASH FUNCTION MICRO BENCHMARK")
+ print("=" * 60)
+ print("Test data: (32-byte bytes object, 32-int tuple)")
+ print(f"Iterations: {args.iterations:,}")
+ print("=" * 60)
+
+ results = list(_run_benchmarks(benchmarks, data, args.iterations))
+ builtin_entry = next((r for r in results if r[0] == "built-in hash()"), None)
+
+ print("\nResults:")
+ for name, avg, std in results:
+ print(f" {name:16s}: {avg * 1e6:8.2f} ± {std * 1e6:6.2f} μs")
+
+ if builtin_entry:
+ _, builtin_avg, _ = builtin_entry
+ print("\n" + "=" * 60)
+ print("SUMMARY (relative to built-in hash())")
+ print("=" * 60)
+ for name, avg, _ in results:
+ if name == "built-in hash()":
+ continue
+ speed_ratio = avg / builtin_avg
+ print(f"• {name} is {speed_ratio:.1f}x slower than built-in hash()")
+ else:
+ print("\nBuilt-in hash() result missing; cannot compute speed ratios.")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/benchmarks/benchmark_prefix_block_hash.py b/benchmarks/benchmark_prefix_block_hash.py
new file mode 100644
index 000000000000..8bcd8af0d310
--- /dev/null
+++ b/benchmarks/benchmark_prefix_block_hash.py
@@ -0,0 +1,110 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""
+Simple benchmark to compare prefix-cache block hashing algorithms.
+
+Example:
+    python benchmarks/benchmark_prefix_block_hash.py --num-blocks 20000 --block-size 32
+"""
+
+from __future__ import annotations
+
+import argparse
+import random
+import statistics
+import sys
+import time
+from collections.abc import Callable, Iterable, Sequence
+
+from vllm.utils.hashing import get_hash_fn_by_name
+from vllm.v1.core.kv_cache_utils import BlockHash, hash_block_tokens, init_none_hash
+
+SUPPORTED_ALGOS = ("sha256", "sha256_cbor", "xxhash", "xxhash_cbor")
+
+
+def _generate_blocks(
+ num_blocks: int, block_size: int, vocab_size: int, seed: int
+) -> list[list[int]]:
+ rng = random.Random(seed)
+ return [
+ [rng.randrange(vocab_size) for _ in range(block_size)]
+ for _ in range(num_blocks)
+ ]
+
+
+def _hash_all_blocks(
+ hash_fn: Callable[[object], bytes],
+ blocks: Iterable[Sequence[int]],
+) -> float:
+ parent_hash: BlockHash | None = None
+ start = time.perf_counter()
+ for block in blocks:
+ parent_hash = hash_block_tokens(hash_fn, parent_hash, block, extra_keys=None)
+ end = time.perf_counter()
+ return end - start
+
+
+def _benchmark(
+ hash_algo: str,
+ blocks: list[list[int]],
+ trials: int,
+) -> tuple[float, float, float] | None:
+ try:
+ hash_fn = get_hash_fn_by_name(hash_algo)
+ init_none_hash(hash_fn)
+ timings = [_hash_all_blocks(hash_fn, blocks) for _ in range(trials)]
+ except ModuleNotFoundError as exc:
+ print(f"Skipping {hash_algo}: {exc}", file=sys.stderr)
+ return None
+
+ avg = statistics.mean(timings)
+ best = min(timings)
+ # throughput: tokens / second
+ tokens_hashed = len(blocks) * len(blocks[0])
+ throughput = tokens_hashed / best
+ return avg, best, throughput
+
+
+def main() -> None:
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument("--num-blocks", type=int, default=10000, help="Block count.")
+ parser.add_argument("--block-size", type=int, default=32, help="Tokens per block.")
+ parser.add_argument(
+ "--vocab-size", type=int, default=32000, help="Token id range [0, vocab_size)."
+ )
+ parser.add_argument("--seed", type=int, default=0, help="Random seed.")
+ parser.add_argument(
+ "--trials", type=int, default=5, help="Number of timed trials per algorithm."
+ )
+ parser.add_argument(
+ "--algorithms",
+ nargs="+",
+ default=SUPPORTED_ALGOS,
+ choices=SUPPORTED_ALGOS,
+ help="Hash algorithms to benchmark.",
+ )
+ args = parser.parse_args()
+
+ blocks = _generate_blocks(
+ args.num_blocks, args.block_size, args.vocab_size, args.seed
+ )
+ print(
+ f"Benchmarking {len(args.algorithms)} algorithms on "
+ f"{args.num_blocks} blocks (block size={args.block_size})."
+ )
+
+ for algo in args.algorithms:
+ result = _benchmark(algo, blocks, args.trials)
+ if result is None:
+ continue
+
+ avg, best, throughput = result
+ print(
+ f"{algo:14s} avg: {avg:.6f}s best: {best:.6f}s "
+ f"throughput: {throughput / 1e6:.2f}M tokens/s"
+ )
+
+
+if __name__ == "__main__":
+ main()
diff --git a/docs/contributing/benchmarks.md b/docs/contributing/benchmarks.md
index c9bc9cfe28a3..8b0e3081cdb3 100644
--- a/docs/contributing/benchmarks.md
+++ b/docs/contributing/benchmarks.md
@@ -682,6 +682,35 @@ vllm bench serve \
+#### 🧪 Hashing Benchmarks
+
+Two helper scripts live in `benchmarks/` to compare hashing options used by prefix caching and related utilities. They are standalone (no server required) and help choose a hash algorithm before enabling prefix caching in production.
+
+- `benchmarks/benchmark_hash.py`: Micro-benchmark that measures per-call latency of three implementations (Python's built-in `hash()`, pickle + SHA-256, and pickle + xxHash) on a representative `(bytes, tuple[int])` payload.
+
+```bash
+python benchmarks/benchmark_hash.py --iterations 20000 --seed 42
+```
+
+- `benchmarks/benchmark_prefix_block_hash.py`: End-to-end block hashing benchmark that runs the full prefix-cache hash pipeline (`hash_block_tokens`) across many fake blocks and reports throughput.
+
+```bash
+python benchmarks/benchmark_prefix_block_hash.py --num-blocks 20000 --block-size 32 --trials 5
+```
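+
+To see what this benchmark actually times, here is a minimal sketch (not part of the script) of the chained hashing it exercises, using only the APIs the script itself imports: each block's hash folds in its parent's hash, so every hash depends on the full token prefix.
+
+```python
+from vllm.utils.hashing import get_hash_fn_by_name
+from vllm.v1.core.kv_cache_utils import hash_block_tokens, init_none_hash
+
+hash_fn = get_hash_fn_by_name("sha256")
+init_none_hash(hash_fn)  # initializes the placeholder hash used for the parent-less first block
+
+parent = None
+for block in ([1, 2, 3, 4], [5, 6, 7, 8]):  # two toy 4-token blocks
+    parent = hash_block_tokens(hash_fn, parent, block, extra_keys=None)
+print(parent)  # hash of the second block, chained through the first
+```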
+
+Supported algorithms: `sha256`, `sha256_cbor`, `xxhash`, `xxhash_cbor`. The xxHash variants need the optional `xxhash` package and the CBOR variants need `cbor2`; install both to exercise every variant:
+
+```bash
+uv pip install xxhash cbor2
+```
+
+If an algorithm’s dependency is missing, the script will skip it and continue.
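+
+The same hash functions can be resolved by name outside the benchmark scripts. A minimal sketch (the payload below is an arbitrary example shaped like the micro-benchmark's input; a missing xxHash dependency is skipped gracefully, just as in the scripts):
+
+```python
+from vllm.utils.hashing import get_hash_fn_by_name
+
+payload = (b"\x00" * 32, tuple(range(32)))  # (bytes, tuple[int]) payload
+for name in ("sha256", "sha256_cbor", "xxhash", "xxhash_cbor"):
+    try:
+        digest = get_hash_fn_by_name(name)(payload)
+        print(f"{name:12s} {digest.hex()}")
+    except ModuleNotFoundError as exc:  # raised at call time if xxhash is absent
+        print(f"{name:12s} skipped ({exc})")
+```
+
+Once you have picked an algorithm, select it for prefix caching with the `--prefix-caching-hash-algo` engine argument (e.g. `--prefix-caching-hash-algo xxhash_cbor`).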
+
#### ⚡ Request Prioritization Benchmark
diff --git a/requirements/common.txt b/requirements/common.txt
index f2d1c0762ef6..4b5bf03fac26 100644
--- a/requirements/common.txt
+++ b/requirements/common.txt
@@ -46,6 +46,7 @@ scipy # Required for phi-4-multimodal-instruct
ninja # Required for xgrammar, rocm, tpu, xpu
pybase64 # fast base64 implementation
cbor2 # Required for cross-language serialization of hashable objects
+xxhash # Required for fast hashing for prefix caching
setproctitle # Used to set process names for better debugging and monitoring
openai-harmony >= 0.0.3 # Required for gpt-oss
anthropic == 0.71.0
diff --git a/tests/v1/engine/test_engine_args.py b/tests/v1/engine/test_engine_args.py
index e96759ed66a7..527a56ff49ee 100644
--- a/tests/v1/engine/test_engine_args.py
+++ b/tests/v1/engine/test_engine_args.py
@@ -9,6 +9,7 @@
from vllm.engine.arg_utils import EngineArgs
from vllm.usage.usage_lib import UsageContext
from vllm.utils.argparse_utils import FlexibleArgumentParser
+from vllm.utils.hashing import _xxhash
def test_prefix_caching_from_cli():
@@ -48,6 +49,21 @@ def test_prefix_caching_from_cli():
args = parser.parse_args(["--prefix-caching-hash-algo", "invalid"])
+@pytest.mark.skipif(_xxhash is None, reason="xxhash not installed")
+def test_prefix_caching_xxhash_from_cli():
+ parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
+
+ # set hash algorithm to xxhash (pickle)
+ args = parser.parse_args(["--prefix-caching-hash-algo", "xxhash"])
+ vllm_config = EngineArgs.from_cli_args(args=args).create_engine_config()
+ assert vllm_config.cache_config.prefix_caching_hash_algo == "xxhash"
+
+ # set hash algorithm to xxhash_cbor
+ args = parser.parse_args(["--prefix-caching-hash-algo", "xxhash_cbor"])
+ vllm_config = EngineArgs.from_cli_args(args=args).create_engine_config()
+ assert vllm_config.cache_config.prefix_caching_hash_algo == "xxhash_cbor"
+
+
def test_defaults_with_usage_context():
engine_args = EngineArgs(model="facebook/opt-125m")
vllm_config: VllmConfig = engine_args.create_engine_config(UsageContext.LLM_CLASS)
diff --git a/vllm/config/cache.py b/vllm/config/cache.py
index 2652c7c06ad0..30fd3af95f94 100644
--- a/vllm/config/cache.py
+++ b/vllm/config/cache.py
@@ -30,7 +30,7 @@
"fp8_ds_mla",
]
MambaDType = Literal["auto", "float32"]
-PrefixCachingHashAlgo = Literal["sha256", "sha256_cbor"]
+PrefixCachingHashAlgo = Literal["sha256", "sha256_cbor", "xxhash", "xxhash_cbor"]
KVOffloadingBackend = Literal["native", "lmcache"]
@@ -79,7 +79,11 @@ class CacheConfig:
"""Set the hash algorithm for prefix caching:\n
- "sha256" uses Pickle for object serialization before hashing.\n
- "sha256_cbor" provides a reproducible, cross-language compatible hash. It
- serializes objects using canonical CBOR and hashes them with SHA-256."""
+ serializes objects using canonical CBOR and hashes them with SHA-256.\n
+    - "xxhash" uses Pickle serialization with xxHash (XXH3, 128-bit) for faster,
+    non-cryptographic hashing. Requires the optional `xxhash` package.\n
+    - "xxhash_cbor" combines canonical CBOR serialization with xxHash for
+    reproducible hashing. Requires the optional `xxhash` package."""
cpu_offload_gb: float = Field(default=0, ge=0)
"""The space in GiB to offload to CPU, per GPU. Default is 0, which means
no offloading. Intuitively, this argument can be seen as a virtual way to
diff --git a/vllm/utils/hashing.py b/vllm/utils/hashing.py
index 49f4f13d115f..07bd30350835 100644
--- a/vllm/utils/hashing.py
+++ b/vllm/utils/hashing.py
@@ -10,6 +10,14 @@
import cbor2
+try:
+ import xxhash as _xxhash
+
+ if not hasattr(_xxhash, "xxh3_128_digest"):
+ _xxhash = None
+except ImportError: # pragma: no cover
+ _xxhash = None
+
def sha256(input: Any) -> bytes:
"""Hash any picklable Python object using SHA-256.
@@ -46,6 +54,27 @@ def sha256_cbor(input: Any) -> bytes:
return hashlib.sha256(input_bytes).digest()
+def _xxhash_digest(input_bytes: bytes) -> bytes:
+ if _xxhash is None:
+ raise ModuleNotFoundError(
+            "xxhash is required for the 'xxhash' and 'xxhash_cbor' prefix "
+            "caching hash algorithms. Install it via `pip install xxhash`."
+ )
+ return _xxhash.xxh3_128_digest(input_bytes)
+
+
+def xxhash(input: Any) -> bytes:
+ """Hash picklable objects using xxHash."""
+ input_bytes = pickle.dumps(input, protocol=pickle.HIGHEST_PROTOCOL)
+ return _xxhash_digest(input_bytes)
+
+
+def xxhash_cbor(input: Any) -> bytes:
+ """Hash objects serialized with CBOR using xxHash."""
+ input_bytes = cbor2.dumps(input, canonical=True)
+ return _xxhash_digest(input_bytes)
+
+
def get_hash_fn_by_name(hash_fn_name: str) -> Callable[[Any], bytes]:
"""Get a hash function by name, or raise an error if the function is not found.
@@ -59,5 +88,9 @@ def get_hash_fn_by_name(hash_fn_name: str) -> Callable[[Any], bytes]:
return sha256
if hash_fn_name == "sha256_cbor":
return sha256_cbor
+ if hash_fn_name == "xxhash":
+ return xxhash
+ if hash_fn_name == "xxhash_cbor":
+ return xxhash_cbor
raise ValueError(f"Unsupported hash function: {hash_fn_name}")
diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
index 01ecd881115d..89cf2b6a37e6 100644
--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -12,7 +12,7 @@
from vllm import envs
from vllm.config import VllmConfig
from vllm.logger import init_logger
-from vllm.utils.hashing import sha256_cbor
+from vllm.utils.hashing import sha256_cbor, xxhash_cbor
from vllm.utils.math_utils import cdiv
from vllm.utils.mem_constants import GiB_bytes
from vllm.v1.kv_cache_interface import (
@@ -83,18 +83,19 @@ def maybe_convert_block_hash(hash_bytes: BlockHash) -> ExternalBlockHash:
#
# The function `init_none_hash` initializes this variable globally.
NONE_HASH: BlockHash
+_CBOR_HASH_FUNCTIONS = frozenset({sha256_cbor, xxhash_cbor})
def init_none_hash(hash_fn: Callable[[Any], bytes]):
global NONE_HASH
hash_seed = os.getenv("PYTHONHASHSEED")
- if hash_seed is None and hash_fn is sha256_cbor:
+ if hash_seed is None and hash_fn in _CBOR_HASH_FUNCTIONS:
logger.warning(
"PYTHONHASHSEED is not set. This will lead to non-reproducible "
- "block-hashes when using sha256_cbor as the hash function."
- "Consider setting PYTHONHASHSEED to a fixed value for "
- "reproducibility."
+ "block-hashes when using CBOR-based hash functions such as "
+ "sha256_cbor or xxhash_cbor. Consider setting PYTHONHASHSEED to a "
+ "fixed value for reproducibility."
)
if hash_seed is None: