[FEAT] Add xxhash and xxhash_cbor algorithms for prefix caching and update tests

LuminolT · LuminolT · commit 37a406900cdb · 2025-11-21T16:05:52.000+08:00
Signed-off-by: LuminolT &lt;lumischen01@gmail.com&gt;
diff --git a/benchmarks/hash_perf_demo.py b/benchmarks/hash_perf_demo.py
@@ -0,0 +1,108 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""
+Simple benchmark to compare prefix-cache block hashing algorithms.
+
+Example:
+    python benchmarks/hash_perf_demo.py --num-blocks 20000 --block-size 32
+"""
+
+from __future__ import annotations
+
+import argparse
+import random
+import statistics
+import sys
+import time
+from collections.abc import Callable, Iterable, Sequence
+
+from vllm.utils.hashing import get_hash_fn_by_name
+from vllm.v1.core.kv_cache_utils import BlockHash, hash_block_tokens, init_none_hash
+
+SUPPORTED_ALGOS = ("sha256", "sha256_cbor", "xxhash", "xxhash_cbor")
+
+
+def _generate_blocks(
+    num_blocks: int, block_size: int, vocab_size: int, seed: int
+) -> list[list[int]]:
+    rng = random.Random(seed)
+    return [
+        [rng.randrange(vocab_size) for _ in range(block_size)]
+        for _ in range(num_blocks)
+    ]
+
+
+def _hash_all_blocks(
+    hash_fn: Callable[[object], bytes],
+    blocks: Iterable[Sequence[int]],
+) -> float:
+    parent_hash: BlockHash | None = None
+    start = time.perf_counter()
+    for block in blocks:
+        parent_hash = hash_block_tokens(hash_fn, parent_hash, block, extra_keys=None)
+    end = time.perf_counter()
+    return end - start
+
+
+def _benchmark(
+    hash_algo: str,
+    blocks: list[list[int]],
+    trials: int,
+) -> tuple[float, float, float] | None:
+    try:
+        hash_fn = get_hash_fn_by_name(hash_algo)
+        init_none_hash(hash_fn)
+        timings = [_hash_all_blocks(hash_fn, blocks) for _ in range(trials)]
+    except ModuleNotFoundError as exc:
+        print(f"Skipping {hash_algo}: {exc}", file=sys.stderr)
+        return None
+
+    avg = statistics.mean(timings)
+    best = min(timings)
+    # throughput: tokens / second
+    tokens_hashed = len(blocks) * len(blocks[0])
+    throughput = tokens_hashed / best
+    return avg, best, throughput
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--num-blocks", type=int, default=10000, help="Block count.")
+    parser.add_argument("--block-size", type=int, default=32, help="Tokens per block.")
+    parser.add_argument(
+        "--vocab-size", type=int, default=32000, help="Token id range [0, vocab_size)."
+    )
+    parser.add_argument("--seed", type=int, default=0, help="Random seed.")
+    parser.add_argument(
+        "--trials", type=int, default=5, help="Number of timed trials per algorithm."
+    )
+    parser.add_argument(
+        "--algorithms",
+        nargs="+",
+        default=SUPPORTED_ALGOS,
+        choices=SUPPORTED_ALGOS,
+        help="Hash algorithms to benchmark.",
+    )
+    args = parser.parse_args()
+
+    blocks = _generate_blocks(args.num_blocks, args.block_size, args.vocab_size, args.seed)
+    print(
+        f"Benchmarking {len(args.algorithms)} algorithms on "
+        f"{args.num_blocks} blocks (block size={args.block_size})."
+    )
+
+    for algo in args.algorithms:
+        result = _benchmark(algo, blocks, args.trials)
+        if result is None:
+            continue
+
+        avg, best, throughput = result
+        print(
+            f"{algo:14s} avg: {avg:.6f}s  best: {best:.6f}s  "
+            f"throughput: {throughput/1e6:.2f}M tokens/s"
+        )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/v1/engine/test_engine_args.py b/tests/v1/engine/test_engine_args.py
@@ -42,6 +42,16 @@ def test_prefix_caching_from_cli():
     vllm_config = EngineArgs.from_cli_args(args=args).create_engine_config()
     assert vllm_config.cache_config.prefix_caching_hash_algo == "sha256"
 
+    # set hash algorithm to xxhash (pickle)
+    args = parser.parse_args(["--prefix-caching-hash-algo", "xxhash"])
+    vllm_config = EngineArgs.from_cli_args(args=args).create_engine_config()
+    assert vllm_config.cache_config.prefix_caching_hash_algo == "xxhash"
+
+    # set hash algorithm to xxhash_cbor
+    args = parser.parse_args(["--prefix-caching-hash-algo", "xxhash_cbor"])
+    vllm_config = EngineArgs.from_cli_args(args=args).create_engine_config()
+    assert vllm_config.cache_config.prefix_caching_hash_algo == "xxhash_cbor"
+
     # an invalid hash algorithm raises an error
     parser.exit_on_error = False
     with pytest.raises(ArgumentError):
diff --git a/vllm/config/cache.py b/vllm/config/cache.py
@@ -30,7 +30,7 @@
     "fp8_ds_mla",
 ]
 MambaDType = Literal["auto", "float32"]
-PrefixCachingHashAlgo = Literal["sha256", "sha256_cbor"]
+PrefixCachingHashAlgo = Literal["sha256", "sha256_cbor", "xxhash", "xxhash_cbor"]
 KVOffloadingBackend = Literal["native", "lmcache"]
 
 
@@ -79,7 +79,11 @@ class CacheConfig:
     """Set the hash algorithm for prefix caching:\n
     - "sha256" uses Pickle for object serialization before hashing.\n
     - "sha256_cbor" provides a reproducible, cross-language compatible hash. It
-    serializes objects using canonical CBOR and hashes them with SHA-256."""
+    serializes objects using canonical CBOR and hashes them with SHA-256.\n
+    - "xxhash" uses Pickle serialization with xxHash (128-bit) for faster,
+    non-cryptographic hashing. Requires the optional ``xxhash`` package.\n
+    - "xxhash_cbor" combines canonical CBOR serialization with xxHash for
+    reproducible hashing. Requires the optional ``xxhash`` package."""
     cpu_offload_gb: float = Field(default=0, ge=0)
     """The space in GiB to offload to CPU, per GPU. Default is 0, which means
     no offloading. Intuitively, this argument can be seen as a virtual way to
diff --git a/vllm/utils/hashing.py b/vllm/utils/hashing.py
@@ -9,6 +9,10 @@
 from typing import Any
 
 import cbor2
+try:
+    import xxhash as _xxhash
+except ImportError:  # pragma: no cover
+    _xxhash = None
 
 
 def sha256(input: Any) -> bytes:
@@ -46,6 +50,27 @@ def sha256_cbor(input: Any) -> bytes:
     return hashlib.sha256(input_bytes).digest()
 
 
+def _xxhash_digest(input_bytes: bytes) -> bytes:
+    if _xxhash is None:
+        raise ModuleNotFoundError(
+            "xxhash is required for the 'xxhash' prefix caching hash algorithms. "
+            "Install it via `pip install xxhash`."
+        )
+    return _xxhash.xxh3_128_digest(input_bytes)
+
+
+def xxhash(input: Any) -> bytes:
+    """Hash picklable objects using xxHash."""
+    input_bytes = pickle.dumps(input, protocol=pickle.HIGHEST_PROTOCOL)
+    return _xxhash_digest(input_bytes)
+
+
+def xxhash_cbor(input: Any) -> bytes:
+    """Hash objects serialized with CBOR using xxHash."""
+    input_bytes = cbor2.dumps(input, canonical=True)
+    return _xxhash_digest(input_bytes)
+
+
 def get_hash_fn_by_name(hash_fn_name: str) -> Callable[[Any], bytes]:
     """Get a hash function by name, or raise an error if the function is not found.
 
@@ -59,5 +84,9 @@ def get_hash_fn_by_name(hash_fn_name: str) -> Callable[[Any], bytes]:
         return sha256
     if hash_fn_name == "sha256_cbor":
         return sha256_cbor
+    if hash_fn_name == "xxhash":
+        return xxhash
+    if hash_fn_name == "xxhash_cbor":
+        return xxhash_cbor
 
     raise ValueError(f"Unsupported hash function: {hash_fn_name}")
diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
@@ -12,7 +12,7 @@
 from vllm import envs
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
-from vllm.utils.hashing import sha256_cbor
+from vllm.utils.hashing import sha256_cbor, xxhash_cbor
 from vllm.utils.math_utils import cdiv
 from vllm.utils.mem_constants import GiB_bytes
 from vllm.v1.kv_cache_interface import (
@@ -83,18 +83,19 @@ def maybe_convert_block_hash(hash_bytes: BlockHash) -> ExternalBlockHash:
 #
 # The function `init_none_hash` initializes this variable globally.
 NONE_HASH: BlockHash
+_CBOR_HASH_FUNCTIONS = frozenset({sha256_cbor, xxhash_cbor})
 
 
 def init_none_hash(hash_fn: Callable[[Any], bytes]):
     global NONE_HASH
 
     hash_seed = os.getenv("PYTHONHASHSEED")
-    if hash_seed is None and hash_fn is sha256_cbor:
+    if hash_seed is None and hash_fn in _CBOR_HASH_FUNCTIONS:
         logger.warning(
             "PYTHONHASHSEED is not set. This will lead to non-reproducible "
-            "block-hashes when using sha256_cbor as the hash function."
-            "Consider setting PYTHONHASHSEED to a fixed value for "
-            "reproducibility."
+            "block-hashes when using CBOR-based hash functions such as "
+            "sha256_cbor or xxhash_cbor. Consider setting PYTHONHASHSEED to a "
+            "fixed value for reproducibility."
         )
 
     if hash_seed is None: