|
| 1 | +# SPDX-License-Identifier: Apache-2.0 |
| 2 | +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project |
| 3 | + |
| 4 | +""" |
| 5 | +Simple benchmark to compare prefix-cache block hashing algorithms. |
| 6 | +
|
| 7 | +Example: |
| 8 | + python benchmarks/hash_perf_demo.py --num-blocks 20000 --block-size 32 |
| 9 | +""" |
| 10 | + |
| 11 | +from __future__ import annotations |
| 12 | + |
| 13 | +import argparse |
| 14 | +import random |
| 15 | +import statistics |
| 16 | +import sys |
| 17 | +import time |
| 18 | +from collections.abc import Callable, Iterable, Sequence |
| 19 | + |
| 20 | +from vllm.utils.hashing import get_hash_fn_by_name |
| 21 | +from vllm.v1.core.kv_cache_utils import BlockHash, hash_block_tokens, init_none_hash |
| 22 | + |
# Algorithm names handed to get_hash_fn_by_name; also serves as both the
# CLI default and the allowed --algorithms choices in main().
SUPPORTED_ALGOS = ("sha256", "sha256_cbor", "xxhash", "xxhash_cbor")
| 24 | + |
| 25 | + |
| 26 | +def _generate_blocks( |
| 27 | + num_blocks: int, block_size: int, vocab_size: int, seed: int |
| 28 | +) -> list[list[int]]: |
| 29 | + rng = random.Random(seed) |
| 30 | + return [ |
| 31 | + [rng.randrange(vocab_size) for _ in range(block_size)] |
| 32 | + for _ in range(num_blocks) |
| 33 | + ] |
| 34 | + |
| 35 | + |
def _hash_all_blocks(
    hash_fn: Callable[[object], bytes],
    blocks: Iterable[Sequence[int]],
) -> float:
    """Chain-hash every block with ``hash_fn`` and return the elapsed seconds.

    Each block's hash is computed with the previous block's hash as its
    parent (starting from ``None``), mirroring how prefix-cache block hashes
    chain together. Only the hashing loop is timed.
    """
    prev_hash: BlockHash | None = None
    started = time.perf_counter()
    for tokens in blocks:
        prev_hash = hash_block_tokens(hash_fn, prev_hash, tokens, extra_keys=None)
    return time.perf_counter() - started
| 46 | + |
| 47 | + |
def _benchmark(
    hash_algo: str,
    blocks: list[list[int]],
    trials: int,
) -> tuple[float, float, float] | None:
    """Time ``trials`` full hashing passes over ``blocks`` with ``hash_algo``.

    Returns ``(avg_seconds, best_seconds, tokens_per_second)``, or ``None``
    when the algorithm's backing module (e.g. xxhash) is not installed.
    """
    try:
        hash_fn = get_hash_fn_by_name(hash_algo)
        init_none_hash(hash_fn)
        # Keep the timing loop inside the try: optional backends may only
        # raise ModuleNotFoundError on first use, not at lookup time.
        timings = [_hash_all_blocks(hash_fn, blocks) for _ in range(trials)]
    except ModuleNotFoundError as exc:
        print(f"Skipping {hash_algo}: {exc}", file=sys.stderr)
        return None

    avg = statistics.mean(timings)
    best = min(timings)
    # Throughput in tokens/second. Sum actual block lengths instead of
    # len(blocks) * len(blocks[0]): the latter raised IndexError for
    # --num-blocks 0 and silently assumed uniform block sizes.
    tokens_hashed = sum(len(block) for block in blocks)
    throughput = tokens_hashed / best if best > 0 else 0.0
    return avg, best, throughput
| 67 | + |
| 68 | + |
def main() -> None:
    """Parse CLI options, build random blocks, and report per-algorithm timings."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--num-blocks", type=int, default=10000, help="Block count.")
    parser.add_argument("--block-size", type=int, default=32, help="Tokens per block.")
    parser.add_argument(
        "--vocab-size", type=int, default=32000, help="Token id range [0, vocab_size)."
    )
    parser.add_argument("--seed", type=int, default=0, help="Random seed.")
    parser.add_argument(
        "--trials", type=int, default=5, help="Number of timed trials per algorithm."
    )
    parser.add_argument(
        "--algorithms",
        nargs="+",
        default=SUPPORTED_ALGOS,
        choices=SUPPORTED_ALGOS,
        help="Hash algorithms to benchmark.",
    )
    opts = parser.parse_args()

    blocks = _generate_blocks(
        opts.num_blocks, opts.block_size, opts.vocab_size, opts.seed
    )
    print(
        f"Benchmarking {len(opts.algorithms)} algorithms on "
        f"{opts.num_blocks} blocks (block size={opts.block_size})."
    )

    for algo in opts.algorithms:
        # Skip algorithms whose optional dependency is missing.
        if (outcome := _benchmark(algo, blocks, opts.trials)) is None:
            continue
        avg, best, throughput = outcome
        print(
            f"{algo:14s} avg: {avg:.6f}s best: {best:.6f}s "
            f"throughput: {throughput/1e6:.2f}M tokens/s"
        )
| 105 | + |
| 106 | + |
# Script entry point: run the benchmark only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
0 commit comments