Commit 7882c26

[FEAT] Add micro benchmark for comparing built-in hash(), SHA-256, and xxHash (from #23673)
1 parent 2870ba6 commit 7882c26

File tree

1 file changed: +120 -0 lines changed


benchmarks/benchmark_hash.py

Lines changed: 120 additions & 0 deletions
@@ -0,0 +1,120 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Micro benchmark comparing built-in hash(), SHA-256, and xxHash.

This focuses on a single test payload shaped like the prefix-cache hash input:
(32-byte bytes object, 32-int tuple)

Usage:
    python benchmarks/benchmark_hash.py --iterations 20000
"""

from __future__ import annotations

import argparse
import random
import statistics
import time
from typing import Callable, Iterable

from vllm.utils.hashing import sha256, xxhash


def _generate_test_data(seed: int) -> tuple[bytes, tuple[int, ...]]:
    """Generate a deterministic test payload."""
    random.seed(seed)
    bytes_data = bytes(random.getrandbits(8) for _ in range(32))
    int_tuple = tuple(random.randint(1, 1_000_000) for _ in range(32))
    return (bytes_data, int_tuple)


def _benchmark_func(func: Callable[[tuple], object], data: tuple, iterations: int):
    """Return (avg_seconds, std_seconds) for hashing `data` `iterations` times."""
    times: list[float] = []

    # Warm-up to avoid first-run noise.
    for _ in range(200):
        func(data)

    for _ in range(iterations):
        start = time.perf_counter()
        func(data)
        end = time.perf_counter()
        times.append(end - start)

    avg = statistics.mean(times)
    std = statistics.stdev(times) if len(times) > 1 else 0.0
    return avg, std


def _run_benchmarks(
    benchmarks: Iterable[tuple[str, Callable[[tuple], object]]],
    data: tuple,
    iterations: int,
):
    """Yield (name, avg, std) for each benchmark, skipping unavailable ones."""
    for name, func in benchmarks:
        try:
            avg, std = _benchmark_func(func, data, iterations)
        except ModuleNotFoundError as exc:
            print(f"Skipping {name}: {exc}")
            continue
        yield name, avg, std


def builtin_hash(data: tuple) -> int:
    """Wrapper for Python's built-in hash()."""
    return hash(data)


def main() -> None:
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--iterations",
        type=int,
        default=10_000,
        help="Number of measured iterations per hash function.",
    )
    parser.add_argument(
        "--seed", type=int, default=42, help="Random seed for test payload."
    )
    args = parser.parse_args()

    data = _generate_test_data(args.seed)
    benchmarks = (
        ("SHA256 (pickle)", sha256),
        ("xxHash (pickle)", xxhash),
        ("built-in hash()", builtin_hash),
    )

    print("=" * 60)
    print("HASH FUNCTION MICRO BENCHMARK")
    print("=" * 60)
    print("Test data: (32-byte bytes object, 32-int tuple)")
    print(f"Iterations: {args.iterations:,}")
    print("=" * 60)

    results = list(_run_benchmarks(benchmarks, data, args.iterations))
    builtin_entry = next((r for r in results if r[0] == "built-in hash()"), None)

    print("\nResults:")
    for name, avg, std in results:
        print(f" {name:16s}: {avg * 1e6:8.2f} ± {std * 1e6:6.2f} μs")

    if builtin_entry:
        _, builtin_avg, _ = builtin_entry
        print("\n" + "=" * 60)
        print("SUMMARY (relative to built-in hash())")
        print("=" * 60)
        for name, avg, _ in results:
            if name == "built-in hash()":
                continue
            speed_ratio = avg / builtin_avg
            print(f"• {name} is {speed_ratio:.1f}x slower than built-in hash()")
    else:
        print("\nBuilt-in hash() result missing; cannot compute speed ratios.")


if __name__ == "__main__":
    main()
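
For context on what the two imported vLLM helpers do: the result table labels them "SHA256 (pickle)" and "xxHash (pickle)", which suggests they serialize the payload with pickle and hash the resulting bytes. Below is a minimal standalone sketch under that assumption; the names sha256_pickle and xxhash_pickle are illustrative, the actual vllm.utils.hashing implementations may differ, and the third-party xxhash package must be installed for the second helper.

# Minimal standalone sketch (assumption): pickle the payload, hash the bytes.
# The real vllm.utils.hashing.sha256 / xxhash helpers may differ in detail.
import hashlib
import pickle

import xxhash  # third-party "xxhash" package; optional dependency


def sha256_pickle(obj: object) -> int:
    """Pickle the object and return its SHA-256 digest as an integer."""
    payload = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
    return int.from_bytes(hashlib.sha256(payload).digest(), byteorder="big")


def xxhash_pickle(obj: object) -> int:
    """Pickle the object and return its 64-bit xxHash digest as an integer."""
    payload = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
    return xxhash.xxh64(payload).intdigest()


if __name__ == "__main__":
    data = (b"\x00" * 32, tuple(range(32)))
    print(sha256_pickle(data))
    print(xxhash_pickle(data))

One likely motivation for benchmarking the pickled digests at all: built-in hash() of a bytes object is randomized per process unless PYTHONHASHSEED is fixed, while SHA-256 and xxHash digests of the pickled payload are stable across processes.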
