Skip to content

Commit 9c2867a

Browse files
committed
Add: OpenCV benchmarks
1 parent f3a300b commit 9c2867a

File tree

4 files changed

+125
-46
lines changed

4 files changed

+125
-46
lines changed

README.md

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -200,15 +200,19 @@ Generating random byte-streams:
200200

201201
Performing in-place lookups in a precomputed table of 256 bytes:
202202

203-
| Library | Short Words | Long Lines |
204-
| ----------------------------- | -------------: | -------------: |
205-
| Rust 🦀 | | |
206-
| serial code | __0.61 GiB/s__ | 1.49 GiB/s |
207-
| `stringzilla::lookup_inplace` | 0.54 GiB/s | __9.90 GiB/s__ |
208-
| | | |
209-
| Python 🐍 | | |
210-
| `bytes.translate` | 1.18 GiB/s | 1.10 GiB/s |
211-
| `stringzilla.Str.translate` | __2.15 GiB/s__ | __2.26 GiB/s__ |
203+
| Library | Short Words | Long Lines |
204+
| ------------------------------- | -------------: | -------------: |
205+
| Rust 🦀 | | |
206+
| serial code | __0.61 GiB/s__ | 1.49 GiB/s |
207+
| `stringzilla::lookup_inplace` | 0.54 GiB/s | __9.90 GiB/s__ |
208+
| | | |
209+
| Python 🐍 | | |
210+
| `bytes.translate` | 0.05 GiB/s | 1.92 GiB/s |
211+
| `numpy.take` | 0.01 GiB/s | 0.85 GiB/s |
212+
| `opencv.LUT` | 0.01 GiB/s | 1.95 GiB/s |
213+
| `opencv.LUT` inplace | 0.01 GiB/s | 2.16 GiB/s |
214+
| `stringzilla.translate` | __0.07 GiB/s__ | 7.92 GiB/s |
215+
| `stringzilla.translate` inplace | 0.06 GiB/s | __8.14 GiB/s__ |
212216

213217

214218
## Similarities Scoring

bench_memory.py

Lines changed: 110 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,19 @@
33
# "stringzilla",
44
# "numpy",
55
# "pycryptodome",
6+
# "opencv-python",
67
# ]
78
# ///
89
"""
910
Python memory-centric benchmarks analogous to bench_memory.rs.
1011
1112
Includes two groups:
12-
- Lookup-table transforms (256-byte LUT): bytes.translate, stringzilla.Str.translate
13+
- Lookup-table transforms (256-byte LUT): bytes.translate, stringzilla.translate, OpenCV LUT, NumPy indexing and take
1314
- Random byte generation: NumPy PCG64, NumPy Philox, and PyCryptodome AES-CTR
1415
1516
Examples:
1617
python bench_memory.py --dataset README.md --tokens lines
17-
python bench_memory.py --dataset README.md --tokens words -k "translate|AES-CTR|PCG64|Philox"
18+
python bench_memory.py --dataset README.md --tokens words -k "translate|LUT|AES-CTR|PCG64|Philox"
1819
"""
1920

2021
from __future__ import annotations
@@ -28,6 +29,7 @@
2829
import numpy as np
2930
import Crypto as pycryptodome
3031
from Crypto.Cipher import AES as PyCryptoDomeAES
32+
import cv2
3133

3234
from utils import add_common_args, load_dataset, name_matches, now_ns, tokenize_dataset
3335

@@ -37,24 +39,68 @@ def log_system_info():
3739
print(f"- StringZilla: {sz.__version__} with {sz.__capabilities_str__}")
3840
print(f"- NumPy: {np.__version__}")
3941
print(f"- PyCryptoDome: {pycryptodome.__version__}")
42+
print(f"- OpenCV: {cv2.__version__} (defaults to {cv2.getNumThreads()} threads)")
4043
print()
4144

4245

43-
def sz_translate(haystack: sz.Str, look_up_table: bytes) -> int:
44-
# StringZilla translation using 256-byte LUT
45-
result = haystack.translate(look_up_table)
46+
def sz_translate_allocating(haystack: bytes, look_up_table: bytes) -> int:
47+
"""StringZilla translation with allocation (bytes input)."""
48+
result = sz.translate(haystack, look_up_table)
4649
return len(result)
4750

4851

52+
def sz_translate_inplace(haystack: memoryview, look_up_table: bytes) -> int:
53+
"""StringZilla translation in-place (memoryview input)."""
54+
sz.translate(haystack, look_up_table, inplace=True)
55+
return len(haystack)
56+
57+
4958
def bytes_translate(haystack_bytes: bytes, lut: bytes) -> int:
59+
"""Python bytes.translate (always allocating)."""
5060
result = haystack_bytes.translate(lut)
5161
return len(result)
5262

5363

64+
def opencv_lut_allocating(haystack_array: np.ndarray, lut: np.ndarray) -> int:
65+
"""OpenCV LUT with allocation."""
66+
result = cv2.LUT(haystack_array, lut)
67+
return len(result)
68+
69+
70+
def opencv_lut_inplace(haystack_array: np.ndarray, lut: np.ndarray) -> int:
71+
"""OpenCV LUT in-place."""
72+
cv2.LUT(haystack_array, lut, dst=haystack_array)
73+
return len(haystack_array)
74+
75+
76+
def numpy_lut_indexing_allocating(haystack_array: np.ndarray, lut: np.ndarray) -> int:
77+
"""NumPy array indexing (always allocating)."""
78+
result = lut[haystack_array]
79+
return len(result)
80+
81+
82+
def numpy_lut_indexing_inplace(haystack_array: np.ndarray, lut: np.ndarray) -> int:
83+
"""NumPy array indexing in-place."""
84+
haystack_array[:] = lut[haystack_array]
85+
return len(haystack_array)
86+
87+
88+
def numpy_lut_take_allocating(haystack_array: np.ndarray, lut: np.ndarray) -> int:
89+
"""NumPy take function (always allocating)."""
90+
result = np.take(lut, haystack_array)
91+
return len(result)
92+
93+
94+
def numpy_lut_take_inplace(haystack_array: np.ndarray, lut: np.ndarray) -> int:
95+
"""NumPy take function in-place."""
96+
np.take(lut, haystack_array, out=haystack_array)
97+
return len(haystack_array)
98+
99+
54100
def bench_translate(
55101
name: str,
56-
haystack,
57-
tables: List[bytes],
102+
tokens,
103+
table: bytes,
58104
op: Callable[[object, bytes], int],
59105
time_limit_seconds: float,
60106
) -> None:
@@ -63,17 +109,17 @@ def bench_translate(
63109

64110
requested = 0
65111
produced_bytes = 0
112+
check_frequency = 100
66113

67-
i = 0
68-
while True:
69-
table = tables[i % len(tables)]
70-
produced_bytes += op(haystack, table)
114+
for token in tokens:
115+
produced_bytes += op(token, table)
71116
requested += 1
117+
check_frequency -= 1
72118

73-
if requested % 10 == 0:
119+
if check_frequency == 0:
74120
if (now_ns() - start_time) >= time_limit_ns:
75121
break
76-
i += 1
122+
check_frequency = 100
77123

78124
secs = (now_ns() - start_time) / 1e9
79125
gbps = produced_bytes / (1e9 * secs) if secs > 0 else 0.0
@@ -196,32 +242,60 @@ def main() -> int:
196242
except re.error as e:
197243
parser.error(f"Invalid regex for --filter: {e}")
198244

245+
# Disable OpenCV multithreading for more consistent results
246+
cv2.setNumThreads(1)
247+
199248
# ---------------- Lookup-table transforms ----------------
200-
print("\n=== Lookup-table Transforms ===")
201-
identity = bytes(range(256))
202-
reverse = bytes(reversed(identity))
203-
repeated = bytes(range(64)) * 4
204-
hex_table = b"0123456789abcdef" * 16
205-
206-
# Operate on the full contiguous string via StringZilla's view
207-
sz_str = sz.Str(text)
208-
if name_matches("stringzilla.Str.translate(reverse)", pattern):
209-
bench_translate("stringzilla.Str.translate(reverse)", sz_str, [reverse], sz_translate, args.time_limit)
210-
if name_matches("stringzilla.Str.translate(repeated)", pattern):
211-
bench_translate("stringzilla.Str.translate(repeated)", sz_str, [repeated], sz_translate, args.time_limit)
212-
if name_matches("stringzilla.Str.translate(hex)", pattern):
213-
bench_translate("stringzilla.Str.translate(hex)", sz_str, [hex_table], sz_translate, args.time_limit)
214-
215-
# Python bytes.translate on the contiguous bytes
216-
if name_matches("bytes.translate(reverse)", pattern):
217-
bench_translate("bytes.translate(reverse)", data, [reverse], bytes_translate, args.time_limit)
218-
if name_matches("bytes.translate(repeated)", pattern):
219-
bench_translate("bytes.translate(repeated)", data, [repeated], bytes_translate, args.time_limit)
220-
if name_matches("bytes.translate(hex)", pattern):
221-
bench_translate("bytes.translate(hex)", data, [hex_table], bytes_translate, args.time_limit)
249+
print()
250+
print("--- LUT Transforms ---")
251+
252+
# Create reverse LUT
253+
reverse = bytes(reversed(range(256)))
254+
reverse_np = np.arange(255, -1, -1, dtype=np.uint8)
255+
256+
# Convert tokens to numpy arrays for token-based benchmarks
257+
tokens_np = [np.array(np.frombuffer(token, dtype=np.uint8)) for token in tokens_b]
258+
tokens_mv = [memoryview(bytearray(token)) for token in tokens_b]
259+
260+
# Python bytes.translate (always allocating)
261+
if name_matches("bytes.translate(new)", pattern):
262+
bench_translate("bytes.translate(new)", tokens_b, reverse, bytes_translate, args.time_limit)
263+
264+
# OpenCV allocating
265+
if name_matches("opencv.LUT(new)", pattern):
266+
bench_translate("opencv.LUT(new)", tokens_np, reverse_np, opencv_lut_allocating, args.time_limit)
267+
268+
# OpenCV in-place
269+
if name_matches("opencv.LUT(inplace)", pattern):
270+
bench_translate("opencv.LUT(inplace)", tokens_np, reverse_np, opencv_lut_inplace, args.time_limit)
271+
272+
# NumPy indexing allocating
273+
if name_matches("numpy.indexing(new)", pattern):
274+
bench_translate("numpy.indexing(new)", tokens_np, reverse_np, numpy_lut_indexing_allocating, args.time_limit)
275+
276+
# NumPy indexing in-place
277+
if name_matches("numpy.indexing(inplace)", pattern):
278+
bench_translate("numpy.indexing(inplace)", tokens_np, reverse_np, numpy_lut_indexing_inplace, args.time_limit)
279+
280+
# NumPy take allocating
281+
if name_matches("numpy.take(new)", pattern):
282+
bench_translate("numpy.take(new)", tokens_np, reverse_np, numpy_lut_take_allocating, args.time_limit)
283+
284+
# NumPy take in-place
285+
if name_matches("numpy.take(inplace)", pattern):
286+
bench_translate("numpy.take(inplace)", tokens_np, reverse_np, numpy_lut_take_inplace, args.time_limit)
287+
288+
# StringZilla allocating
289+
if name_matches("stringzilla.translate(new)", pattern):
290+
bench_translate("stringzilla.translate(new)", tokens_b, reverse, sz_translate_allocating, args.time_limit)
291+
292+
# StringZilla in-place (need memoryviews for each token)
293+
if name_matches("stringzilla.translate(inplace)", pattern):
294+
bench_translate("stringzilla.translate(inplace)", tokens_mv, reverse, sz_translate_inplace, args.time_limit)
222295

223296
# ---------------- Random byte generation ----------------
224-
print("\n=== Random Byte Generation ===")
297+
print()
298+
print("--- Random Byte Generation ---")
225299
sizes = sizes_from_tokens(tokens_b)
226300

227301
if name_matches("pycryptodome.AES-CTR", pattern):

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ similarities = [
4444
hash = ["stringzilla", "xxhash", "blake3", "google-crc32c", "mmh3", "cityhash"]
4545

4646
# Random byte generators and translation tables
47-
memory = ["stringzilla", "numpy", "pycryptodome"]
47+
memory = ["stringzilla", "numpy", "pycryptodome", "opencv-python"]
4848

4949
[tool.ruff]
5050
# Enforce 120-column lines across formatting and linting

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,4 @@ polars # https://github.com/pola-rs/polars
3535
# For memory benchmarks (random byte generation and lookup-table transforms)
3636
numpy # https://github.com/numpy/numpy
3737
pycryptodome # https://github.com/Legrandin/pycryptodome
38+
opencv-python # https://github.com/opencv/opencv-python

0 commit comments

Comments
 (0)