33# "stringzilla",
44# "numpy",
55# "pycryptodome",
6+ # "opencv-python",
67# ]
78# ///
89"""
910Python memory-centric benchmarks analogous to bench_memory.rs.
1011
1112Includes two groups:
12- - Lookup-table transforms (256-byte LUT): bytes.translate, stringzilla.Str.translate
13+ - Lookup-table transforms (256-byte LUT): bytes.translate, stringzilla.Str.translate, OpenCV LUT, NumPy indexing
1314- Random byte generation: NumPy PCG64, NumPy Philox, and PyCryptodome AES-CTR
1415
1516Examples:
1617 python bench_memory.py --dataset README.md --tokens lines
17- python bench_memory.py --dataset README.md --tokens words -k "translate|AES-CTR|PCG64|Philox"
18+ python bench_memory.py --dataset README.md --tokens words -k "translate|LUT|AES-CTR|PCG64|Philox"
1819"""
1920
2021from __future__ import annotations
2829import numpy as np
2930import Crypto as pycryptodome
3031from Crypto .Cipher import AES as PyCryptoDomeAES
32+ import cv2
3133
3234from utils import add_common_args , load_dataset , name_matches , now_ns , tokenize_dataset
3335
@@ -37,24 +39,68 @@ def log_system_info():
3739 print (f"- StringZilla: { sz .__version__ } with { sz .__capabilities_str__ } " )
3840 print (f"- NumPy: { np .__version__ } " )
3941 print (f"- PyCryptoDome: { pycryptodome .__version__ } " )
42+ print (f"- OpenCV: { cv2 .__version__ } (defaults to { cv2 .getNumThreads ()} threads)" )
4043 print ()
4144
4245
def sz_translate_allocating(haystack: bytes, look_up_table: bytes) -> int:
    """Translate ``haystack`` through a lookup table with StringZilla.

    This is the allocating variant: the translated data comes back as a
    new object and ``haystack`` itself is not passed with ``inplace=True``.

    Args:
        haystack: Input bytes to translate.
        look_up_table: Lookup table handed to ``sz.translate``.

    Returns:
        Length of the translated result.
    """
    result = sz.translate(haystack, look_up_table)
    return len(result)
4750
4851
def sz_translate_inplace(haystack: memoryview, look_up_table: bytes) -> int:
    """Translate ``haystack`` through a lookup table with StringZilla, in place.

    Args:
        haystack: Buffer handed to ``sz.translate`` with ``inplace=True``.
            NOTE(review): presumably this must be a writable view (the
            caller wraps a ``bytearray``) - confirm against StringZilla docs.
        look_up_table: Lookup table handed to ``sz.translate``.

    Returns:
        Length of ``haystack``.
    """
    # The return value of the in-place call is intentionally ignored.
    sz.translate(haystack, look_up_table, inplace=True)
    return len(haystack)
56+
57+
def bytes_translate(haystack_bytes: bytes, lut: bytes) -> int:
    """Translate ``haystack_bytes`` with the built-in ``bytes.translate``.

    ``bytes.translate`` always builds a new ``bytes`` object, so this is an
    allocating baseline.

    Args:
        haystack_bytes: Input bytes to translate.
        lut: Translation table; ``bytes.translate`` requires exactly 256
            entries.

    Returns:
        Length of the translated result.
    """
    result = haystack_bytes.translate(lut)
    return len(result)
5262
5363
def opencv_lut_allocating(haystack_array: np.ndarray, lut: np.ndarray) -> int:
    """Apply ``cv2.LUT`` to ``haystack_array`` and let OpenCV allocate the output.

    Args:
        haystack_array: Input array; the caller builds these as ``uint8``
            arrays from byte tokens.
        lut: Lookup table array passed to ``cv2.LUT``.

    Returns:
        ``len()`` of the array returned by ``cv2.LUT``.
        NOTE(review): for 1-D inputs OpenCV may return a 2-D (N, 1) array;
        ``len`` would still be N - confirm.
    """
    result = cv2.LUT(haystack_array, lut)
    return len(result)
68+
69+
def opencv_lut_inplace(haystack_array: np.ndarray, lut: np.ndarray) -> int:
    """Apply ``cv2.LUT`` to ``haystack_array``, writing the output back into it.

    Args:
        haystack_array: Array used as both the source and the ``dst``
            argument of ``cv2.LUT``.
        lut: Lookup table array passed to ``cv2.LUT``.

    Returns:
        Length of ``haystack_array``.
    """
    # Source and destination are the same array; the returned value is ignored.
    cv2.LUT(haystack_array, lut, dst=haystack_array)
    return len(haystack_array)
74+
75+
def numpy_lut_indexing_allocating(haystack_array: np.ndarray, lut: np.ndarray) -> int:
    """Translate ``haystack_array`` with NumPy integer-array indexing.

    ``lut[haystack_array]`` gathers one LUT entry per input element into a
    newly created array; the input is left untouched.

    Args:
        haystack_array: Integer array whose values index into ``lut``.
        lut: Lookup table array.

    Returns:
        Length of the gathered result.
    """
    result = lut[haystack_array]
    return len(result)
80+
81+
def numpy_lut_indexing_inplace(haystack_array: np.ndarray, lut: np.ndarray) -> int:
    """Translate ``haystack_array`` with NumPy indexing and store the result back.

    The right-hand side ``lut[haystack_array]`` is evaluated first and then
    copied over the contents of ``haystack_array`` via slice assignment.

    Args:
        haystack_array: Integer array whose values index into ``lut``;
            overwritten with the translated values.
        lut: Lookup table array.

    Returns:
        Length of ``haystack_array``.
    """
    haystack_array[:] = lut[haystack_array]
    return len(haystack_array)
86+
87+
def numpy_lut_take_allocating(haystack_array: np.ndarray, lut: np.ndarray) -> int:
    """Translate ``haystack_array`` with ``np.take``, returning a new array.

    Args:
        haystack_array: Integer array of indices into ``lut``.
        lut: Lookup table array.

    Returns:
        Length of the array produced by ``np.take``.
    """
    result = np.take(lut, haystack_array)
    return len(result)
92+
93+
def numpy_lut_take_inplace(haystack_array: np.ndarray, lut: np.ndarray) -> int:
    """Translate ``haystack_array`` with ``np.take``, writing into the input array.

    Args:
        haystack_array: Integer array of indices into ``lut``; also passed
            as ``out`` so it receives the translated values.
        lut: Lookup table array.

    Returns:
        Length of ``haystack_array``.
    """
    # The input doubles as the output buffer; the default mode is kept as-is.
    np.take(lut, haystack_array, out=haystack_array)
    return len(haystack_array)
98+
99+
54100def bench_translate (
55101 name : str ,
56- haystack ,
57- tables : List [ bytes ] ,
102+ tokens ,
103+ table : bytes ,
58104 op : Callable [[object , bytes ], int ],
59105 time_limit_seconds : float ,
60106) -> None :
@@ -63,17 +109,17 @@ def bench_translate(
63109
64110 requested = 0
65111 produced_bytes = 0
112+ check_frequency = 100
66113
67- i = 0
68- while True :
69- table = tables [i % len (tables )]
70- produced_bytes += op (haystack , table )
114+ for token in tokens :
115+ produced_bytes += op (token , table )
71116 requested += 1
117+ check_frequency -= 1
72118
73- if requested % 10 == 0 :
119+ if check_frequency == 0 :
74120 if (now_ns () - start_time ) >= time_limit_ns :
75121 break
76- i += 1
122+ check_frequency = 100
77123
78124 secs = (now_ns () - start_time ) / 1e9
79125 gbps = produced_bytes / (1e9 * secs ) if secs > 0 else 0.0
@@ -196,32 +242,60 @@ def main() -> int:
196242 except re .error as e :
197243 parser .error (f"Invalid regex for --filter: { e } " )
198244
245+ # Disable OpenCV multithreading for more consistent results
246+ cv2 .setNumThreads (1 )
247+
199248 # ---------------- Lookup-table transforms ----------------
200- print ("\n === Lookup-table Transforms ===" )
201- identity = bytes (range (256 ))
202- reverse = bytes (reversed (identity ))
203- repeated = bytes (range (64 )) * 4
204- hex_table = b"0123456789abcdef" * 16
205-
206- # Operate on the full contiguous string via StringZilla's view
207- sz_str = sz .Str (text )
208- if name_matches ("stringzilla.Str.translate(reverse)" , pattern ):
209- bench_translate ("stringzilla.Str.translate(reverse)" , sz_str , [reverse ], sz_translate , args .time_limit )
210- if name_matches ("stringzilla.Str.translate(repeated)" , pattern ):
211- bench_translate ("stringzilla.Str.translate(repeated)" , sz_str , [repeated ], sz_translate , args .time_limit )
212- if name_matches ("stringzilla.Str.translate(hex)" , pattern ):
213- bench_translate ("stringzilla.Str.translate(hex)" , sz_str , [hex_table ], sz_translate , args .time_limit )
214-
215- # Python bytes.translate on the contiguous bytes
216- if name_matches ("bytes.translate(reverse)" , pattern ):
217- bench_translate ("bytes.translate(reverse)" , data , [reverse ], bytes_translate , args .time_limit )
218- if name_matches ("bytes.translate(repeated)" , pattern ):
219- bench_translate ("bytes.translate(repeated)" , data , [repeated ], bytes_translate , args .time_limit )
220- if name_matches ("bytes.translate(hex)" , pattern ):
221- bench_translate ("bytes.translate(hex)" , data , [hex_table ], bytes_translate , args .time_limit )
249+ print ()
250+ print ("--- LUT Transforms ---" )
251+
252+ # Create reverse LUT
253+ reverse = bytes (reversed (range (256 )))
254+ reverse_np = np .arange (255 , - 1 , - 1 , dtype = np .uint8 )
255+
256+ # Convert tokens to numpy arrays for token-based benchmarks
257+ tokens_np = [np .array (np .frombuffer (token , dtype = np .uint8 )) for token in tokens_b ]
258+ tokens_mv = [memoryview (bytearray (token )) for token in tokens_b ]
259+
260+ # Python bytes.translate (always allocating)
261+ if name_matches ("bytes.translate(new)" , pattern ):
262+ bench_translate ("bytes.translate(new)" , tokens_b , reverse , bytes_translate , args .time_limit )
263+
264+ # OpenCV allocating
265+ if name_matches ("opencv.LUT(new)" , pattern ):
266+ bench_translate ("opencv.LUT(new)" , tokens_np , reverse_np , opencv_lut_allocating , args .time_limit )
267+
268+ # OpenCV in-place
269+ if name_matches ("opencv.LUT(inplace)" , pattern ):
270+ bench_translate ("opencv.LUT(inplace)" , tokens_np , reverse_np , opencv_lut_inplace , args .time_limit )
271+
272+ # NumPy indexing allocating
273+ if name_matches ("numpy.indexing(new)" , pattern ):
274+ bench_translate ("numpy.indexing(new)" , tokens_np , reverse_np , numpy_lut_indexing_allocating , args .time_limit )
275+
276+ # NumPy indexing in-place
277+ if name_matches ("numpy.indexing(inplace)" , pattern ):
278+ bench_translate ("numpy.indexing(inplace)" , tokens_np , reverse_np , numpy_lut_indexing_inplace , args .time_limit )
279+
280+ # NumPy take allocating
281+ if name_matches ("numpy.take(new)" , pattern ):
282+ bench_translate ("numpy.take(new)" , tokens_np , reverse_np , numpy_lut_take_allocating , args .time_limit )
283+
284+ # NumPy take in-place
285+ if name_matches ("numpy.take(inplace)" , pattern ):
286+ bench_translate ("numpy.take(inplace)" , tokens_np , reverse_np , numpy_lut_take_inplace , args .time_limit )
287+
288+ # StringZilla allocating
289+ if name_matches ("stringzilla.translate(new)" , pattern ):
290+ bench_translate ("stringzilla.translate(new)" , tokens_b , reverse , sz_translate_allocating , args .time_limit )
291+
292+ # StringZilla in-place (need memoryviews for each token)
293+ if name_matches ("stringzilla.translate(inplace)" , pattern ):
294+ bench_translate ("stringzilla.translate(inplace)" , tokens_mv , reverse , sz_translate_inplace , args .time_limit )
222295
223296 # ---------------- Random byte generation ----------------
224- print ("\n === Random Byte Generation ===" )
297+ print ()
298+ print ("--- Random Byte Generation ---" )
225299 sizes = sizes_from_tokens (tokens_b )
226300
227301 if name_matches ("pycryptodome.AES-CTR" , pattern ):
0 commit comments