1414from matplotlib .ticker import ScalarFormatter
1515
1616
17- def run_benchmark (num_arrays = 10 , size = 500 , aligned_chunks = False , axis = 0 ):
17+ def run_benchmark (num_arrays = 10 , size = 500 , aligned_chunks = False , axis = 0 , codec = blosc2 . Codec . ZSTD ):
1818 """
1919 Benchmark blosc2.concatenate performance with different chunk alignments.
2020
@@ -51,6 +51,7 @@ def run_benchmark(num_arrays=10, size=500, aligned_chunks=False, axis=0):
5151 for i , (shape , chunk_shape ) in enumerate (zip (shapes , chunk_shapes )):
5252 arr = blosc2 .arange (
5353 i * np .prod (shape ), (i + 1 ) * np .prod (shape ), 1 , dtype = "i4" , shape = shape , chunks = chunk_shape ,
54+ cparams = blosc2 .CParams (codec = codec )
5455 )
5556 arrays .append (arr )
5657
@@ -60,7 +61,7 @@ def run_benchmark(num_arrays=10, size=500, aligned_chunks=False, axis=0):
6061
6162 # Time the concatenation
6263 start_time = time .time ()
63- result = blosc2 .concatenate (arrays , axis = axis , cparams = blosc2 .CParams (codec = blosc2 . Codec . BLOSCLZ ))
64+ result = blosc2 .concatenate (arrays , axis = axis , cparams = blosc2 .CParams (codec = codec ))
6465 duration = time .time () - start_time
6566
6667 return duration , result .shape , data_size_gb
@@ -193,12 +194,13 @@ def autolabel(rects, ax):
193194
194195
195196def main ():
196- print (f"{ '=' * 60 } " )
197- print (f"Blosc2 vs NumPy concatenation benchmark" )
198- print (f"{ '=' * 60 } " )
197+ codec = blosc2 .Codec .BLOSCLZ
198+ print (f"{ '=' * 70 } " )
199+ print (f"Blosc2 vs NumPy concatenation benchmark { codec = } " )
200+ print (f"{ '=' * 70 } " )
199201
200202 # Parameters
201- sizes = [500 , 1000 ] # , 2000, 4000] #, 10000] # must be divisible by 4 for aligned chunks
203+ sizes = [500 , 1000 , 2000 , 4000 ] #, 10000] # must be divisible by 4 for aligned chunks
202204 num_arrays = 10
203205
204206 # Lists to store results for both axes
@@ -211,15 +213,15 @@ def main():
211213
212214 for axis in [0 , 1 ]:
213215 print (f"\n Concatenating { num_arrays } arrays along axis { axis } " )
214- print (f"{ 'Size' :<10 } { 'NumPy (GB/s)' :<14} { 'Unaligned (GB/s)' :<18} "
216+ print (f"{ 'Size' :<8 } { 'NumPy (GB/s)' :<14} { 'Unaligned (GB/s)' :<18} "
215217 f"{ 'Aligned (GB/s)' :<16} { 'Alig vs Unalig' :<16} { 'Alig vs NumPy' :<16} " )
216218 print (f"{ '-' * 90 } " )
217219
218220 for size in sizes :
219221 # Run the benchmarks
220222 numpy_time , numpy_shape , data_size_gb = run_numpy_benchmark (num_arrays , size , axis = axis )
221- unaligned_time , shape1 , _ = run_benchmark (num_arrays , size , aligned_chunks = False , axis = axis )
222- aligned_time , shape2 , _ = run_benchmark (num_arrays , size , aligned_chunks = True , axis = axis )
223+ unaligned_time , shape1 , _ = run_benchmark (num_arrays , size , aligned_chunks = False , axis = axis , codec = codec )
224+ aligned_time , shape2 , _ = run_benchmark (num_arrays , size , aligned_chunks = True , axis = axis , codec = codec )
223225
224226 # Calculate throughputs in GB/s
225227 numpy_speed = data_size_gb / numpy_time if numpy_time > 0 else float ("inf" )
@@ -257,7 +259,7 @@ def main():
257259 if shapes [i ] != expected_shape :
258260 print (f"Warning: { shape_name } shape { shapes [i ]} does not match expected { expected_shape } " )
259261
260- print (f"{ '=' * 90 } " )
262+ print (f"{ '=' * 70 } " )
261263
262264 # Create the combined plot with both axes
263265 create_combined_plot (
0 commit comments