
Commit e78d2a6

Merge pull request #427 from Blosc/addStack
Add stack and expand_dims
2 parents 51e801f + 55a86c3 commit e78d2a6
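
For orientation, here is a minimal sketch of how the new blosc2.stack is used (the same call pattern appears in the benchmark added below): like np.stack, it joins equally shaped arrays along a new axis. The shapes and values are illustrative only, and default chunking/compression parameters are assumed.

import numpy as np
import blosc2

# Five equally shaped NDArrays (blosc2.arange is used the same way in the benchmark below).
arrays = [blosc2.arange(0, 12, 1, dtype=np.int64, shape=(3, 4)) for _ in range(5)]

# Join them along a new leading axis, mirroring np.stack.
stacked = blosc2.stack(arrays, axis=0)
print(stacked.shape)  # expected: (5, 3, 4)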

File tree

8 files changed: +399 -1 lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -50,7 +50,7 @@ else()
   include(FetchContent)
   FetchContent_Declare(blosc2
     GIT_REPOSITORY https://github.com/Blosc/c-blosc2
-    GIT_TAG 1c2f8bb0c914c43e23b751fbcf6642cd7aec09db # v2.18.0 (concatenate added)
+    GIT_TAG 22b828b412391a8cbeeb97415777c5beecef73ff # v2.18.1 (expand_dims added)
   )
   FetchContent_MakeAvailable(blosc2)
   include_directories("${blosc2_SOURCE_DIR}/include")

bench/ndarray/stack.py

Lines changed: 284 additions & 0 deletions
@@ -0,0 +1,284 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <[email protected]>
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

import numpy as np
import blosc2
import time
import matplotlib.pyplot as plt
import os
from matplotlib.ticker import ScalarFormatter


def run_benchmark(num_arrays=10, size=500, aligned_chunks=False, axis=0,
                  dtype=np.float64, datadist="linspace", codec=blosc2.Codec.ZSTD):
    """
    Benchmark blosc2.stack performance with different chunk alignments.

    Parameters:
    - num_arrays: Number of arrays to stack
    - size: Base size for array dimensions
    - aligned_chunks: Whether to use aligned chunk shapes
    - axis: Axis along which to stack (-3, -2, -1, 0, 1, 2)
    - dtype: Data type for the arrays (default is np.float64)
    - datadist: Distribution of data in arrays (default is "linspace")
    - codec: Codec to use for compression (default is blosc2.Codec.ZSTD)

    Returns:
    - duration: Time taken in seconds
    - result_shape: Shape of the resulting array
    - data_size_gb: Size of data processed in GB
    """
    if axis not in (-3, -2, -1, 0, 1, 2):
        raise ValueError("Only axis 0 (-3), 1 (-2) and 2 (-1) are supported")
    shapes = [(size, size // num_arrays) for _ in range(num_arrays)]  # shape same for all arrays

    # Create appropriate chunk shapes
    chunks, blocks = blosc2.compute_chunks_blocks(shapes[0], dtype=dtype, cparams=blosc2.CParams(codec=codec))
    if aligned_chunks:
        # Aligned chunks: divisors of the shape dimensions
        chunk_shapes = [(chunks[0], chunks[1]) for shape in shapes]
    else:
        # Unaligned chunks: not divisors of shape dimensions
        chunk_shapes = [(chunks[0] + 1, chunks[1] - 1) for shape in shapes]

    # Create arrays
    arrays = []
    for i, (shape, chunk_shape) in enumerate(zip(shapes, chunk_shapes)):
        if datadist == "linspace":
            # Create arrays with linearly spaced values
            arr = blosc2.linspace(i, i + 1, num=np.prod(shape),
                                  dtype=dtype, shape=shape, chunks=chunk_shape,
                                  cparams=blosc2.CParams(codec=codec))
        else:
            # Default to arange for simplicity
            arr = blosc2.arange(
                i * np.prod(shape), (i + 1) * np.prod(shape), 1, dtype=dtype, shape=shape, chunks=chunk_shape,
                cparams=blosc2.CParams(codec=codec)
            )
        arrays.append(arr)

    # Calculate total data size in GB (4 bytes per int32)
    total_elements = sum(np.prod(shape) for shape in shapes)
    data_size_gb = total_elements * 4 / (1024**3)  # Convert bytes to GB

    # Time the stack
    start_time = time.time()
    result = blosc2.stack(arrays, axis=axis, cparams=blosc2.CParams(codec=codec))
    duration = time.time() - start_time

    return duration, result.shape, data_size_gb


def run_numpy_benchmark(num_arrays=10, size=500, axis=0, dtype=np.float64, datadist="linspace"):
    """
    Benchmark numpy.stack performance for comparison.

    Parameters:
    - num_arrays: Number of arrays to stack
    - size: Base size for array dimensions
    - axis: Axis along which to stack (-3, -2, -1, 0, 1, 2)
    - dtype: Data type for the arrays (default is np.float64)
    - datadist: Distribution of data in arrays (default is "linspace")

    Returns:
    - duration: Time taken in seconds
    - result_shape: Shape of the resulting array
    - data_size_gb: Size of data processed in GB
    """
    if axis not in (-3, -2, -1, 0, 1, 2):
        raise ValueError("Only axis 0 (-3), 1 (-2) and 2 (-1) are supported")
    shapes = [(size, size // num_arrays) for _ in range(num_arrays)]  # shape same for all arrays

    # Create arrays
    numpy_arrays = []
    for i, shape in enumerate(shapes):
        if datadist == "linspace":
            # Create arrays with linearly spaced values
            arr = np.linspace(i, i + 1, num=np.prod(shape), dtype=dtype).reshape(shape)
        else:
            arr = np.arange(i * np.prod(shape), (i + 1) * np.prod(shape), 1, dtype=dtype).reshape(shape)
        numpy_arrays.append(arr)

    # Calculate total data size in GB (4 bytes per int32)
    total_elements = sum(np.prod(shape) for shape in shapes)
    data_size_gb = total_elements * 4 / (1024**3)  # Convert bytes to GB

    # Time the concatenation
    start_time = time.time()
    result = np.stack(numpy_arrays, axis=axis)
    duration = time.time() - start_time

    return duration, result.shape, data_size_gb


def create_combined_plot(num_arrays, sizes, numpy_speeds_axis0, unaligned_speeds_axis0, aligned_speeds_axis0,
                         numpy_speeds_axism1, unaligned_speeds_axism1, aligned_speeds_axism1, output_dir="plots",
                         datadist="linspace", codec_str="LZ4", axes=(0, -1)):
    """
    Create a figure with two side-by-side bar plots comparing the performance for both axes.

    Parameters:
    - sizes: List of array sizes
    - *_speeds_axis0: Lists of speeds (GB/s) for axis 0 stack
    - *_speeds_axism1: Lists of speeds (GB/s) for axis -1 stack
    - output_dir: Directory to save the plot
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Set up the figure with two subplots side by side
    fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(20, 8), sharey=True)

    # Convert sizes to strings for the x-axis
    x_labels = [str(size) for size in sizes]
    x = np.arange(len(sizes))
    width = 0.25

    # Create bars for axis 0 plot
    rect1_axis0 = ax0.bar(x - width, numpy_speeds_axis0, width, label='NumPy', color='#1f77b4')
    rect2_axis0 = ax0.bar(x, unaligned_speeds_axis0, width, label='Blosc2 Unaligned', color='#ff7f0e')
    rect3_axis0 = ax0.bar(x + width, aligned_speeds_axis0, width, label='Blosc2 Aligned', color='#2ca02c')

    # Create bars for axis 1 plot
    rect1_axis1 = ax1.bar(x - width, numpy_speeds_axism1, width, label='NumPy', color='#1f77b4')
    rect2_axis1 = ax1.bar(x, unaligned_speeds_axism1, width, label='Blosc2 Unaligned', color='#ff7f0e')
    rect3_axis1 = ax1.bar(x + width, aligned_speeds_axism1, width, label='Blosc2 Aligned', color='#2ca02c')

    # Add labels and titles
    for ax, axis in [(ax0, axes[0]), (ax1, axes[1])]:
        ax.set_xlabel('Array Size (N for NxN array)', fontsize=12)
        ax.set_title(f'Stack Performance for {num_arrays} arrays (axis={axis}) [{datadist}, {codec_str}]', fontsize=14)
        ax.set_xticks(x)
        ax.set_xticklabels(x_labels)
        ax.grid(True, axis='y', linestyle='--', alpha=0.7)
        ax.yaxis.set_major_formatter(ScalarFormatter(useOffset=False))

        # Add legend inside each plot
        ax.legend(title="Stack Methods",
                  loc='upper left',
                  fontsize=12,
                  frameon=True,
                  facecolor='white',
                  edgecolor='black',
                  framealpha=0.8)

    # Add y-label only to the left subplot
    ax0.set_ylabel('Throughput (GB/s)', fontsize=12)

    # Add value labels on top of the bars
    def autolabel(rects, ax):
        for rect in rects:
            height = rect.get_height()
            ax.annotate(f'{height:.2f} GB/s',
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom', rotation=90, fontsize=8)

    autolabel(rect1_axis0, ax0)
    autolabel(rect2_axis0, ax0)
    autolabel(rect3_axis0, ax0)

    autolabel(rect1_axis1, ax1)
    autolabel(rect2_axis1, ax1)
    autolabel(rect3_axis1, ax1)

    # Save the plot
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'concatenate_benchmark_combined.png'), dpi=100)
    plt.show()
    plt.close()

    print(f"Combined plot saved to {os.path.join(output_dir, 'concatenate_benchmark_combined.png')}")


def main():
    # Parameters
    sizes = [500, 1000, 2000, 4000, 10000]  # , 20000] # Sizes of arrays to test
    num_arrays = 10
    dtype = np.float64  # Data type for arrays
    datadist = "linspace"  # Distribution of data in arrays
    codec = blosc2.Codec.LZ4
    codec_str = str(codec).split('.')[-1]
    print(f"{'=' * 70}")
    print(f"Blosc2 vs NumPy stack benchmark with {codec_str} codec")
    print(f"{'=' * 70}")

    # Lists to store results for both axes
    numpy_speeds_axis0 = []
    unaligned_speeds_axis0 = []
    aligned_speeds_axis0 = []
    numpy_speeds_axism1 = []
    unaligned_speeds_axism1 = []
    aligned_speeds_axism1 = []

    for axis in [0, -1]:
        print(f"\nStacking {num_arrays} arrays along axis {axis} with data distribution '{datadist}' ")
        print(f"{'Size':<8} {'NumPy (GB/s)':<14} {'Unaligned (GB/s)':<18} "
              f"{'Aligned (GB/s)':<16} {'Alig vs Unalig':<16} {'Alig vs NumPy':<16}")
        print(f"{'-' * 90}")

        for size in sizes:
            # Run the benchmarks
            numpy_time, numpy_shape, data_size_gb = run_numpy_benchmark(num_arrays, size, axis=axis, dtype=dtype)
            unaligned_time, shape1, _ = run_benchmark(num_arrays, size, aligned_chunks=False, axis=axis,
                                                      dtype=dtype, datadist=datadist, codec=codec)
            aligned_time, shape2, _ = run_benchmark(num_arrays, size, aligned_chunks=True, axis=axis,
                                                    dtype=dtype, datadist=datadist, codec=codec)

            # Calculate throughputs in GB/s
            numpy_speed = data_size_gb / numpy_time if numpy_time > 0 else float("inf")
            unaligned_speed = data_size_gb / unaligned_time if unaligned_time > 0 else float("inf")
            aligned_speed = data_size_gb / aligned_time if aligned_time > 0 else float("inf")

            # Store speeds in the appropriate list
            if axis == 0:
                numpy_speeds_axis0.append(numpy_speed)
                unaligned_speeds_axis0.append(unaligned_speed)
                aligned_speeds_axis0.append(aligned_speed)
            else:
                numpy_speeds_axism1.append(numpy_speed)
                unaligned_speeds_axism1.append(unaligned_speed)
                aligned_speeds_axism1.append(aligned_speed)

            # Calculate speedup ratios
            aligned_vs_unaligned = aligned_speed / unaligned_speed if unaligned_speed > 0 else float("inf")
            aligned_vs_numpy = aligned_speed / numpy_speed if numpy_speed > 0 else float("inf")

            # Print results
            print(f"{size:<10} {numpy_speed:<14.2f} {unaligned_speed:<18.2f} {aligned_speed:<16.2f} "
                  f"{aligned_vs_unaligned:>10.2f}x {aligned_vs_numpy:>10.2f}x")

            # Quick verification of result shape
            if axis == 0:
                expected_shape = (10, size, size // num_arrays)  # After concatenation along axis 0
            else:
                expected_shape = (size, size // num_arrays, 10)  # After concatenation along axis -1

            # Verify shapes match
            shapes = [numpy_shape, shape1, shape2]
            if any(shape != expected_shape for shape in shapes):
                for i, shape_name in enumerate(["NumPy", "Blosc2 unaligned", "Blosc2 aligned"]):
                    if shapes[i] != expected_shape:
                        print(f"Warning: {shape_name} shape {shapes[i]} does not match expected {expected_shape}")

        print(f"{'=' * 70}")

    # Create the combined plot with both axes
    create_combined_plot(
        num_arrays,
        sizes,
        numpy_speeds_axis0, unaligned_speeds_axis0, aligned_speeds_axis0,
        numpy_speeds_axism1, unaligned_speeds_axism1, aligned_speeds_axism1,
        datadist=datadist, output_dir="plots", codec_str=codec_str, axes=(0, -1)
    )


if __name__ == "__main__":
    main()
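
The script can be run directly (python bench/ndarray/stack.py); it prints a throughput table for axis 0 and axis -1 and then saves the combined bar plot. As a condensed, single-configuration sketch of the same measurement, reusing the calls from the benchmark with default chunking assumed, and using 8 bytes per float64 element for the data-volume estimate (the script above hard-codes 4 bytes per element):

import time

import numpy as np
import blosc2

num_arrays, size = 10, 1000
shape = (size, size // num_arrays)

# Ten float64 arrays with linearly spaced values, as in run_benchmark().
arrays = [blosc2.linspace(i, i + 1, num=np.prod(shape), dtype=np.float64, shape=shape)
          for i in range(num_arrays)]

start = time.time()
result = blosc2.stack(arrays, axis=0)
elapsed = time.time() - start

# Throughput estimate: total bytes stacked divided by wall-clock stack time.
data_gb = num_arrays * np.prod(shape) * 8 / 1024**3
print(result.shape, f"{data_gb / elapsed:.2f} GB/s")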

doc/reference/ndarray.rst

Lines changed: 2 additions & 0 deletions
@@ -74,6 +74,7 @@ Constructors
     copy
     concatenate
     empty
+    expand_dims
     frombuffer
     fromiter
     nans
@@ -86,3 +87,4 @@ Constructors
     linspace
     eye
     reshape
+    stack

src/blosc2/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -239,6 +239,7 @@ class Tuner(Enum):
     reshape,
     copy,
     concatenate,
+    expand_dims,
     empty,
     frombuffer,
     fromiter,
@@ -253,6 +254,7 @@ class Tuner(Enum):
     permute_dims,
     transpose,
     matrix_transpose,
+    stack,
 )

 from .c2array import c2context, C2Array, URLPath

src/blosc2/blosc2_ext.pyx

Lines changed: 12 additions & 0 deletions
@@ -509,6 +509,7 @@ cdef extern from "b2nd.h":
     int b2nd_copy(b2nd_context_t *ctx, b2nd_array_t *src, b2nd_array_t **array)
     int b2nd_concatenate(b2nd_context_t *ctx, b2nd_array_t *src1, b2nd_array_t *src2,
                          int8_t axis, c_bool copy, b2nd_array_t **array)
+    int b2nd_expand_dims(const b2nd_array_t *array, b2nd_array_t ** view, const int8_t axis)
     int b2nd_from_schunk(blosc2_schunk *schunk, b2nd_array_t **array)

     void blosc2_unidim_to_multidim(uint8_t ndim, int64_t *shape, int64_t i, int64_t *index)
@@ -2892,3 +2893,14 @@ def concatenate(arr1: NDArray, arr2: NDArray, axis: int, **kwargs):
     else:
         # Return the first array, which now contains the concatenated data
         return arr1
+
+def expand_dims(arr1: NDArray, axis: int):
+    """
+    Add new dummy axis to NDArray object at specified dimension.
+    """
+    cdef b2nd_array_t *view
+    _check_rc(b2nd_expand_dims(arr1.array, &view, axis),
+              "Error while concatenating the arrays")
+
+    return blosc2.NDArray(_schunk=PyCapsule_New(view.sc, <char *> "blosc2_schunk*", NULL),
+                          _array=PyCapsule_New(view, <char *> "b2nd_array_t*", NULL))
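
The binding wraps the b2nd_array_t view returned by b2nd_expand_dims in a new NDArray, so no data copy is expected. At the Python level the function should behave like np.expand_dims, inserting a length-1 axis at the requested position; the keyword-style call below is an assumption, since the diff only shows the extension-level signature.

import numpy as np
import blosc2

a = blosc2.arange(0, 6, 1, dtype=np.int32, shape=(2, 3))

# Insert a dummy axis in the middle; semantics assumed to mirror np.expand_dims.
v = blosc2.expand_dims(a, axis=1)
print(v.shape)  # expected: (2, 1, 3)

# NumPy reference for the same operation.
print(np.expand_dims(np.empty((2, 3)), 1).shape)  # -> (2, 1, 3)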
