Skip to content

Commit acb5f88

Browse files
committed
Fast path for UDFs and new benchmarks
1 parent cb6c751 commit acb5f88

File tree

3 files changed

+98
-1
lines changed

3 files changed

+98
-1
lines changed
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#######################################################################
2+
# Copyright (c) 2019-present, Blosc Development Team <[email protected]>
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under a BSD-style license (found in the
6+
# LICENSE file in the root directory of this source tree)
7+
#######################################################################
8+
9+
from time import time
10+
import os
11+
12+
import numpy as np
13+
import memray
14+
15+
import blosc2
16+
17+
# Number of elements in the benchmark array; passed to the constructor call
# and echoed in the banner printed before the run.
N = 100_000_000
18+
19+
20+
def info(a, t1):
    """Print timing, throughput and storage statistics for an array.

    Parameters
    ----------
    a
        Object exposing ``a.schunk.nbytes`` (reported as the data size) and
        ``a.schunk.cbytes`` (reported as the storage required).
    t1
        Elapsed time in seconds for the operation being reported.

    Notes
    -----
    The throughput and the compression ratio are reported as ``inf`` when
    the elapsed time or the stored size is zero, instead of raising
    ``ZeroDivisionError``.
    """
    size = a.schunk.nbytes
    csize = a.schunk.cbytes
    # A coarse timer can report 0 s elapsed, and an empty container can
    # report 0 stored bytes; neither should abort the benchmark report.
    speed = size / t1 / 2 ** 30 if t1 else float("inf")
    cratio = size / csize if csize else float("inf")
    print(
        f"Time: {t1:.3f} s - size: {size / 2 ** 30:.2f} GB ({speed:.2f} GB/s)"
        f"\tStorage required: {csize / 2 ** 20:.2f} MB (cratio: {cratio:.1f}x)"
    )
27+
28+
29+
def run_benchmark():
    """Time the creation of a blosc2 array and report the statistics.

    The array is built with ``blosc2.linspace`` using ``urlpath="a.b2nd"``
    and ``mode="w"``; the elapsed time is passed to ``info`` for reporting.

    Returns
    -------
    The array object returned by ``blosc2.linspace``.
    """
    # 3-D layout holding exactly N elements (100 * 1000 * 1000).  The 1-D
    # layout ``(N,)`` is the other configuration of interest; to benchmark
    # a different constructor (e.g. ``blosc2.arange``) swap the call below.
    shape = (100, 1000, 1000)
    print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}) ***")
    t0 = time()
    a = blosc2.linspace(0, 1, N, shape=shape, dtype=np.float64, urlpath="a.b2nd", mode="w")
    elapsed = time() - t0
    info(a, elapsed)
    return a
39+
40+
41+
# Check if we're being tracked by memray
if not os.environ.get("MEMRAY_TRACKING"):
    # Run the benchmark with memray tracking
    output_file = "array_constructor_memray.bin"
    print(f"Starting memray profiling. Results will be saved to {output_file}")

    # By default memray raises if the capture file already exists, which
    # would make every run after the first one abort before the benchmark
    # starts.  Request overwriting explicitly so the script is re-runnable.
    destination = memray.FileDestination(output_file, overwrite=True)
    with memray.Tracker(destination=destination):
        array = run_benchmark()

    print("\nMemray profiling completed. To view results, run:")
    print(f"memray flamegraph {output_file}")
    print("# or")
    print(f"memray summary {output_file}")
    print("# or")
    print(f"memray tree {output_file}")
else:
    # We're already being tracked by memray
    run_benchmark()

bench/ndarray/array-constructor.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#######################################################################
2+
# Copyright (c) 2019-present, Blosc Development Team <[email protected]>
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under a BSD-style license (found in the
6+
# LICENSE file in the root directory of this source tree)
7+
#######################################################################
8+
9+
from time import time
10+
11+
import numpy as np
12+
13+
import blosc2
14+
15+
# Number of elements in the benchmark array; passed to the constructor call
# and echoed in the banner printed before the run.
N = 100_000_000
16+
17+
def info(a, t1):
    """Print timing, throughput and storage statistics for an array.

    Parameters
    ----------
    a
        Object exposing ``a.schunk.nbytes`` (reported as the data size) and
        ``a.schunk.cbytes`` (reported as the storage required).
    t1
        Elapsed time in seconds for the operation being reported.

    Notes
    -----
    The throughput and the compression ratio are reported as ``inf`` when
    the elapsed time or the stored size is zero, instead of raising
    ``ZeroDivisionError``.
    """
    size = a.schunk.nbytes
    csize = a.schunk.cbytes
    # Guard both divisions: a coarse timer may yield 0 s and an empty
    # container may yield 0 stored bytes.
    speed = size / t1 / 2 ** 30 if t1 else float("inf")
    cratio = size / csize if csize else float("inf")
    print(
        f"Time: {t1:.3f} s - size: {size / 2 ** 30:.2f} GB ({speed:.2f} GB/s)"
        f"\tStorage required: {csize / 2 ** 20:.2f} MB (cratio: {cratio:.1f}x)"
    )
24+
25+
26+
# 3-D layout holding exactly N elements (100 * 1000 * 1000).  The 1-D layout
# ``(N,)`` is the other configuration of interest; to benchmark a different
# constructor (e.g. ``blosc2.arange``) swap the call below.
shape = (100, 1000, 1000)
print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}) ***")
t0 = time()
a = blosc2.linspace(0, 1, N, shape=shape, dtype=np.float64, urlpath="a.b2nd", mode="w")
# Report elapsed wall-clock time together with the array's size statistics.
info(a, time() - t0)

src/blosc2/ndarray.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,14 @@ def reshape(
334334
slice_size = src_slice.stop - src_slice.start
335335
dst_buf_slice = slice(dst_buf_len, dst_buf_len + slice_size)
336336
dst_buf_len += slice_size
337-
dst_buf[dst_buf_slice] = src[src_slice]
337+
if hasattr(src, "res_getitem"):
338+
# Fast path for lazy UDFs (important for e.g. arange or linspace)
339+
# This essentially avoids the need to create a new,
340+
# potentially large NumPy array in memory.
341+
# This is not critical for Linux, but it is for Windows/Mac.
342+
dst_buf[dst_buf_slice] = src.res_getitem[src_slice]
343+
else:
344+
dst_buf[dst_buf_slice] = src[src_slice]
338345
# Compute the shape of dst_slice
339346
dst_slice_shape = tuple(s.stop - s.start for s in dst_slice)
340347
# ... and assign the buffer to the destination array

0 commit comments

Comments
 (0)