Commit 09bc21d

Preliminary benchmark for sharded writes
1 parent 4082888 commit 09bc21d

File tree

1 file changed (+66, -0 lines)

bench/write_shard.py

Lines changed: 66 additions & 0 deletions
@@ -0,0 +1,66 @@
import itertools
import os.path
import shutil
import sys
import tempfile
import timeit

import line_profiler
import numpy as np

import zarr
import zarr.codecs
import zarr.codecs.sharding

if __name__ == "__main__":
    sys.path.insert(0, "..")

    # setup
    with tempfile.TemporaryDirectory() as path:

        ndim = 3
        opt = {
            'shape': [1024]*ndim,
            'chunks': [128]*ndim,
            'shards': [512]*ndim,
            'dtype': np.float64,
        }

        store = zarr.storage.LocalStore(path)
        z = zarr.create_array(store, **opt)
        print(z)

        def cleanup() -> None:
            # Remove the written array data between timing runs,
            # keeping the JSON metadata so the array stays valid.
            for elem in os.listdir(path):
                elem = os.path.join(path, elem)
                if not elem.endswith(".json"):
                    if os.path.isdir(elem):
                        shutil.rmtree(elem)
                    else:
                        os.remove(elem)

        def write() -> None:
            # Fill the array one 512**3 block at a time, aligned with the shard grid.
            wchunk = [512]*ndim
            nwchunks = [n//s for n, s in zip(opt['shape'], wchunk, strict=True)]
            for shard in itertools.product(*(range(n) for n in nwchunks)):
                slicer = tuple(
                    slice(i*n, (i+1)*n)
                    for i, n in zip(shard, wchunk, strict=True)
                )
                d = np.random.rand(*wchunk).astype(opt['dtype'])
                z[slicer] = d

        print("*" * 79)

        # time
        vars = {"write": write, "cleanup": cleanup, "z": z, "opt": opt}
        t = timeit.repeat("write()", "cleanup()", repeat=2, number=1, globals=vars)
        print(t)
        print(min(t))
        print(z)

        # profile
        # f = zarr.codecs.sharding.ShardingCodec._encode_partial_single
        # profile = line_profiler.LineProfiler(f)
        # profile.run("write()")
        # profile.print_stats()
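
A quick sanity check that is not part of the committed script: with 512**3 shards on a 1024**3 array, a full write() pass should leave one store object per shard (2**3 = 8 in total) rather than one per 128**3 chunk. A minimal sketch of such a check, assuming the LocalStore directory layout used above (the helper name count_store_objects is made up for illustration):

import os

def count_store_objects(path: str) -> int:
    # Count non-metadata files under the store directory; the sharding codec
    # packs the 128**3 chunks of each shard into a single store object.
    total = 0
    for _root, _dirs, files in os.walk(path):
        total += sum(1 for name in files if not name.endswith(".json"))
    return total

print(count_store_objects(path))  # expected: 8 after a full write() pass

Note that cleanup() deliberately keeps the .json metadata, so the timed repeats only re-write array data without re-creating the array itself.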
