Skip to content

Commit 660ed63

Browse files
authored
Add blosc2-hdf5 interface example
1 parent d50676b commit 660ed63

File tree

1 file changed

+88
-0
lines changed

1 file changed

+88
-0
lines changed
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
2+
#######################################################################
3+
# Copyright (c) 2019-present, Blosc Development Team <[email protected]>
4+
# All rights reserved.
5+
#
6+
# This source code is licensed under a BSD-style license (found in the
7+
# LICENSE file in the root directory of this source tree)
8+
#######################################################################
9+
10+
# This shows how to convert a generic .h5 file to a custom blosc2-compressed .h5 file
11+
# The blosc2 plugin in hdf5plugin doesn't support custom block shapes, and so one
12+
# has to go a different route for more bespoke compression
13+
14+
import os
15+
import blosc2
16+
import h5py
17+
import hdf5plugin
18+
import numpy as np
19+
20+
# Settings shared by both examples below
clevel = 5  # compression level, e.g., 0-9, where 0 is no compression and 9 is maximum compression
fname_in = "kevlar.h5"  # input file with the kevlar dataset
fname_out = "kevlar-blosc2.h5"  # output file receiving the recompressed data
# Fail early, with download instructions, when the input file is not present
if not os.path.exists(fname_in):
    raise FileNotFoundError(
        f"Input file {fname_in} does not exist\n"
        "Please download it from the kevlar repository at:"
        " http://www.silx.org/pub/pyFAI/pyFAI_UM_2020/data_ID13/kevlar.h5"
    )
29+
30+
# Example 1
# hdf5plugin supports limited blosc2 compression with certain codecs
cname = "zstd"
with h5py.File(fname_in, "r") as fr:
    dset = fr["/entry/data/data"][:]  # [:] reads the whole dataset into memory
with h5py.File(fname_out, "w") as fw:  # "w" starts the output file from scratch
    g = fw.create_group("/data")
    b2comp = hdf5plugin.Blosc2(cname=cname, clevel=clevel, filters=hdf5plugin.Blosc2.BITSHUFFLE)
    dset_out = g.create_dataset(
        f"cname-{cname}",
        # Write the complete array: the previous slice used an undefined
        # `nframes` name and raised NameError before anything was written.
        data=dset,
        dtype=dset.dtype,
        chunks=(1,) + dset.shape[1:],  # chunk size of 1 frame
        **b2comp,
    )
print("Successfully compressed file with hdf5plugin")
46+
47+
# Example 2
# Codecs that hdf5plugin does not expose (e.g. grok), or finer control such as
# user-defined block shapes, require a more involved route.
blocks = (50, 80, 80)  # blosc2 block shape
chunks = (200, 240, 240)  # blosc2 chunk shape
# Compression parameters handed to blosc2 when the array is created
cparams = dict(
    codec=blosc2.Codec.LZ4,
    filters=[blosc2.Filter.BITSHUFFLE],
    splitmode=blosc2.SplitMode.NEVER_SPLIT,
    clevel=clevel,
)
58+
59+
# Build the blosc2 copy of the dataset only once; later runs reuse dset.b2nd
try:
    b2im = blosc2.open(urlpath="dset.b2nd", mode="r")
except Exception:  # cache missing or unreadable: (re)create it from the input file
    with h5py.File(fname_in, "r") as fr:  # load file and process to blosc2 array
        dset = fr["/entry/data/data"][:]
    b2im = blosc2.asarray(dset, chunks=chunks, blocks=blocks, cparams=cparams, urlpath="dset.b2nd", mode="w")
    del dset  # the compressed copy now lives on disk; drop the in-memory array
# Take shape and dtype from the blosc2 array so that both branches define them
s, d = b2im.shape, b2im.dtype

# Write to .h5 file
# Open in append mode so the dataset written by Example 1 is kept in the file
with h5py.File(fname_out, "a") as fw:
    g = fw.require_group("/data")  # reuse the group if Example 1 already created it
    b2comp = hdf5plugin.Blosc2()  # just for identification, no compression algorithm specified
    dset_out = g.create_dataset(
        "cname-customlz4",
        s,
        d,
        chunks=b2im.chunks,  # HDF5 chunks must coincide with the blosc2 chunks written below
        **b2comp,
    )
    # Write individual blosc2 chunks directly to hdf5
    # hdf5 requires a cframe, which is only available via blosc2 schunks (not chunks)
    for info in b2im.iterchunks_info():
        # Chunk grid coordinates -> element offset of the chunk inside the dataset
        ncoords = tuple(n * c for n, c in zip(info.coords, b2im.chunks))
        aux = blosc2.empty(
            shape=b2im.chunks,
            chunks=b2im.chunks,
            blocks=b2im.blocks,
            dtype=b2im.dtype,
        )  # very cheap memory allocation
        # insert chunk into blosc2 array so we have schunk wrapper (no decompression required)
        aux.schunk.insert_chunk(0, b2im.get_chunk(info.nchunk))
        # convert schunk to cframe and write to hdf5
        dset_out.id.write_direct_chunk(ncoords, aux.schunk.to_cframe())
print("Successfully compressed file with custom parameters")

0 commit comments

Comments
 (0)