Commit 9e6b7e2

Committed by: Luke Shaw
Commit message: Reformatting example for b2-hdf5
Parent: 097481d


examples/blosc2_hdf5_compression.py

Lines changed: 23 additions & 16 deletions
@@ -1,4 +1,3 @@
-
 #######################################################################
 # Copyright (c) 2019-present, Blosc Development Team <[email protected]>
 # All rights reserved.
@@ -8,18 +7,19 @@
 #######################################################################

 # This shows how to convert a generic .h5 file to a custom blosc2-compressed .h5 file
-# The blosc2 plugin in hdf5plugin doesn't support custom block shapes, and so one 
+# The blosc2 plugin in hdf5plugin doesn't support custom block shapes, and so one
 # has to go a different route for more bespoke compression

 import os
-import blosc2
+
 import h5py
 import hdf5plugin
-import numpy as np
+
+import blosc2

 clevel = 5  # compression level, e.g., 0-9, where 0 is no compression and 9 is maximum compression
 fname_in = "kevlar.h5"  # input file with the kevlar dataset
-fname_out = f"kevlar-blosc2.h5"
+fname_out = "kevlar-blosc2.h5"
 if not os.path.exists(fname_in):
     raise FileNotFoundError(
         f"Input file {fname_in} does not exist\n"
@@ -47,22 +47,24 @@
 # Example 2
 # For other codecs (e.g grok) or for more custom compression such as with user-defined block shapes, one
 # has to use a more involved route
-blocks = (50,80,80)
-chunks = (200,240,240)
+blocks = (50, 80, 80)
+chunks = (200, 240, 240)
 cparams = {
     "codec": blosc2.Codec.LZ4,
     "filters": [blosc2.Filter.BITSHUFFLE],
     "splitmode": blosc2.SplitMode.NEVER_SPLIT,
-    "clevel": clevel
+    "clevel": clevel,
 }

-if os.path.exists("dset.b2nd"): # don't reload dset to blosc2 if already done so once
+if os.path.exists("dset.b2nd"):  # don't reload dset to blosc2 if already done so once
     b2im = blosc2.open(urlpath="dset.b2nd", mode="r")
     s, d = b2im.shape, b2im.dtype
 else:
-    with h5py.File(fname_in, "r") as fr: # load file and process to blosc2 array
+    with h5py.File(fname_in, "r") as fr:  # load file and process to blosc2 array
         dset = fr["/entry/data/data"][:]
-        b2im = blosc2.asarray(dset, chunks=chunks, blocks=blocks, cparams=cparams, urlpath="dset.b2nd", mode="w")
+        b2im = blosc2.asarray(
+            dset, chunks=chunks, blocks=blocks, cparams=cparams, urlpath="dset.b2nd", mode="w"
+        )
     d = dset.dtype
     del dset

@@ -80,9 +82,14 @@
 # Write individual blosc2 chunks directly to hdf5
 # hdf5 requires a cframe, which is only available via blosc2 schunks (not chunks)
 for info in b2im.iterchunks_info():
-    ncoords = tuple(n*chunks[i] for i,n in enumerate(info.coords))
-    aux = blosc2.empty(shape=b2im.chunks, chunks=b2im.chunks, blocks=b2im.blocks,
-                       dtype=b2im.dtype) # very cheap memory allocation
-    aux.schunk.insert_chunk(0, b2im.get_chunk(info.nchunk)) # insert chunk into blosc2 array so we have schunk wrapper (no decompression required)
-    dset_out.id.write_direct_chunk(ncoords, aux.schunk.to_cframe()) # convert schunk to cframe and write to hdf5
+    ncoords = tuple(n * chunks[i] for i, n in enumerate(info.coords))
+    aux = blosc2.empty(
+        shape=b2im.chunks, chunks=b2im.chunks, blocks=b2im.blocks, dtype=b2im.dtype
+    )  # very cheap memory allocation
+    aux.schunk.insert_chunk(
+        0, b2im.get_chunk(info.nchunk)
+    )  # insert chunk into blosc2 array so we have schunk wrapper (no decompression required)
+    dset_out.id.write_direct_chunk(
+        ncoords, aux.schunk.to_cframe()
+    )  # convert schunk to cframe and write to hdf5
 print("Successfully compressed file with custom parameters")
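
Note: Example 1, the simpler route referred to in the comments above, sits in the part of the file this diff does not touch. For context, a rough sketch of that route is shown below, assuming hdf5plugin's Blosc2 filter takes cname/clevel/filters arguments and exposes a BITSHUFFLE constant like its Blosc predecessor; the file name and the random array are placeholders. Only the HDF5 chunk shape is under user control here, which is the limitation that motivates Example 2's custom block shapes.

import h5py
import hdf5plugin
import numpy as np

# Placeholder array standing in for the kevlar frames
data = np.random.randint(0, 2**16, size=(400, 240, 240), dtype=np.uint16)

with h5py.File("kevlar-blosc2-simple.h5", "w") as f:
    # The chunk shape is configurable, but the blosc2 block shape inside each
    # chunk is chosen by the filter itself
    f.create_dataset(
        "data",
        data=data,
        chunks=(200, 240, 240),
        **hdf5plugin.Blosc2(cname="lz4", clevel=5, filters=hdf5plugin.Blosc2.BITSHUFFLE)
    )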

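As a quick sanity check on the custom route (not part of this commit), the output written via write_direct_chunk should be readable back with plain h5py once hdf5plugin has been imported, since importing it registers the blosc2 filter with HDF5. A minimal sketch follows; the output dataset path is an assumption that mirrors the input file, as the diff does not show where dset_out is created.

import h5py
import hdf5plugin  # noqa: F401 -- importing registers the blosc2 filter with HDF5

with h5py.File("kevlar-blosc2.h5", "r") as f:
    dset = f["/entry/data/data"]  # assumed output path, mirroring the input file
    print(dset.shape, dset.dtype, dset.chunks)
    frame = dset[0]  # decompressed on the fly by the registered blosc2 filter
    print(frame.min(), frame.max())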