|
1 | | - |
2 | 1 | ####################################################################### |
3 | 2 | # Copyright (c) 2019-present, Blosc Development Team <[email protected]> |
4 | 3 | # All rights reserved. |
|
8 | 7 | ####################################################################### |
9 | 8 |
|
10 | 9 | # This shows how to convert a generic .h5 file to a custom blosc2-compressed .h5 file |
11 | | -# The blosc2 plugin in hdf5plugin doesn't support custom block shapes, and so one |
| 10 | +# The blosc2 plugin in hdf5plugin doesn't support custom block shapes, and so one |
12 | 11 | # has to go a different route for more bespoke compression |
13 | 12 |
|
14 | 13 | import os |
15 | | -import blosc2 |
| 14 | + |
16 | 15 | import h5py |
17 | 16 | import hdf5plugin |
18 | | -import numpy as np |
| 17 | + |
| 18 | +import blosc2 |
19 | 19 |
|
# Compression level: 0 disables compression, 9 is maximum.
clevel = 5
# Input file containing the kevlar dataset.
fname_in = "kevlar.h5"
# Output file that will hold the blosc2-compressed copy.
fname_out = "kevlar-blosc2.h5"
23 | 23 | if not os.path.exists(fname_in): |
24 | 24 | raise FileNotFoundError( |
25 | 25 | f"Input file {fname_in} does not exist\n" |
|
# Example 2
# For other codecs (e.g. grok) or for more custom compression, such as with
# user-defined block shapes, one has to use a more involved route.
blocks = (50, 80, 80)  # blosc2 block shape (inner partition of each chunk)
chunks = (200, 240, 240)  # blosc2 chunk shape for the 3D stack
# Compression parameters for the bespoke route: LZ4 codec with bit-shuffle
# filtering and chunk splitting disabled.
cparams = dict(
    codec=blosc2.Codec.LZ4,
    filters=[blosc2.Filter.BITSHUFFLE],
    splitmode=blosc2.SplitMode.NEVER_SPLIT,
    clevel=clevel,
)
58 | 58 |
|
# Build (or reuse) an on-disk blosc2 copy of the dataset with the custom
# chunk/block geometry above; "dset.b2nd" acts as a cache between runs.
if os.path.exists("dset.b2nd"):  # don't reload dset to blosc2 if already done so once
    b2im = blosc2.open(urlpath="dset.b2nd", mode="r")
    s, d = b2im.shape, b2im.dtype
else:
    with h5py.File(fname_in, "r") as fr:  # load file and process to blosc2 array
        # NOTE(review): assumes the image stack lives at /entry/data/data — confirm against producer.
        dset = fr["/entry/data/data"][:]
        b2im = blosc2.asarray(
            dset, chunks=chunks, blocks=blocks, cparams=cparams, urlpath="dset.b2nd", mode="w"
        )
        # Fix: mirror the cached-path branch so BOTH paths define (s, d);
        # previously only `d` was assigned here, leaving `s` undefined on a first run.
        s, d = b2im.shape, dset.dtype
        del dset  # release the uncompressed array promptly
68 | 70 |
|
|
# Copy each compressed blosc2 chunk straight into the HDF5 dataset.
# HDF5 needs a serialized cframe, which only blosc2 schunks (not raw chunks) provide.
for chunk_info in b2im.iterchunks_info():
    # Element offset of this chunk within the full array.
    offset = tuple(coord * chunks[axis] for axis, coord in enumerate(chunk_info.coords))
    # Very cheap memory allocation: an empty container with the same geometry,
    # used only to obtain an schunk wrapper around the raw chunk.
    scratch = blosc2.empty(
        shape=b2im.chunks,
        chunks=b2im.chunks,
        blocks=b2im.blocks,
        dtype=b2im.dtype,
    )
    # Insert the already-compressed chunk — no decompression required.
    scratch.schunk.insert_chunk(0, b2im.get_chunk(chunk_info.nchunk))
    # Serialize the schunk to a cframe and write it directly into HDF5.
    dset_out.id.write_direct_chunk(offset, scratch.schunk.to_cframe())
print("Successfully compressed file with custom parameters")
0 commit comments