1
1
/*
2
- * This file is an example of an HDF5 filter plugin.
3
2
* The filter function H5Z_filter_bshuf was adopted from
4
3
* https://github.com/kiyo-masui/bitshuffle
5
4
* The plugin can be used with the HDF5 library version 1.8.11+ to read
6
5
* HDF5 datasets filtered with bshuf.
7
6
*/
8
7
9
8
/*
9
+ * Bitshuffle HDF5 filter
10
10
*
11
11
* This file is part of Bitshuffle
12
12
* Author: Kiyoshi Masui <[email protected] >
13
- * Website: http ://www.github.com/kiyo-masui/bitshuffle
13
+ * Website: https://www.github.com/kiyo-masui/bitshuffle
14
14
* Created: 2014
15
15
*
16
16
* See LICENSE file for details about copyright and rights to use.
17
17
*
18
+ *
19
+ * Header File
20
+ *
21
+ * Filter Options
22
+ * --------------
23
+ * block_size (option slot 0) : integer (optional)
24
+ * What block size to use (in elements not bytes). Default is 0,
25
+ * for which bitshuffle will pick a block size with a target of 8kb.
26
+ * Compression (option slot 1) : 0 or BSHUF_H5_COMPRESS_LZ4
27
+ * Whether to apply LZ4 compression to the data after bitshuffling.
28
+ * This is much faster than applying compression as a second filter
29
+ * because it is done when the small block of data is already in the
30
+ * L1 cache.
31
+ *
32
+ * For LZ4 compression, the compressed format of the data is the same as
33
+ * for the normal LZ4 filter described in
34
+ * https://support.hdfgroup.org/services/filters/HDF5_LZ4.pdf.
35
+ *
18
36
*/
19
37
20
38
59
77
#include "bitshuffle.h"
60
78
61
79
#define BSHUF_H5_COMPRESS_LZ4 2
80
+ #define BSHUF_H5_COMPRESS_ZSTD 3
62
81
63
82
/* Filter ID registered with the HDF Group */
64
83
#define BSHUF_H5FILTER 32008
65
84
66
- #if 0
67
- #if defined(__GNUC__ )
68
- #define PUSH_ERR (func , minor , str , ...) H5Epush(H5E_DEFAULT, __FILE__, func, __LINE__, H5E_ERR_CLS, H5E_PLINE, minor, str, ##__VA_ARGS__)
69
- #elif defined(_MSC_VER )
70
- #define PUSH_ERR (func , minor , str , ...) H5Epush(H5E_DEFAULT, __FILE__, func, __LINE__, H5E_ERR_CLS, H5E_PLINE, minor, str, __VA_ARGS__)
71
- #else
72
- /* This version is portable but it's better to use compiler-supported
73
- approaches for handling the trailing comma issue when possible. */
74
- #define PUSH_ERR (func , minor , ...) H5Epush(H5E_DEFAULT, __FILE__, func, __LINE__, H5E_ERR_CLS, H5E_PLINE, minor, __VA_ARGS__)
75
- #endif /* defined(__GNUC__) */
76
- #else
77
85
/* Report a filter error through H5Epush on the H5E_DEFAULT stack.
 *   func  - name of the reporting function (string)
 *   minor - minor error code passed through to H5Epush
 *   str   - error message passed through to H5Epush
 * The file name and line number of the call site are recorded via
 * __FILE__ and __LINE__; the major code is always H5E_PLINE.
 * Note: there must be no whitespace between the macro name and '(',
 * otherwise the preprocessor defines an object-like macro instead. */
#define PUSH_ERR(func, minor, str) H5Epush(H5E_DEFAULT, __FILE__, func, __LINE__, H5E_ERR_CLS, H5E_PLINE, minor, str)
78
- #endif
79
86
80
87
// Prototypes from bitshuffle.c
81
88
void bshuf_write_uint64_BE (void * buf , uint64_t num );
@@ -136,7 +143,9 @@ herr_t H5Z_bshuf_set_local(hid_t dcpl, hid_t type, hid_t space) {
136
143
values [1 ] = BSHUF_VERSION_MINOR ;
137
144
138
145
elem_size = H5Tget_size (type );
146
+ #ifdef BSHUF_DEBUG
139
147
fprintf (stderr , "BSHUF: Computed elem_size %d\n" , elem_size );
148
+ #endif
140
149
if (elem_size <= 0 ) {
141
150
PUSH_ERR ("bshuf_h5_set_local" , H5E_CALLBACK , "Invalid element size." );
142
151
return -1 ;
@@ -159,6 +168,10 @@ herr_t H5Z_bshuf_set_local(hid_t dcpl, hid_t type, hid_t space) {
159
168
break ;
160
169
case BSHUF_H5_COMPRESS_LZ4 :
161
170
break ;
171
+ #ifdef ZSTD_SUPPORT
172
+ case BSHUF_H5_COMPRESS_ZSTD :
173
+ break ;
174
+ #endif
162
175
default :
163
176
PUSH_ERR ("bshuf_h5_set_local" , H5E_CALLBACK , "Invalid bitshuffle compression." );
164
177
}
@@ -176,7 +189,7 @@ size_t H5Z_bshuf_filter(unsigned int flags, size_t cd_nelmts,
176
189
size_t * buf_size , void * * buf ) {
177
190
178
191
size_t size , elem_size ;
179
- int err ;
192
+ int err = -1 ;
180
193
char msg [80 ];
181
194
size_t block_size = 0 ;
182
195
size_t buf_size_out , nbytes_uncomp , nbytes_out ;
@@ -189,14 +202,25 @@ size_t H5Z_bshuf_filter(unsigned int flags, size_t cd_nelmts,
189
202
return 0 ;
190
203
}
191
204
elem_size = cd_values [2 ];
205
+ #ifdef ZSTD_SUPPORT
206
+ const int comp_lvl = cd_values [5 ];
207
+ #endif
192
208
193
209
// User specified block size.
194
210
if (cd_nelmts > 3 ) block_size = cd_values [3 ];
195
211
196
212
if (block_size == 0 ) block_size = bshuf_default_block_size (elem_size );
197
213
214
+ #ifndef ZSTD_SUPPORT
215
+ if (cd_nelmts > 4 && (cd_values [4 ] == BSHUF_H5_COMPRESS_ZSTD )) {
216
+ PUSH_ERR ("bshuf_h5_filter" , H5E_CALLBACK ,
217
+ "ZSTD compression filter chosen but ZSTD support not installed." );
218
+ return 0 ;
219
+ }
220
+ #endif
221
+
198
222
// Compression in addition to bitshuffle.
199
- if (cd_nelmts > 4 && cd_values [4 ] == BSHUF_H5_COMPRESS_LZ4 ) {
223
+ if (cd_nelmts > 4 && ( cd_values [4 ] == BSHUF_H5_COMPRESS_LZ4 || cd_values [ 4 ] == BSHUF_H5_COMPRESS_ZSTD ) ) {
200
224
if (flags & H5Z_FLAG_REVERSE ) {
201
225
// First eight bytes is the number of bytes in the output buffer,
202
226
// big endian.
@@ -209,8 +233,17 @@ size_t H5Z_bshuf_filter(unsigned int flags, size_t cd_nelmts,
209
233
}
210
234
else {
211
235
nbytes_uncomp = nbytes ;
212
- buf_size_out = bshuf_compress_lz4_bound (nbytes_uncomp / elem_size ,
213
- elem_size , block_size ) + 12 ;
236
+ // Pick which compression library to use
237
+ if (cd_values [4 ] == BSHUF_H5_COMPRESS_LZ4 ) {
238
+ buf_size_out = bshuf_compress_lz4_bound (nbytes_uncomp / elem_size ,
239
+ elem_size , block_size ) + 12 ;
240
+ }
241
+ #ifdef ZSTD_SUPPORT
242
+ else if (cd_values [4 ] == BSHUF_H5_COMPRESS_ZSTD ) {
243
+ buf_size_out = bshuf_compress_zstd_bound (nbytes_uncomp / elem_size ,
244
+ elem_size , block_size ) + 12 ;
245
+ }
246
+ #endif
214
247
}
215
248
}
216
249
else {
@@ -233,10 +266,18 @@ size_t H5Z_bshuf_filter(unsigned int flags, size_t cd_nelmts,
233
266
return 0 ;
234
267
}
235
268
236
- if (cd_nelmts > 4 && cd_values [4 ] == BSHUF_H5_COMPRESS_LZ4 ) {
269
+ if (cd_nelmts > 4 && ( cd_values [4 ] == BSHUF_H5_COMPRESS_LZ4 || cd_values [ 4 ] == BSHUF_H5_COMPRESS_ZSTD ) ) {
237
270
if (flags & H5Z_FLAG_REVERSE ) {
238
271
// Bit unshuffle/decompress.
239
- err = bshuf_decompress_lz4 (in_buf , out_buf , size , elem_size , block_size );
272
+ // Pick which compression library to use
273
+ if (cd_values [4 ] == BSHUF_H5_COMPRESS_LZ4 ) {
274
+ err = bshuf_decompress_lz4 (in_buf , out_buf , size , elem_size , block_size );
275
+ }
276
+ #ifdef ZSTD_SUPPORT
277
+ else if (cd_values [4 ] == BSHUF_H5_COMPRESS_ZSTD ) {
278
+ err = bshuf_decompress_zstd (in_buf , out_buf , size , elem_size , block_size );
279
+ }
280
+ #endif
240
281
nbytes_out = nbytes_uncomp ;
241
282
}
242
283
else {
@@ -248,15 +289,32 @@ size_t H5Z_bshuf_filter(unsigned int flags, size_t cd_nelmts,
248
289
// have the same representation.
249
290
bshuf_write_uint64_BE (out_buf , nbytes_uncomp );
250
291
bshuf_write_uint32_BE ((char * ) out_buf + 8 , block_size * elem_size );
251
- err = bshuf_compress_lz4 (in_buf , (char * ) out_buf + 12 , size ,
252
- elem_size , block_size ); nbytes_out = err + 12 ; } } else {
253
- if (flags & H5Z_FLAG_REVERSE ) {
292
+ if (cd_values [4 ] == BSHUF_H5_COMPRESS_LZ4 ) {
293
+ err = bshuf_compress_lz4 (in_buf , (char * ) out_buf + 12 , size ,
294
+ elem_size , block_size );
295
+ }
296
+ #ifdef ZSTD_SUPPORT
297
+ else if (cd_values [4 ] == BSHUF_H5_COMPRESS_ZSTD ) {
298
+ err = bshuf_compress_zstd (in_buf , (char * ) out_buf + 12 , size ,
299
+ elem_size , block_size , comp_lvl );
300
+ }
301
+ #endif
302
+ nbytes_out = err + 12 ;
303
+ }
304
+ }
305
+ else {
306
+ if (flags & H5Z_FLAG_REVERSE ) {
254
307
// Bit unshuffle.
255
308
err = bshuf_bitunshuffle (in_buf , out_buf , size , elem_size ,
256
- block_size ); } else {
309
+ block_size );
310
+ }
311
+ else {
257
312
// Bit shuffle.
258
313
err = bshuf_bitshuffle (in_buf , out_buf , size , elem_size ,
259
- block_size ); } nbytes_out = nbytes ; }
314
+ block_size );
315
+ }
316
+ nbytes_out = nbytes ;
317
+ }
260
318
//printf("nb_in %d, nb_uncomp %d, nb_out %d, buf_out %d, block %d\n",
261
319
//nbytes, nbytes_uncomp, nbytes_out, buf_size_out, block_size);
262
320
0 commit comments