Skip to content

Commit c7717ae

Browse files
committed
Enable byteshuffle for arbitrary number of bytes
1 parent 5b7d426 commit c7717ae

File tree

4 files changed

+119
-13
lines changed

4 files changed

+119
-13
lines changed

blosc/blosc2.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,7 +1011,8 @@ uint8_t* pipeline_forward(struct thread_context* thread_context, const int32_t b
10111011
if (filters[i] <= BLOSC2_DEFINED_FILTERS_STOP) {
10121012
switch (filters[i]) {
10131013
case BLOSC_SHUFFLE:
1014-
blosc2_shuffle(typesize, bsize, _src, _dest);
1014+
// if filters_meta is not 0, interpret it as the number of bytes to be grouped together for shuffle
1015+
blosc2_shuffle(filters_meta[i] == 0 ? typesize : filters_meta[i], bsize, _src, _dest);
10151016
break;
10161017
case BLOSC_BITSHUFFLE:
10171018
if (blosc2_bitshuffle(typesize, bsize, _src, _dest) < 0) {
@@ -1388,7 +1389,8 @@ int pipeline_backward(struct thread_context* thread_context, const int32_t bsize
13881389
if (filters[i] <= BLOSC2_DEFINED_FILTERS_STOP) {
13891390
switch (filters[i]) {
13901391
case BLOSC_SHUFFLE:
1391-
blosc2_unshuffle(typesize, bsize, _src, _dest);
1392+
// if filters_meta is not 0, interpret as number of bytes to be grouped together for shuffle
1393+
blosc2_unshuffle(filters_meta[i] == 0 ? typesize : filters_meta[i], bsize, _src, _dest);
13921394
break;
13931395
case BLOSC_BITSHUFFLE:
13941396
if (bitunshuffle(typesize, bsize, _src, _dest, context->src[BLOSC2_CHUNK_VERSION]) < 0) {

include/blosc2.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -245,24 +245,24 @@ enum {
245245
*/
246246
enum {
247247
#ifndef BLOSC_H
248-
BLOSC_NOSHUFFLE = 0,
249248
//!< No shuffle (for compatibility with Blosc1).
250-
BLOSC_NOFILTER = 0,
249+
BLOSC_NOSHUFFLE = 0,
251250
//!< No filter.
252-
BLOSC_SHUFFLE = 1,
251+
BLOSC_NOFILTER = 0,
253252
//!< Byte-wise shuffle. If `filters_meta` is 0, the typesize is used; otherwise `filters_meta` is the number of bytes grouped together for the shuffle.
254-
BLOSC_BITSHUFFLE = 2,
253+
BLOSC_SHUFFLE = 1,
255254
//!< Bit-wise shuffle. `filters_meta` does not have any effect here.
255+
BLOSC_BITSHUFFLE = 2,
256256
#endif // BLOSC_H
257+
//!< Delta filter (bitwise XOR relative to reference). `filters_meta` does not have any effect here.
257258
BLOSC_DELTA = 3,
258-
//!< Delta filter. `filters_meta` does not have any effect here.
259-
BLOSC_TRUNC_PREC = 4,
260259
//!< Truncate mantissa precision.
261260
//!< Positive values in `filters_meta` will keep bits; negative values will zero bits.
262-
BLOSC_LAST_FILTER = 5,
261+
BLOSC_TRUNC_PREC = 4,
263262
//!< sentinel
264-
BLOSC_LAST_REGISTERED_FILTER = BLOSC2_GLOBAL_REGISTERED_FILTERS_START + BLOSC2_GLOBAL_REGISTERED_FILTERS - 1,
263+
BLOSC_LAST_FILTER = 5,
265264
//!< Determine the last registered filter. It is used to check if a filter is registered or not.
265+
BLOSC_LAST_REGISTERED_FILTER = BLOSC2_GLOBAL_REGISTERED_FILTERS_START + BLOSC2_GLOBAL_REGISTERED_FILTERS - 1,
266266
};
267267

268268
/**

include/blosc2/filters-registry.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,8 @@ enum {
2525
BLOSC_FILTER_BYTEDELTA_BUGGY = 34,
2626
// buggy version. See #524
2727
BLOSC_FILTER_BYTEDELTA = 35,
28-
//!< Byteshuffle + delta. The typesize should be specified in the `filters_meta` slot.
29-
//!< Sometimes this can represent an advantage over
30-
//!< @ref BLOSC_SHUFFLE or @ref BLOSC_BITSHUFFLE.
28+
//!< Byte-wise delta. Assumes M streams of bytes of length N, where M is the typesize (specified by `filters_meta`).
29+
//!< Should be used in combination with @ref BLOSC_SHUFFLE or @ref BLOSC_BITSHUFFLE.
3130
//!< See https://www.blosc.org/posts/bytedelta-enhance-compression-toolset/
3231
BLOSC_FILTER_INT_TRUNC = 36,
3332
//!< Truncate int precision; positive values in `filters_meta` slot will keep bits;
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
/*********************************************************************
2+
Blosc - Blocked Shuffling and Compression Library
3+
4+
Copyright (c) 2021 Blosc Development Team <[email protected]>
5+
https://blosc.org
6+
License: BSD 3-Clause (see LICENSE.txt)
7+
8+
See LICENSE.txt for details about copyright and rights to use.
9+
**********************************************************************/
10+
11+
#include "test_common.h"
12+
13+
/**
 * Fill a buffer with fixed-length "strings" of 32-bit characters.
 *
 * String number `i` (0-based) occupies characters
 * `[i * str_len, (i + 1) * str_len)` and every character in it is set to
 * `i + 1`.
 *
 * @param buf      Destination buffer; must hold at least
 *                 `buf_size * str_len` `uint32_t` values.
 * @param str_len  Number of 32-bit characters per string.
 * @param buf_size Number of strings to write (not a byte count).
 *
 * @return Always true.
 */
bool fill_buf_string(void *buf, uint8_t str_len, size_t buf_size) {
  uint32_t *chars = buf;
  for (size_t i = 0; i < buf_size; ++i) {
    // Each string starts at i * str_len; indexing with i + j would make
    // consecutive strings overlap and leave the buffer tail unwritten.
    uint32_t *str = chars + i * str_len;
    for (size_t j = 0; j < str_len; ++j) {
      str[j] = (uint32_t) i + 1;
    }
  }
  return true;
}
21+
22+
/* Per-suite setup: initialise the Blosc2 library and register the `shapes`
 * parametrization consumed by the stringshuffle test. */
CUTEST_TEST_SETUP(stringshuffle) {
23+
blosc2_init();
24+
25+
// Add parametrizations.
// NOTE(review): each entry looks like {ndim, shape, chunkshape, blockshape},
// matching the fields the test reads from `shapes` -- confirm the field order
// against the `_test_shapes` declaration. One entry has a zero-length
// dimension, so the total buffer size computed by the test is 0 for it.
26+
CUTEST_PARAMETRIZE(shapes, _test_shapes, CUTEST_DATA(
27+
{2, {40, 40}, {20, 20}, {10, 10}},
28+
{3, {40, 55, 23}, {31, 5, 22}, {4, 4, 4}},
29+
{3, {40, 0, 12}, {31, 0, 12}, {10, 0, 12}},
30+
{4, {50, 60, 31, 12}, {25, 20, 20, 10}, {5, 5, 5, 10}},
31+
{5, {1, 1, 1024, 1, 1}, {1, 1, 500, 1, 1}, {1, 1, 200, 1, 1}},
32+
{6, {5, 1, 50, 3, 1, 2}, {5, 1, 50, 2, 1, 2}, {2, 1, 20, 2, 1, 2}},
33+
{B2ND_MAX_DIM, {2, 3, 1, 1, 1, 1, 8, 1, 2, 2, 1, 1, 1, 1, 1, 2},
34+
{1, 2, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1},
35+
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}},
36+
));
37+
}
38+
39+
40+
CUTEST_TEST_TEST(stringshuffle) {
41+
CUTEST_GET_PARAMETER(shapes, _test_shapes);
42+
uint8_t str_len = 10;
43+
uint8_t charsize = sizeof(uint32_t);
44+
uint8_t typesize = charsize * str_len;
45+
blosc2_cparams cparams = BLOSC2_CPARAMS_DEFAULTS;
46+
cparams.nthreads = 2;
47+
cparams.typesize = typesize;
48+
cparams.filters[BLOSC2_MAX_FILTERS - 1] = BLOSC_SHUFFLE;
49+
cparams.filters_meta[BLOSC2_MAX_FILTERS - 1] = charsize;
50+
51+
/* Create original data */
52+
size_t buffersize = (size_t) typesize;
53+
for (int i = 0; i < shapes.ndim; ++i) {
54+
buffersize *= (size_t) shapes.shape[i];
55+
}
56+
uint8_t *buffer = malloc(buffersize);
57+
CUTEST_ASSERT("Buffer filled incorrectly", fill_buf_string(buffer, str_len, buffersize / typesize));
58+
59+
/* Compress with filters_meta*/
60+
blosc2_context* cctx = blosc2_create_cctx(cparams);
61+
size_t dest_buffersize = buffersize + BLOSC2_MAX_OVERHEAD;
62+
uint8_t *dest = malloc(dest_buffersize);
63+
int cbytes = blosc2_compress_ctx(cctx, buffer, buffersize, dest, dest_buffersize);
64+
B2ND_TEST_ASSERT(cbytes);
65+
66+
uint8_t *dest2 = malloc(dest_buffersize);
67+
cctx->filters_meta[-1] = 0; // now will use typesize by default for shuffle
68+
int cbytes2 = blosc2_compress_ctx(cctx, buffer, buffersize, dest2, dest_buffersize);
69+
B2ND_TEST_ASSERT(cbytes2);
70+
if (cbytes2 < cbytes){
71+
printf("Shuffle works better using stringsize not charsize!");
72+
return BLOSC2_ERROR_FAILURE;
73+
}
74+
/* Do b2nd_array roundtrip */
75+
blosc2_storage b2_storage = {.cparams=&cparams};
76+
b2nd_context_t *b2nd_ctx = b2nd_create_ctx(&b2_storage, shapes.ndim, shapes.shape,
77+
shapes.chunkshape, shapes.blockshape, NULL, 0, NULL, 0);
78+
b2nd_array_t *src;
79+
B2ND_TEST_ASSERT(b2nd_from_cbuffer(b2nd_ctx, &src, buffer, buffersize));
80+
uint8_t *buffer_dest = malloc(buffersize);
81+
B2ND_TEST_ASSERT(b2nd_to_cbuffer(src, buffer_dest, buffersize));
82+
83+
/* Testing */
84+
B2ND_TEST_ASSERT_BUFFER(buffer, buffer_dest, (int) buffersize);
85+
86+
/* Free mallocs */
87+
free(buffer);
88+
free(buffer_dest);
89+
free(dest);
90+
free(dest2);
91+
blosc2_free_ctx(cctx);
92+
B2ND_TEST_ASSERT(b2nd_free(src));
93+
B2ND_TEST_ASSERT(b2nd_free_ctx(b2nd_ctx));
94+
95+
return BLOSC2_ERROR_SUCCESS;
96+
}
97+
98+
99+
/* Per-suite teardown: calls blosc2_destroy(), the counterpart of the
 * blosc2_init() call made in the setup. */
CUTEST_TEST_TEARDOWN(stringshuffle) {
100+
blosc2_destroy();
101+
}
102+
103+
/* Test entry point: runs the stringshuffle suite through the cutest runner.
 * NOTE(review): no explicit return here; presumably CUTEST_TEST_RUN supplies
 * the exit status -- confirm against the macro definition. */
int main() {
104+
CUTEST_TEST_RUN(stringshuffle);
105+
}

0 commit comments

Comments
 (0)