Skip to content
Merged
27 changes: 16 additions & 11 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ jobs:
packages: zlib1g-dev
cmake-args: -D PREFER_EXTERNAL_ZLIB=ON

# Older Zstd libs (< 1.4.4) shipped in distros achieve poor compression
# ratios when using dictionaries, which makes some tests fail.
# Commenting this out for the time being.
# - name: Ubuntu GCC External ZSTD
# os: ubuntu-latest
# compiler: gcc
# packages: zstd libzstd-dev
# cmake-args: -D PREFER_EXTERNAL_ZSTD=ON
# Older Zstd libs (< 1.4.4) shipped in distros achieve poor compression
# ratios when using dictionaries, which makes some tests fail.
# Commenting this out for the time being.
# - name: Ubuntu GCC External ZSTD
# os: ubuntu-latest
# compiler: gcc
# packages: zstd libzstd-dev
# cmake-args: -D PREFER_EXTERNAL_ZSTD=ON

# For some reason, some tests do not pass on ARM SF and HF. Not sure what's going on,
# but having ARM AARCH64 working is good enough for now, so commenting the former out.
Expand All @@ -57,10 +57,15 @@ jobs:
# packages: qemu qemu-user gcc-arm-linux-gnueabihf g++-arm-linux-gnueabihf libc-dev-armel-cross
# cmake-args: -D CMAKE_TOOLCHAIN_FILE=cmake/toolchain-armhf.cmake

- name: Ubuntu GCC AARCH64
# - name: Ubuntu GCC AARCH64
# os: ubuntu-latest
# packages: qemu-system-aarch64 qemu-user gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libc-dev-arm64-cross
# cmake-args: -D CMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake

- name: Ubuntu GCC AARCH64 (native)
os: ubuntu-latest
packages: qemu-system-aarch64 qemu-user gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libc-dev-arm64-cross
cmake-args: -D CMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake
arch: arm64
compiler: gcc

- name: Ubuntu Clang
os: ubuntu-latest
Expand Down
14 changes: 12 additions & 2 deletions bench/b2nd/bench_concatenate.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ int main() {
const int height = 1000;
const int nimages_inbuf = 10;
int64_t N_images = 1000;
bool copy = true; // whether to copy the data or expand src1
bool copy = false; // whether to copy the data or expand src1

// Shapes of the b2nd array
// int64_t shape[] = {N_images, height, width};
Expand All @@ -50,6 +50,7 @@ int main() {
storage.cparams = &cparams;

char *accel_str;
double t, t_accel;
for (int accel=0; accel <= 1; accel++) {
int32_t new_chunkshape[3] = {chunkshape[0], chunkshape[1], chunkshape[2]};
if (!accel) {
Expand Down Expand Up @@ -97,7 +98,14 @@ int main() {
src1 = array;
}
blosc_set_timestamp(&t1);
printf("Time to append (%s): %.4f s\n", accel_str, blosc_elapsed_secs(t0, t1));
if (!accel) {
t = blosc_elapsed_secs(t0, t1);
printf("Time to append (%s): %.4f s\n", accel_str, t);
}
else {
t_accel = blosc_elapsed_secs(t0, t1);
printf("Time to append (%s): %.4f s\n", accel_str, t_accel);
}
printf("Number of chunks: %" PRId64 "\n", array->sc->nchunks);
// printf("Shape of array: (%" PRId64 ", %" PRId64 ", %" PRId64 ")\n",
// array->shape[0], array->shape[1], array->shape[2]);
Expand All @@ -109,6 +117,8 @@ int main() {
b2nd_free_ctx(ctx);
}
free(image);
blosc2_remove_urlpath(urlpath);
printf("Spedup: %.2fx\n", t / t_accel);

blosc2_destroy();
return 0;
Expand Down
103 changes: 73 additions & 30 deletions blosc/b2nd.c
Original file line number Diff line number Diff line change
Expand Up @@ -776,10 +776,6 @@ int get_set_slice(void *buffer, int64_t buffersize, const int64_t *start, const
}

uint8_t *buffer_b = buffer;
const int64_t *buffer_start = start;
const int64_t *buffer_stop = stop;
const int64_t *buffer_shape = shape;

int8_t ndim = array->ndim;

// 0-dim case
Expand Down Expand Up @@ -884,11 +880,11 @@ int get_set_slice(void *buffer, int64_t buffersize, const int64_t *start, const
int64_t update_nchunks = 1;
for (int i = 0; i < ndim; ++i) {
int64_t pos = 0;
while (pos <= buffer_start[i]) {
while (pos <= start[i]) {
pos += array->chunkshape[i];
}
update_start[i] = pos / array->chunkshape[i] - 1;
while (pos < buffer_stop[i]) {
while (pos < stop[i]) {
pos += array->chunkshape[i];
}
update_shape[i] = pos / array->chunkshape[i] - update_start[i];
Expand Down Expand Up @@ -916,7 +912,7 @@ int get_set_slice(void *buffer, int64_t buffersize, const int64_t *start, const
}
bool chunk_empty = false;
for (int i = 0; i < ndim; ++i) {
chunk_empty |= (chunk_stop[i] <= buffer_start[i] || chunk_start[i] >= buffer_stop[i]);
chunk_empty |= (chunk_stop[i] <= start[i] || chunk_start[i] >= stop[i]);
}
if (chunk_empty) {
continue;
Expand All @@ -927,7 +923,7 @@ int get_set_slice(void *buffer, int64_t buffersize, const int64_t *start, const
// Check if all the chunk is going to be updated and avoid the decompression
bool decompress_chunk = false;
for (int i = 0; i < ndim; ++i) {
decompress_chunk |= (chunk_start[i] < buffer_start[i] || chunk_stop[i] > buffer_stop[i]);
decompress_chunk |= (chunk_start[i] < start[i] || chunk_stop[i] > stop[i]);
}

if (decompress_chunk) {
Expand Down Expand Up @@ -1022,8 +1018,8 @@ int get_set_slice(void *buffer, int64_t buffersize, const int64_t *start, const
// compute the start of the slice inside the block
int64_t slice_start[B2ND_MAX_DIM] = {0};
for (int i = 0; i < ndim; ++i) {
if (block_start[i] < buffer_start[i]) {
slice_start[i] = buffer_start[i] - block_start[i];
if (block_start[i] < start[i]) {
slice_start[i] = start[i] - block_start[i];
} else {
slice_start[i] = 0;
}
Expand All @@ -1032,8 +1028,8 @@ int get_set_slice(void *buffer, int64_t buffersize, const int64_t *start, const

int64_t slice_stop[B2ND_MAX_DIM] = {0};
for (int i = 0; i < ndim; ++i) {
if (block_stop[i] > buffer_stop[i]) {
slice_stop[i] = block_shape[i] - (block_stop[i] - buffer_stop[i]);
if (block_stop[i] > stop[i]) {
slice_stop[i] = block_shape[i] - (block_stop[i] - stop[i]);
} else {
slice_stop[i] = block_stop[i] - block_start[i];
}
Expand All @@ -1046,13 +1042,12 @@ int get_set_slice(void *buffer, int64_t buffersize, const int64_t *start, const
}

uint8_t *src = &buffer_b[0];
const int64_t *src_pad_shape = buffer_shape;

int64_t src_start[B2ND_MAX_DIM] = {0};
int64_t src_stop[B2ND_MAX_DIM] = {0};
for (int i = 0; i < ndim; ++i) {
src_start[i] = slice_start[i] - buffer_start[i];
src_stop[i] = slice_stop[i] - buffer_start[i];
src_start[i] = slice_start[i] - start[i];
src_stop[i] = slice_stop[i] - start[i];
}

uint8_t *dst = &data[nblock * array->blocknitems * array->sc->typesize];
Expand All @@ -1070,12 +1065,12 @@ int get_set_slice(void *buffer, int64_t buffersize, const int64_t *start, const

if (set_slice) {
b2nd_copy_buffer2(ndim, array->sc->typesize,
src, src_pad_shape, src_start, src_stop,
src, shape, src_start, src_stop,
dst, dst_pad_shape, dst_start);
} else {
b2nd_copy_buffer2(ndim, array->sc->typesize,
dst, dst_pad_shape, dst_start, dst_stop,
src, src_pad_shape, src_start);
src, shape, src_start);
}
}

Expand Down Expand Up @@ -1390,11 +1385,6 @@ int b2nd_concatenate(b2nd_context_t *ctx, const b2nd_array_t *src1, const b2nd_a
void *buffer = malloc(src2->sc->typesize * src2->extchunknitems);
BLOSC_ERROR_NULL(buffer, BLOSC2_ERROR_MEMORY_ALLOC);
for (int64_t nchunk = 0; nchunk < src2->sc->nchunks; ++nchunk) {
if (blosc2_schunk_decompress_chunk(src2->sc, nchunk, buffer,
src2->sc->typesize * (int32_t)src2->extchunknitems) <= 0) {
BLOSC_TRACE_ERROR("Error decompressing chunk");
goto cleanup;
}
// Get multidimensional chunk position
int64_t nchunk_ndim[B2ND_MAX_DIM] = {0};
int64_t chunkshape[B2ND_MAX_DIM] = {0};
Expand All @@ -1414,21 +1404,74 @@ int b2nd_concatenate(b2nd_context_t *ctx, const b2nd_array_t *src1, const b2nd_a
if (stop[i] > src2->shape[i]) {
stop[i] = src2->shape[i]; // Handle boundary chunks
}
}

// Apply offset only for concatenation axis
// Check if the chunk is aligned with dest chunks, and has the same blockshape
bool aligned = true;
// ...and get the chunk index in the dest array if aligned
int64_t nchunk_dest = 0;
int64_t chunks_in_array_strides[B2ND_MAX_DIM];
// Calculate strides for destination array
chunks_in_array_strides[(*array)->ndim - 1] = 1;
for (int i = (*array)->ndim - 2; i >= 0; --i) {
chunks_in_array_strides[i] = chunks_in_array_strides[i + 1] *
((*array)->extshape[i + 1] / (*array)->chunkshape[i + 1]);
}

for (int8_t i = 0; i < src2->ndim; ++i) {
if (src1->chunkshape[i] != src2->chunkshape[i] ||
src2->blockshape[i] != (*array)->blockshape[i] ||
(i == axis && (src1_shape[i]) % (*array)->chunkshape[i] != 0)
) {
aligned = false;
break;
}
// Calculate the destination chunk coordinate for this dimension
int64_t nchunk_ndim_dest = start[i] / (*array)->chunkshape[i];
// For the concatenation axis, add the offset
if (i == axis) {
start[i] += src1_shape[i];
stop[i] += src1_shape[i];
nchunk_ndim_dest += src1_shape[i] / (*array)->chunkshape[i];
}
nchunk_dest += nchunk_ndim_dest * chunks_in_array_strides[i];
}

// Copy the chunk to the correct position
BLOSC_ERROR(b2nd_set_slice_cbuffer(buffer, chunkshape,
src2->sc->typesize * src2->extchunknitems,
start, stop, *array));
if (aligned) {
// Get the compressed chunk from the source array (no decompression needed)
bool needs_free = false;
uint8_t *chunk;
int32_t cbytes = blosc2_schunk_get_chunk(src2->sc, nchunk, &chunk, &needs_free);
if (cbytes < 0) {
BLOSC_TRACE_ERROR("Error getting chunk from source array");
BLOSC_ERROR(BLOSC2_ERROR_FAILURE);
}
// Update the chunk in the destination array
// We need to free only if needs_free is true or copy is false
// bool needs_copy = !needs_free || copy;
// BLOSC_ERROR(blosc2_schunk_update_chunk((*array)->sc, nchunk_dest, chunk, needs_copy));
// if (needs_free && !copy) {
// free(chunk);
// }
// TODO: the above makes some tests crash, so always force a copy; try to optimize this later
BLOSC_ERROR(blosc2_schunk_update_chunk((*array)->sc, nchunk_dest, chunk, true));
if (needs_free) {
free(chunk);
}
}
else {
// Load chunk into buffer
BLOSC_ERROR(b2nd_get_slice_cbuffer(src2, start, stop, buffer, chunkshape, src2->sc->chunksize));

// Apply chunk offset only for concatenation axis
start[axis] += src1_shape[axis];
stop[axis] += src1_shape[axis];

// Copy the chunk to the correct position
BLOSC_ERROR(b2nd_set_slice_cbuffer(buffer, chunkshape,
src2->sc->typesize * src2->extchunknitems,
start, stop, *array));
}
}

cleanup:
free(buffer);

return BLOSC2_ERROR_SUCCESS;
Expand Down
2 changes: 1 addition & 1 deletion blosc/directories.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@
sprintf(fname, "%s\\%s", dir_path, cfile.name);

ret = remove(fname);
free(fname);
if (ret < 0) {
BLOSC_TRACE_ERROR("Could not remove file %s", fname);
_findclose(file);
return BLOSC2_ERROR_FAILURE;
}
free(fname);
Copy link

Copilot AI Jun 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

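On the error path (when remove() fails), fname is not freed before the early return, resulting in a memory leak. Call free(fname) inside the error branch, after BLOSC_TRACE_ERROR has used it and before returning; freeing it ahead of the check would make the log message read freed memory.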

Copilot uses AI. Check for mistakes.
}

rmdir(dir_path);
Expand Down
2 changes: 2 additions & 0 deletions blosc/schunk.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,8 @@ blosc2_schunk* blosc2_schunk_copy(blosc2_schunk *schunk, blosc2_storage *storage
BLOSC_TRACE_ERROR("Can not create a new schunk");
return NULL;
}
// Set the chunksize for the schunk, as it cannot be derived from storage
new_schunk->chunksize = schunk->chunksize ;

// Copy metalayers
for (int nmeta = 0; nmeta < schunk->nmetalayers; ++nmeta) {
Expand Down
Loading
Loading