Skip to content

Commit 0150a1a

Browse files
authored
Merge pull request #378 from t20100/update-c-blosc2
Updated c-blosc2 v2.23.0
2 parents f9284d8 + fece571 commit 0150a1a

33 files changed

+385
-274
lines changed

doc/information.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ HDF5 compression filters and compression libraries sources were obtained from:
7070
* `hdf5-blosc plugin <https://github.com/Blosc/hdf5-blosc>`_ (v1.0.1)
7171
using `c-blosc <https://github.com/Blosc/c-blosc>`_ (v1.21.6), LZ4, Snappy, ZLib and ZStd.
7272
* `hdf5-blosc2 plugin <https://github.com/Blosc/HDF5-Blosc2>`_ (v2.0.0)
73-
using `c-blosc2 <https://github.com/Blosc/c-blosc2>`_ (v2.21.2), LZ4, ZLib and ZStd.
73+
using `c-blosc2 <https://github.com/Blosc/c-blosc2>`_ (v2.23.0), LZ4, ZLib and ZStd.
7474
* `FCIDECOMP plugin <https://gitlab.eumetsat.int/open-source/data-tailor-plugins/fcidecomp>`_
7575
(`v2.1.1 <https://gitlab.eumetsat.int/open-source/data-tailor-plugins/fcidecomp/-/tree/2.1.1>`_)
7676
using `CharLS <https://github.com/team-charls/charls>`_ (v2.1.0).

lib/c-blosc2/.gitattributes

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
* text=auto
2+
*.md text eol=lf

lib/c-blosc2/.github/workflows/cmake.yml

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,16 @@ jobs:
9191
compiler: clang
9292
cmake-args: -D DEACTIVATE_ZSTD=ON
9393

94+
- name: Windows Clang Win64
95+
os: windows-latest
96+
compiler: clang-cl
97+
cmake-args: -G Ninja
98+
9499
- name: Windows MSVC Win64
95100
os: windows-latest
96101
compiler: cl
97102
cmake-args: -A x64
98-
103+
99104
- name: Windows GCC Ninja
100105
os: windows-latest
101106
compiler: gcc
@@ -115,7 +120,7 @@ jobs:
115120
compiler: gcc
116121

117122
steps:
118-
- uses: actions/checkout@v5
123+
- uses: actions/checkout@v6
119124

120125
- name: Install packages (Ubuntu)
121126
if: runner.os == 'Linux' && matrix.packages
@@ -132,7 +137,7 @@ jobs:
132137
run: brew install ninja ${{ matrix.packages }}
133138

134139
- name: Generate project files
135-
run: cmake -S ${{ matrix.build-src-dir || '.' }} -B ${{ matrix.build-dir || '.' }} ${{ matrix.cmake-args }} -D CMAKE_BUILD_TYPE=${{ matrix.build-config || 'Release' }} -D BUILD_SHARED_LIBS=OFF
140+
run: cmake -S ${{ matrix.build-src-dir || '.' }} -B ${{ matrix.build-dir || '.' }} ${{ matrix.cmake-args }} -D CMAKE_BUILD_TYPE=${{ matrix.build-config || 'Release' }} -D BUILD_SHARED_LIBS=OFF -D CMAKE_C_COMPILER=${{ matrix.compiler }} -D CMAKE_CXX_COMPILER=${{ matrix.compiler }}
136141
env:
137142
CC: ${{ matrix.compiler }}
138143
CFLAGS: ${{ matrix.cflags }}

lib/c-blosc2/.github/workflows/fuzz.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818
dry-run: false
1919

2020
- name: Upload Crash
21-
uses: actions/upload-artifact@v4
21+
uses: actions/upload-artifact@v6
2222
if: failure()
2323
with:
2424
name: artifacts

lib/c-blosc2/ANNOUNCE.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
# Announcing C-Blosc2 2.21.2
1+
# Announcing C-Blosc2 2.23.0
22
A fast, compressed and persistent binary data store library for C.
33

44
## What is new?
55

6-
This is a maintenance release, with a few fixes and some optimizations.
7-
Thanks to Barak Ugav and Preeyan Parmar for their contributions.
6+
The main change is in ``blosc2_prefilter_params``, which has a new field.
7+
There is also a new ``typesize`` field in ``blosc2_dparams`` and some other changes to support the blosc2-openzl plugin.
88

99
For more info, see the release notes in:
1010

lib/c-blosc2/Blosc2Config.cmake.in

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,48 @@ foreach(comp ${Blosc2_FIND_COMPONENTS})
8686
endif()
8787
endforeach()
8888

89+
# Defines imported targets for Blosc2 inside a Python wheel
90+
91+
# ------------------------------
92+
# Shared library target
93+
# ------------------------------
94+
if(NOT TARGET Blosc2::blosc2_shared)
95+
add_library(Blosc2::blosc2_shared SHARED IMPORTED GLOBAL)
96+
97+
if(WIN32)
98+
# MSVC: import library (.lib) + runtime DLL (.dll)
99+
set_target_properties(Blosc2::blosc2_shared PROPERTIES
100+
IMPORTED_IMPLIB "${CMAKE_CURRENT_LIST_DIR}/../blosc2_shared.lib"
101+
IMPORTED_LOCATION "${CMAKE_CURRENT_LIST_DIR}/../blosc2_shared.dll"
102+
INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_LIST_DIR}/../../include"
103+
)
104+
else()
105+
# Linux/macOS
106+
set_target_properties(Blosc2::blosc2_shared PROPERTIES
107+
IMPORTED_LOCATION "${CMAKE_CURRENT_LIST_DIR}/../blosc2_shared.so"
108+
INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_LIST_DIR}/../../include"
109+
)
110+
endif()
111+
endif()
112+
113+
# ------------------------------
114+
# Static library target
115+
# ------------------------------
116+
if(NOT TARGET Blosc2::blosc2_static)
117+
add_library(Blosc2::blosc2_static STATIC IMPORTED GLOBAL)
118+
119+
if(MSVC)
120+
# Windows static library uses .lib
121+
set_target_properties(Blosc2::blosc2_static PROPERTIES
122+
IMPORTED_LOCATION "${CMAKE_CURRENT_LIST_DIR}/../blosc2_static.lib"
123+
INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_LIST_DIR}/../../include"
124+
)
125+
else()
126+
# Linux/macOS static library uses .a
127+
set_target_properties(Blosc2::blosc2_static PROPERTIES
128+
IMPORTED_LOCATION "${CMAKE_CURRENT_LIST_DIR}/../blosc2_static.a"
129+
INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_LIST_DIR}/../../include"
130+
)
131+
endif()
132+
endif()
133+

lib/c-blosc2/README.rst

Lines changed: 10 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -61,69 +61,17 @@ is useful <https://www.youtube.com/watch?v=LvP9zxMGBng>`_:
6161
:alt: Slicing a dataset in pineapple-style
6262
:target: https://www.youtube.com/watch?v=LvP9zxMGBng
6363

64+
.. include:: ../WHATS-NEW.rst
6465

65-
New features in C-Blosc2
66-
========================
67-
68-
* **64-bit containers:** the first-class container in C-Blosc2 is the `super-chunk` or, for brevity, `schunk`, that is made by smaller chunks which are essentially C-Blosc1 32-bit containers. The super-chunk can be backed or not by another container which is called a `frame` (see later).
69-
70-
* **NDim containers (B2ND):** allow to store n-dimensional data (aka tensors) that can efficiently read datasets in slices that can be n-dimensional too. To achieve this, a n-dimensional 2-level partitioning has been implemented.
71-
72-
* **More filters:** besides `shuffle` and `bitshuffle` already present in C-Blosc1, C-Blosc2 already implements:
73-
74-
- `bytedelta`: calculates the difference between bytes in a block that has been shuffled already. We have `blogged about bytedelta <https://www.blosc.org/posts/bytedelta-enhance-compression-toolset/>`_.
75-
76-
- `delta`: the stored blocks inside a chunk are diff'ed with respect to first block in the chunk. The idea is that, in some situations, the diff will have more zeros than the original data, leading to better compression.
77-
78-
- `trunc_prec`: it zeroes the least significant bits of the mantissa of float32 and float64 types. When combined with the `shuffle` or `bitshuffle` filter, this leads to more contiguous zeros, which are compressed better.
79-
80-
* **A filter pipeline:** the different filters can be pipelined so that the output of one can the input for the other. A possible example is a `delta` followed by `shuffle`, or as described above, `trunc_prec` followed by `bitshuffle`.
81-
82-
* **Prefilters:** allow to apply user-defined C callbacks **prior** the filter pipeline during compression. See `test_prefilter.c <https://github.com/Blosc/c-blosc2/blob/main/tests/test_prefilter.c>`_ for an example of use.
83-
84-
* **Postfilters:** allow to apply user-defined C callbacks **after** the filter pipeline during decompression. The combination of prefilters and postfilters could be interesting for supporting e.g. encryption (via prefilters) and decryption (via postfilters). Also, a postfilter alone can be used to produce on-the-flight computation based on existing data (or other metadata, like e.g. coordinates). See `test_postfilter.c <https://github.com/Blosc/c-blosc2/blob/main/tests/test_postfilter.c>`_ for an example of use.
85-
86-
* **SIMD support for ARM (NEON):** this allows for faster operation on ARM architectures. Only `shuffle` is supported right now, but the idea is to implement `bitshuffle` for NEON too. Thanks to Lucian Marc.
87-
88-
* **SIMD support for PowerPC (ALTIVEC):** this allows for faster operation on PowerPC architectures. Both `shuffle` and `bitshuffle` are supported; however, this has been done via a transparent mapping from SSE2 into ALTIVEC emulation in GCC 8, so performance could be better (but still, it is already a nice improvement over native C code; see PR https://github.com/Blosc/c-blosc2/pull/59 for details). Thanks to Jerome Kieffer and `ESRF <https://www.esrf.fr>`_ for sponsoring the Blosc team in helping him in this task.
89-
90-
* **Dictionaries:** when a block is going to be compressed, C-Blosc2 can use a previously made dictionary (stored in the header of the super-chunk) for compressing all the blocks that are part of the chunks. This usually improves the compression ratio, as well as the decompression speed, at the expense of a (small) overhead in compression speed. Currently, it is only supported in the `zstd` codec, but would be nice to extend it to `lz4` and `blosclz` at least.
91-
92-
* **Contiguous frames:** allow to store super-chunks contiguously, either on-disk or in-memory. When a super-chunk is backed by a frame, instead of storing all the chunks sparsely in-memory, they are serialized inside the frame container. The frame can be stored on-disk too, meaning that persistence of super-chunks is supported.
93-
94-
* **Sparse frames:** each chunk in a super-chunk is stored in a separate file or different memory area, as well as the metadata. This is allows for more efficient updates/deletes than in contiguous frames (i.e. avoiding 'holes' in monolithic files). The drawback is that it consumes more inodes when on-disk. Thanks to Marta Iborra for this contribution.
95-
96-
* **Partial chunk reads:** there is support for reading just part of chunks, so avoiding to read the whole thing and then discard the unnecessary data.
97-
98-
* **Parallel chunk reads:** when several blocks of a chunk are to be read, this is done in parallel by the decompressing machinery. That means that every thread is responsible to read, post-filter and decompress a block by itself, leading to an efficient overlap of I/O and CPU usage that optimizes reads to a maximum.
99-
100-
* **Meta-layers:** optionally, the user can add meta-data for different uses and in different layers. For example, one may think on providing a meta-layer for `NumPy <https://numpy.org>`_ so that most of the meta-data for it is stored in a meta-layer; then, one can place another meta-layer on top of the latter for adding more high-level info if desired (e.g. geo-spatial, meteorological...).
101-
102-
* **Variable length meta-layers:** the user may want to add variable-length meta information that can be potentially very large (up to 2 GB). The regular meta-layer described above is very quick to read, but meant to store fixed-length and relatively small meta information. Variable length metalayers are stored in the trailer of a frame, whereas regular meta-layers are in the header.
103-
104-
* **Efficient support for special values:** large sequences of repeated values can be represented with an efficient, simple and fast run-length representation, without the need to use regular codecs. With that, chunks or super-chunks with values that are the same (zeros, NaNs or any value in general) can be built in constant time, regardless of the size. This can be useful in situations where a lot of zeros (or NaNs) need to be stored (e.g. sparse matrices).
105-
106-
* **Nice markup for documentation:** we are currently using a combination of Sphinx + Doxygen + Breathe for documenting the C-API. See https://www.blosc.org/c-blosc2/c-blosc2.html. Thanks to Alberto Sabater and Aleix Alcacer for contributing the support for this.
107-
108-
* **Plugin capabilities for filters and codecs:** we have a plugin register capability inplace so that the info about the new filters and codecs can be persisted and transmitted to different machines. See https://github.com/Blosc/c-blosc2/blob/main/examples/urfilters.c for a self-contained example. Thanks to the NumFOCUS foundation for providing a grant for doing this, and Oscar Griñón and Aleix Alcacer for the implementation.
109-
110-
* **Pluggable tuning capabilities:** this will allow users with different needs to define an interface so as to better tune different parameters like the codec, the compression level, the filters to use, the blocksize or the shuffle size. Thanks to ironArray for sponsoring us in doing this.
111-
112-
* **Support for I/O plugins:** so that users can extend the I/O capabilities beyond the current filesystem support. Things like the use of databases or S3 interfaces should be possible by implementing these interfaces. Thanks to ironArray for sponsoring us in doing this.
113-
114-
* **Security:** we are actively using using the `OSS-Fuzz <https://github.com/google/oss-fuzz>`_ and `ClusterFuzz <https://oss-fuzz.com>`_ for uncovering programming errors in C-Blosc2. Thanks to Google for sponsoring us in doing this, and to Nathan Moinvaziri for most of the work here.
115-
116-
More info about the `improved capabilities of C-Blosc2 can be found in this talk <https://www.blosc.org/docs/Caterva-HDF5-Workshop.pdf>`_.
117-
118-
C-Blosc2 API and format have been frozen, and that means that there is guarantee that your programs will continue to work with future versions of the library, and that next releases will be able to read from persistent storage generated from previous releases (as of 2.0.0).
66+
More info about the `improved capabilities of C-Blosc2 can be found in this paper <https://www.blosc.org/docs/Exploring-MilkyWay-SciPy2023-paper.pdf>`_. Please cite it if you use C-Blosc2 in your research!
11967

12068

12169
Open format
12270
===========
12371

12472
The Blosc2 format is open and `fully documented <https://github.com/Blosc/c-blosc2/blob/main/README_FORMAT.rst>`_.
12573

126-
The format specs are defined in less than 1000 lines of text, so they should be easy to read and understand. In our opinion, this is very important for the long-term success of the library, as it allows for third-party implementations of the format, and also for the users to understand what is going on under the hood.
74+
The format specs are defined in fewer than 4000 words, so they should be easy to read and understand. In our opinion, this is critical for the long-term success of the library, as it allows for third-party implementations of the format, and also for the users to understand what is going on under the hood.
12775

12876

12977
Python wrapper
@@ -194,6 +142,11 @@ Or, you may want to use a codec in an external library already in the system:
194142
195143
cmake -DPREFER_EXTERNAL_LZ4=ON ..
196144
145+
For OpenZL, the build currently seems to have problems, so after building and installing it into ``build-cmake`` in the ``openzl`` directory, one has to run:
146+
147+
.. code-block:: console
148+
149+
cmake -DPREFER_EXTERNAL_OPENZL=ON -DOPENZL_LIBRARY=$HOME/openzl/build-cmake/install/lib/libopenzl.a -DOPENZL_INCLUDE_DIR=$HOME/openzl/build-cmake/install/include ..
197150
198151
Supported platforms
199152
~~~~~~~~~~~~~~~~~~~
@@ -225,7 +178,7 @@ the ``BLOSC_TRACE`` environment variable.
225178
Contributing
226179
============
227180

228-
If you want to collaborate in this development you are welcome. We need help in the different areas listed at the `ROADMAP <https://github.com/Blosc/c-blosc2/blob/main/ROADMAP.rst>`_; also, be sure to read our `DEVELOPING-GUIDE <https://github.com/Blosc/c-blosc2/blob/main/DEVELOPING-GUIDE.rst>`_ and our `Code of Conduct <https://github.com/Blosc/community/blob/master/code_of_conduct.md>`_. Blosc is distributed using the `BSD license <https://github.com/Blosc/c-blosc2/blob/main/LICENSE.txt>`_.
181+
If you want to collaborate in this development you are welcome. We need help in the different areas listed at the `ROADMAP <https://github.com/Blosc/c-blosc2/blob/main/ROADMAP-TO-3.0.rst>`_; also, be sure to read our `DEVELOPING-GUIDE <https://github.com/Blosc/c-blosc2/blob/main/DEVELOPING-GUIDE.rst>`_ and our `Code of Conduct <https://github.com/Blosc/community/blob/master/code_of_conduct.md>`_. Blosc is distributed using the `BSD license <https://github.com/Blosc/c-blosc2/blob/main/LICENSE.txt>`_.
229182

230183

231184
Twitter feed
@@ -244,7 +197,7 @@ You can cite our work on the different libraries under the Blosc umbrella as:
244197
@ONLINE{blosc,
245198
author = {{Blosc Development Team}},
246199
title = "{A fast, compressed and persistent data store library}",
247-
year = {2009-2023},
200+
year = {2009-2025},
248201
note = {https://blosc.org}
249202
}
250203

lib/c-blosc2/RELEASE_NOTES.md

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,26 @@
11
Release notes for C-Blosc2
22
==========================
33

4+
Changes from 2.22.0 to 2.23.0
5+
=============================
6+
7+
* Changes to allow use of miniexpr. This breaks the ABI as a public struct has an additional field.
8+
* Changes to enable blosc2-openzl plugin
9+
10+
11+
Changes from 2.21.3 to 2.22.0
12+
=============================
13+
14+
* `b2nd_squeeze` and `b2nd_squeeze_index` now return views and their signatures have consequently changed.
15+
* Bug fix for indexing `[ndim - 1]`, which caused an error when `ndim` is 0
16+
17+
Changes from 2.21.2 to 2.21.3
18+
=============================
19+
20+
* Increase MAX_DIMS from 8 to 16
21+
* Fix compatibility with glibc v2.42
22+
* Bug fix in ``unidim_to_multidim``
23+
424
Changes from 2.21.1 to 2.21.2
525
=============================
626

@@ -517,7 +537,7 @@ Changes from 2.6.1 to 2.7.1
517537
Thanks to @bnavigator.
518538

519539
* BloscLZ codec is now treated exactly the same as LZ4. Before BloscLZ was considered less capable of reaching
520-
decent compression ratios, but this has changed quite a bit lately, so there is no point in treating both differently.
540+
decent compression ratios, but this has changed quite a bit lately, so there is no point in treating both differently.
521541

522542
* Fixed some leaks, mainly on the test suite.
523543

@@ -564,7 +584,7 @@ Changes from 2.4.3 to 2.5.0
564584
* `out_size` -> `output_size`
565585
* `out_typesize` -> `output_typesize`
566586
* `out_offset` -> `output_offset`
567-
This was needed to allow Cython to map the fields (`in` is a reserved word in Python).
587+
This was needed to allow Cython to map the fields (`in` is a reserved word in Python).
568588

569589
* Disabled maskout reads in `blosc2_schunk_get_slice_buffer()` as they are not faster than getitem there.
570590

@@ -749,7 +769,7 @@ Changes from 2.0.1 to 2.0.2
749769

750770
* Fixed a bug when a lazy_chunk was created from a small, memcpyed chunk.
751771
(see #329).
752-
772+
753773
* Fixed many issues in documentation (see #333).
754774

755775

@@ -809,9 +829,9 @@ Changes from 2.0.0-beta.5 to 2.0.0.rc.1
809829
=======================================
810830

811831
* [API change] `blosc2_decompress_ctx()` gets a new `srcsize`
812-
parameter to ensure that it does not read past the end
813-
of the provided buffer. See #144. Thanks to Nathan Moinvaziri
814-
(@nmoinvaz).
832+
parameter to ensure that it does not read past the end
833+
of the provided buffer. See #144. Thanks to Nathan Moinvaziri
834+
(@nmoinvaz).
815835

816836
* [BREAKING CHANGE] The format for frames has changed and
817837
BLOSC2_VERSION_FRAME_FORMAT is now set to 2. There is no attempt to support
@@ -893,7 +913,7 @@ Changes from 2.0.0-beta.3 to 2.0.0-beta.4
893913
* The endianness of the platform that is writing the data in chunks is stored
894914
now in the headers of the chunks. This info is not used yet, but this
895915
should allow a good hint for implementing format compatibility among
896-
platforms with different endianness in other layers. See PR #84.
916+
platforms with different endianness in other layers. See PR #84.
897917

898918
* Fixed a nasty bug that prevented frames to go more than 2 GB in size.
899919

0 commit comments

Comments
 (0)