Skip to content

Commit 29236d7

Browse files
committed
LibBlosc2: New chunk codec
1 parent 4343c2a commit 29236d7

File tree

11 files changed

+835
-0
lines changed

11 files changed

+835
-0
lines changed

LibBlosc2/CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Release Notes
2+
3+
All notable changes to this package will be documented in this file.
4+
5+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
6+
7+
## Unreleased
8+
9+
### Added
10+
11+
- Initial release

LibBlosc2/LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2025 Erik Schnetter
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

LibBlosc2/Project.toml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
name = "ChunkCodecLibBlosc2"
2+
uuid = "59b5581c-e2bc-42b3-a6f1-80e88eec7b70"
3+
authors = ["Erik Schnetter <[email protected]>"]
4+
version = "0.1.0"
5+
6+
[deps]
7+
Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"
8+
Blosc2_jll = "d43303dc-dd0e-56c6-b0a8-331f4c8c9bfb"
9+
ChunkCodecCore = "0b6fb165-00bc-4d37-ab8b-79f91016dbe1"
10+
11+
[compat]
12+
Accessors = "0.1.42"
13+
Blosc2_jll = "201.1700.100"
14+
ChunkCodecCore = "0.5.0"
15+
julia = "1.10"
16+
17+
[workspace]
18+
projects = ["test"]

LibBlosc2/README.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# ChunkCodecLibBlosc2
2+
3+
## Warning: ChunkCodecLibBlosc2 is currently a WIP and its API may drastically change at any time.
4+
5+
This package implements the ChunkCodec interface for the following encoders and decoders
6+
using the c-blosc2 library <https://github.com/Blosc/c-blosc2>
7+
8+
1. `Blosc2Codec`, `Blosc2EncodeOptions`, `Blosc2DecodeOptions`
9+
10+
## Example
11+
12+
```julia-repl
13+
julia> using ChunkCodecLibBlosc2
14+
15+
julia> data = [0x00, 0x01, 0x02, 0x03];
16+
17+
julia> compressed_data = encode(Blosc2EncodeOptions(), data);
18+
19+
julia> decompressed_data = decode(Blosc2Codec(), compressed_data; max_size=length(data), size_hint=length(data));
20+
21+
julia> data == decompressed_data
22+
true
23+
```
24+
25+
The low-level interface is defined in the `ChunkCodecCore` package.
26+

LibBlosc2/src/ChunkCodecLibBlosc2.jl

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
module ChunkCodecLibBlosc2
2+
3+
using Base.Libc: free
4+
5+
using Accessors
6+
7+
using Blosc2_jll: libblosc2
8+
9+
using ChunkCodecCore:
10+
Codec,
11+
EncodeOptions,
12+
DecodeOptions,
13+
check_in_range,
14+
check_contiguous,
15+
DecodingError
16+
import ChunkCodecCore:
17+
decode_options,
18+
try_decode!,
19+
try_encode!,
20+
encode_bound,
21+
try_find_decoded_size,
22+
decoded_size_range
23+
24+
export Blosc2Codec,
25+
Blosc2EncodeOptions,
26+
Blosc2DecodeOptions,
27+
Blosc2DecodingError
28+
29+
# The `public` keyword cannot be parsed by older Julia versions, so the declaration is
# built from a string and only evaluated when the running Julia version supports it.
@static if VERSION >= v"1.11.0-DEV.469"
    eval(Meta.parse("public is_compressor_valid, compcode, compname"))
end
32+
33+
# reexport ChunkCodecCore
34+
using ChunkCodecCore: ChunkCodecCore, encode, decode
35+
export ChunkCodecCore, encode, decode
36+
37+
include("libblosc2.jl")
38+
"""
    struct Blosc2Codec <: Codec
    Blosc2Codec()

Blosc2 compression using the c-blosc2 library: https://github.com/Blosc/c-blosc2

Decoding rejects any extra data appended to the compressed block. It also rejects
truncated data and multiple compressed blocks concatenated together.

Use [`Blosc2EncodeOptions`](@ref) and [`Blosc2DecodeOptions`](@ref) to set encoding and
decoding options.
"""
struct Blosc2Codec <: Codec
end

# Default decoding options for this codec.
function decode_options(::Blosc2Codec)
    return Blosc2DecodeOptions()
end
53+
54+
include("encode.jl")
55+
include("decode.jl")
56+
57+
# Initialize the Blosc2 library when the module is loaded. The library call is
# idempotent, i.e. it can be called multiple times without harm.
function __init__()
    @ccall libblosc2.blosc2_init()::Cvoid
    return nothing
end
60+
61+
end # module ChunkCodecLibBlosc2

LibBlosc2/src/decode.jl

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
"""
    Blosc2DecodingError()

Error thrown when data cannot be decoded as Blosc2.
"""
struct Blosc2DecodingError <: DecodingError end
8+
9+
# Print the fixed, human-readable description of a `Blosc2DecodingError` to `io`.
function Base.showerror(io::IO, err::Blosc2DecodingError)
    message = "Blosc2DecodingError: blosc2 compressed buffer cannot be decoded"
    print(io, message)
    return nothing
end
13+
"""
    struct Blosc2DecodeOptions <: DecodeOptions
    Blosc2DecodeOptions(; kwargs...)

Blosc2 decompression using the c-blosc2 library: https://github.com/Blosc/c-blosc2

# Keyword Arguments

- `codec::Blosc2Codec=Blosc2Codec()`

Any other keyword arguments are accepted and ignored.
"""
struct Blosc2DecodeOptions <: DecodeOptions
    codec::Blosc2Codec
end

function Blosc2DecodeOptions(; codec::Blosc2Codec=Blosc2Codec(), kwargs...)
    return Blosc2DecodeOptions(codec)
end
28+
29+
"""
    try_find_decoded_size(::Blosc2DecodeOptions, src::AbstractVector{UInt8})::Int64

Return the total number of decoded bytes described by the Blosc2 frame stored in `src`.

The frame is opened as a super-chunk without copying `src`. The decoded size reported
for each chunk is summed up; no chunk data are decompressed.

# Throws

- `Blosc2DecodingError` if `src` cannot be opened as a Blosc2 frame, if a chunk cannot be
  retrieved or does not validate, or if the sum of the chunk sizes disagrees with the
  size recorded in the super-chunk.
"""
function try_find_decoded_size(::Blosc2DecodeOptions, src::AbstractVector{UInt8})::Int64
    check_contiguous(src)

    total_nbytes = Int64(0)

    # The super-chunk is created without copying the frame, so it keeps pointing into the
    # memory of `src`. Keep `src` alive until the super-chunk has been freed.
    GC.@preserve src begin
        schunk = @ccall libblosc2.blosc2_schunk_from_buffer(
            src::Ptr{UInt8}, length(src)::Int64, false::UInt8
        )::Ptr{Blosc2SChunk}
        if schunk == C_NULL
            # This is not valid blosc2-encoded data
            throw(Blosc2DecodingError())
        end
        # The frame memory belongs to `src`; the library must not free it.
        @ccall libblosc2.blosc2_schunk_avoid_cframe_free(
            schunk::Ptr{Blosc2SChunk}, true::UInt8
        )::Cvoid

        try
            header = unsafe_load(schunk)

            for nchunk in 0:(header.nchunks - 1)
                # Initialise the out-parameters so that the cleanup below is well defined
                # even if the library does not write to them.
                cbuffer = Ref{Ptr{UInt8}}(C_NULL)
                needs_free = Ref{UInt8}(0x00)
                chunksize = @ccall libblosc2.blosc2_schunk_get_chunk(
                    schunk::Ptr{Blosc2SChunk},
                    nchunk::Int64,
                    cbuffer::Ref{Ptr{UInt8}},
                    needs_free::Ref{UInt8},
                )::Cint
                try
                    chunksize > 0 || throw(Blosc2DecodingError())

                    # `blosc1_cbuffer_validate` takes the compressed size as `size_t` and
                    # stores the decoded size through a `size_t*`.
                    nbytes = Ref{Csize_t}(0)
                    status = @ccall libblosc2.blosc1_cbuffer_validate(
                        cbuffer[]::Ptr{Cvoid}, chunksize::Csize_t, nbytes::Ref{Csize_t}
                    )::Cint
                    status == 0 || throw(Blosc2DecodingError())

                    total_nbytes += Int64(nbytes[])
                finally
                    # Release the chunk buffer if the library allocated one for us.
                    if needs_free[] != 0
                        Libc.free(cbuffer[])
                    end
                end
            end

            # The super-chunk also records the total decoded size; both must agree.
            header.nbytes == total_nbytes || throw(Blosc2DecodingError())
        finally
            # Always release the super-chunk, also when the data turned out to be invalid.
            success = @ccall libblosc2.blosc2_schunk_free(schunk::Ptr{Cvoid})::Cint
            @assert success == 0
        end
    end

    return total_nbytes
end
73+
74+
#TODO: implement `try_resize_decode!`
75+
76+
"""
    try_decode!(d::Blosc2DecodeOptions, dst::AbstractVector{UInt8},
                src::AbstractVector{UInt8}; kwargs...)::Union{Nothing,Int64}

Decode the Blosc2 frame stored in `src` into `dst`.

The frame is opened as a super-chunk without copying `src`, and its chunks are
decompressed one after another into consecutive regions of `dst`.

Return the number of bytes written to `dst`, or `nothing` if `dst` is too small to hold
the decoded data. In the latter case `dst` may already have been partially overwritten.
Additional keyword arguments are accepted and ignored.

# Throws

- `Blosc2DecodingError` if `src` cannot be opened as a Blosc2 frame, if a chunk cannot be
  retrieved, validated or decompressed, or if the decoded size disagrees with the size
  recorded in the super-chunk.
"""
function try_decode!(
    d::Blosc2DecodeOptions,
    dst::AbstractVector{UInt8},
    src::AbstractVector{UInt8};
    kwargs...,
)::Union{Nothing,Int64}
    check_contiguous(dst)
    check_contiguous(src)

    total_nbytes = Int64(0)
    dst_too_small = false

    # The super-chunk is created without copying the frame, so it keeps pointing into the
    # memory of `src`, and raw pointers into `dst` are handed to the library. Keep both
    # arrays alive until the super-chunk has been freed.
    GC.@preserve src dst begin
        schunk = @ccall libblosc2.blosc2_schunk_from_buffer(
            src::Ptr{UInt8}, length(src)::Int64, false::UInt8
        )::Ptr{Blosc2SChunk}
        if schunk == C_NULL
            # This is not valid blosc2-encoded data
            throw(Blosc2DecodingError())
        end
        # The frame memory belongs to `src`; the library must not free it.
        @ccall libblosc2.blosc2_schunk_avoid_cframe_free(
            schunk::Ptr{Blosc2SChunk}, true::UInt8
        )::Cvoid

        try
            header = unsafe_load(schunk)

            for nchunk in 0:(header.nchunks - 1)
                # Determine the decoded size of this chunk from its header.
                # Initialise the out-parameters so that the cleanup below is well defined
                # even if the library does not write to them.
                cbuffer = Ref{Ptr{UInt8}}(C_NULL)
                needs_free = Ref{UInt8}(0x00)
                nbytes = Int64(0)
                chunksize = @ccall libblosc2.blosc2_schunk_get_chunk(
                    schunk::Ptr{Blosc2SChunk},
                    nchunk::Int64,
                    cbuffer::Ref{Ptr{UInt8}},
                    needs_free::Ref{UInt8},
                )::Cint
                try
                    chunksize > 0 || throw(Blosc2DecodingError())

                    # `blosc1_cbuffer_validate` takes the compressed size as `size_t` and
                    # stores the decoded size through a `size_t*`.
                    nbytes_ref = Ref{Csize_t}(0)
                    status = @ccall libblosc2.blosc1_cbuffer_validate(
                        cbuffer[]::Ptr{Cvoid},
                        chunksize::Csize_t,
                        nbytes_ref::Ref{Csize_t},
                    )::Cint
                    status == 0 || throw(Blosc2DecodingError())
                    nbytes = Int64(nbytes_ref[])
                finally
                    # Release the chunk buffer if the library allocated one for us.
                    if needs_free[] != 0
                        Libc.free(cbuffer[])
                    end
                end

                # Stop before writing past the end of `dst`.
                if total_nbytes + nbytes > length(dst)
                    dst_too_small = true
                    break
                end

                written = @ccall libblosc2.blosc2_schunk_decompress_chunk(
                    schunk::Ptr{Blosc2SChunk},
                    nchunk::Int64,
                    pointer(dst, total_nbytes + 1)::Ptr{Cvoid},
                    nbytes::Int32,
                )::Cint
                # A negative value signals an error; any other mismatch means the chunk
                # did not decode to the size announced in its header.
                written == nbytes || throw(Blosc2DecodingError())

                total_nbytes += nbytes
            end

            # The super-chunk records the total decoded size of all chunks together, so
            # it can only be compared once every chunk has been decoded.
            if !dst_too_small && header.nbytes != total_nbytes
                throw(Blosc2DecodingError())
            end
        finally
            # Always release the super-chunk, also when the data turned out to be invalid.
            success = @ccall libblosc2.blosc2_schunk_free(schunk::Ptr{Cvoid})::Cint
            @assert success == 0
        end
    end

    return dst_too_small ? nothing : total_nbytes
end

0 commit comments

Comments
 (0)