JuliaIO
diff --git a/LibLz4/src/ChunkCodecLibLz4.jl b/LibLz4/src/ChunkCodecLibLz4.jl
Lines changed: 21 additions & 3 deletions
diff --git a/LibLz4/src/decode.jl b/LibLz4/src/decode.jl
Lines changed: 114 additions & 0 deletions
diff --git a/LibLz4/src/encode.jl b/LibLz4/src/encode.jl
Lines changed: 115 additions & 0 deletions
@@ -30,9 +30,9 @@ export LZ4FrameCodec,
     LZ4NumcodecsCodec,
     LZ4NumcodecsEncodeOptions,
     LZ4NumcodecsDecodeOptions,
-    # LZ4HDF5Codec,
-    # LZ4HDF5EncodeOptions,
-    # LZ4HDF5DecodeOptions,
+    LZ4HDF5Codec,
+    LZ4HDF5EncodeOptions,
+    LZ4HDF5DecodeOptions,
     LZ4DecodingError
 
 # reexport ChunkCodecCore
@@ -108,6 +108,24 @@ struct LZ4NumcodecsCodec <: Codec
 end
 decode_options(::LZ4NumcodecsCodec) = LZ4NumcodecsDecodeOptions() # default decode options
 
+"""
+    struct LZ4HDF5Codec <: Codec
+    LZ4HDF5Codec()
+
+Codec for the LZ4 HDF5 format, implemented with liblz4: https://lz4.org/
+
+This is the LZ4 HDF5 format used in HDF5 Filter ID: 32004,
+which is documented in https://github.com/HDFGroup/hdf5_plugins
+
+Data in this format is NOT compatible with the `lz4` CLI.
+
+See also [`LZ4HDF5EncodeOptions`](@ref) and [`LZ4HDF5DecodeOptions`](@ref)
+"""
+struct LZ4HDF5Codec <: Codec end
+
+# Default decode options for this codec.
+function decode_options(::LZ4HDF5Codec)
+    return LZ4HDF5DecodeOptions()
+end
+
 include("encode.jl")
 include("decode.jl")
 
 
@@ -390,3 +390,117 @@ function try_decode!(d::LZ4NumcodecsDecodeOptions, dst::AbstractVector{UInt8}, s
         end
     end
 end
+
+
+"""
+    struct LZ4HDF5DecodeOptions <: DecodeOptions
+    LZ4HDF5DecodeOptions(; kwargs...)
+
+Options for decoding the LZ4 HDF5 format using liblz4: https://lz4.org/
+
+This is the LZ4 HDF5 format used in HDF5 Filter ID: 32004,
+which is documented in https://github.com/HDFGroup/hdf5_plugins
+
+This format is NOT compatible with the `lz4` CLI.
+
+# Keyword Arguments
+
+- `codec::LZ4HDF5Codec=LZ4HDF5Codec()`
+
+Any other keyword arguments are accepted and ignored.
+"""
+struct LZ4HDF5DecodeOptions <: DecodeOptions
+    codec::LZ4HDF5Codec
+end
+
+function LZ4HDF5DecodeOptions(; codec::LZ4HDF5Codec=LZ4HDF5Codec(), kwargs...)
+    # `kwargs` is collected but never read.
+    return LZ4HDF5DecodeOptions(codec)
+end
+
+# Reports `true` for every `LZ4HDF5DecodeOptions` value.
+function is_thread_safe(::LZ4HDF5DecodeOptions)
+    return true
+end
+
+# Read the decoded size stored in the first 8 bytes of `src` as a big endian
+# signed 64 bit integer.
+#
+# Throws `LZ4DecodingError` if `src` has fewer than 12 bytes, or if the stored
+# size is negative.
+function try_find_decoded_size(::LZ4HDF5DecodeOptions, src::AbstractVector{UInt8})::Int64
+    length(src) ≥ 12 || throw(LZ4DecodingError("unexpected end of input"))
+    first_idx = firstindex(src)
+    total = Int64(0)
+    # Most significant byte first: shift the accumulator up and append each byte.
+    for idx in first_idx:(first_idx + 7)
+        total = (total << 8) | Int64(src[idx])
+    end
+    signbit(total) && throw(LZ4DecodingError("decoded size is negative"))
+    return total
+end
+
+# Load a big endian signed 32 bit integer from the 4 bytes starting at `src_p`.
+# No bounds checking is done here; the caller must ensure those bytes are readable.
+function unsafe_load_i32be(src_p::Ptr{UInt8})::Int32
+    value = Int32(0)
+    # Most significant byte first: shift the accumulator up and append each byte.
+    for offset in 0:3
+        value = (value << 8) | Int32(unsafe_load(src_p + offset))
+    end
+    return value
+end
+
+# Decode `src`, in the LZ4 HDF5 format, into the start of `dst`.
+#
+# Returns the decoded size read from the header of `src`, or `nothing` if `dst`
+# is shorter than that size. Throws `LZ4DecodingError` if `src` is truncated,
+# malformed, or has bytes left over after the last block.
+# `kwargs` is accepted but never read.
+function try_decode!(d::LZ4HDF5DecodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::Union{Nothing, Int64}
+    check_contiguous(dst)
+    check_contiguous(src)
+    total_size = try_find_decoded_size(d, src)
+    src_len = convert(Int64, length(src))
+    dst_len = convert(Int64, length(dst))
+    total_size ≤ dst_len || return nothing
+    src_handle = Base.cconvert(Ptr{UInt8}, src)
+    dst_handle = Base.cconvert(Ptr{UInt8}, dst)
+    GC.@preserve src_handle dst_handle begin
+        src_ptr = Base.unsafe_convert(Ptr{UInt8}, src_handle)
+        dst_ptr = Base.unsafe_convert(Ptr{UInt8}, dst_handle)
+        src_left = src_len
+        dst_left = total_size
+        @assert src_left ≥ 12 # this is checked by try_find_decoded_size
+        # Step over the 8 byte decoded size, then read the 4 byte block size.
+        src_ptr += 8
+        src_left -= 8
+        block_limit = unsafe_load_i32be(src_ptr)
+        src_ptr += 4
+        src_left -= 4
+        block_limit > 0 || throw(LZ4DecodingError("block size must be greater than zero"))
+        while dst_left > 0
+            # The final block only holds whatever is left to decode.
+            local plain_size = min(Int64(block_limit), dst_left) % Int32
+            src_left ≥ 4 || throw(LZ4DecodingError("unexpected end of input"))
+            local packed_size = unsafe_load_i32be(src_ptr)
+            src_ptr += 4
+            src_left -= 4
+            packed_size > 0 || throw(LZ4DecodingError("block compressed size must be greater than zero"))
+            src_left ≥ packed_size || throw(LZ4DecodingError("unexpected end of input"))
+            if packed_size != plain_size
+                # do the decompression
+                local produced = unsafe_lz4_decompress(src_ptr, dst_ptr, packed_size, plain_size)
+                signbit(produced) && throw(LZ4DecodingError("src is malformed"))
+                produced == plain_size || throw(LZ4DecodingError("saved decoded size is not correct"))
+            else
+                # Equal sizes mean there was no compression: copy the bytes as they are.
+                Libc.memcpy(dst_ptr, src_ptr, plain_size)
+            end
+            src_ptr += packed_size
+            src_left -= packed_size
+            dst_ptr += plain_size
+            dst_left -= plain_size
+        end
+        iszero(src_left) || throw(LZ4DecodingError("unexpected $(src_left) bytes after stream"))
+        return total_size
+    end
+end
@@ -257,3 +257,118 @@ function try_encode!(e::LZ4NumcodecsEncodeOptions, dst::AbstractVector{UInt8}, s
         Int64(ret) + Int64(4)
     end
 end
+
+
+"""
+    struct LZ4HDF5EncodeOptions <: EncodeOptions
+    LZ4HDF5EncodeOptions(; kwargs...)
+
+Options for encoding the LZ4 HDF5 format using liblz4: https://lz4.org/
+
+This is the LZ4 HDF5 format used in HDF5 Filter ID: 32004,
+which is documented in https://github.com/HDFGroup/hdf5_plugins
+
+This format is NOT compatible with the `lz4` CLI.
+
+# Keyword Arguments
+
+- `codec::LZ4HDF5Codec=LZ4HDF5Codec()`
+- `compressionLevel::Integer=0`: Compression level, 0: default (fast mode); values > $(LZ4_MAX_CLEVEL) count as $(LZ4_MAX_CLEVEL); values < 0 trigger fast acceleration.
+- `blockSize::Integer=2^30`: Decompressed bytes per block. Must be in `1:$(LZ4_MAX_INPUT_SIZE)`.
+
+Any other keyword arguments are accepted and ignored.
+"""
+struct LZ4HDF5EncodeOptions <: EncodeOptions
+    codec::LZ4HDF5Codec
+    compressionLevel::Int32
+    blockSize::Int32
+end
+
+function LZ4HDF5EncodeOptions(;
+        codec::LZ4HDF5Codec=LZ4HDF5Codec(),
+        compressionLevel::Integer=0,
+        blockSize::Integer=2^30,
+        kwargs...
+    )
+    check_in_range(1:LZ4_MAX_INPUT_SIZE; blockSize)
+    # Out of range levels are clamped rather than rejected.
+    level = clamp(compressionLevel, LZ4_MIN_CLEVEL, LZ4_MAX_CLEVEL)
+    return LZ4HDF5EncodeOptions(codec, level, blockSize)
+end
+
+# Reports `true` for every `LZ4HDF5EncodeOptions` value.
+function is_thread_safe(::LZ4HDF5EncodeOptions)
+    return true
+end
+
+# The upper limit keeps `encode_bound` from reaching typemax(Int64) if blockSize is 1.
+function decoded_size_range(e::LZ4HDF5EncodeOptions)
+    return Int64(0):Int64(1):Int64(1844674407370955155)
+end
+
+# Upper bound on the encoded size of `src_size` input bytes.
+#
+# Returns `typemax(Int64)` when `src_size` is above the end of `decoded_size_range(e)`.
+function encode_bound(e::LZ4HDF5EncodeOptions, src_size::Int64)::Int64
+    src_size ≤ last(decoded_size_range(e)) || return typemax(Int64)
+    per_block = clamp(src_size, Int64(1), Int64(e.blockSize))
+    block_count = cld(src_size, per_block)
+    # 4 bytes per block for its stored size, 12 bytes for the stream header.
+    size_fields = Int64(4)*block_count
+    header_bytes = Int64(12)
+    lz4_scratch_space = per_block÷Int64(255) + Int64(16)
+    return src_size + size_fields + lz4_scratch_space + header_bytes
+end
+
+# Encode `src` into the start of `dst` using the LZ4 HDF5 format.
+#
+# Returns the number of bytes written to `dst`, or `nothing` if `dst` is too
+# short to hold the output. `kwargs` is accepted but never read.
+function try_encode!(e::LZ4HDF5EncodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::Union{Nothing, Int64}
+    check_contiguous(dst)
+    check_contiguous(src)
+    src_size = convert(Int64, length(src))
+    dst_capacity = convert(Int64, length(dst))
+    check_in_range(decoded_size_range(e); src_size)
+    block_limit = clamp(src_size, Int64(1), Int64(e.blockSize))
+    # The header alone needs 12 bytes.
+    dst_capacity ≥ 12 || return nothing
+    src_handle = Base.cconvert(Ptr{UInt8}, src)
+    dst_handle = Base.cconvert(Ptr{UInt8}, dst)
+    GC.@preserve src_handle dst_handle begin
+        src_ptr = Base.unsafe_convert(Ptr{UInt8}, src_handle)
+        dst_ptr = Base.unsafe_convert(Ptr{UInt8}, dst_handle)
+        src_left = src_size
+        dst_left = dst_capacity
+        # Store original size as big endian signed 64 bit
+        for (offset, shift) in zip(0:7, 56:-8:0)
+            unsafe_store!(dst_ptr + offset, (src_size >>> shift) % UInt8)
+        end
+        dst_ptr += 8
+        dst_left -= 8
+        # Store block size as big endian signed 32 bit
+        for (offset, shift) in zip(0:3, 24:-8:0)
+            unsafe_store!(dst_ptr + offset, (block_limit >>> shift) % UInt8)
+        end
+        dst_ptr += 4
+        dst_left -= 4
+        while src_left > 0
+            # Room is needed for the 4 byte size field plus at least one byte of data.
+            dst_left ≥ 5 || return nothing
+            local b_size = min(src_left, block_limit) % Int32
+            @assert !iszero(b_size)
+            # Remember where the size field goes; it is filled in once the size is known.
+            local size_slot = dst_ptr
+            dst_ptr += 4
+            dst_left -= 4
+            local written = unsafe_lz4_compress(src_ptr, dst_ptr, b_size, clamp(dst_left, Int32), e.compressionLevel)
+            # Store the data directly if there was no compression.
+            # iszero(written) indicates that dst_left was too small for compression,
+            # but it might be large enough for a copy.
+            local store_raw = iszero(written) || written ≥ b_size
+            if store_raw
+                dst_left ≥ b_size || return nothing
+                Libc.memcpy(dst_ptr, src_ptr, b_size)
+            end
+            local stored_size = store_raw ? b_size : written
+            # Store block compressed size as big endian signed 32 bit
+            for (offset, shift) in zip(0:3, 24:-8:0)
+                unsafe_store!(size_slot + offset, (stored_size >>> shift) % UInt8)
+            end
+            dst_ptr += stored_size
+            dst_left -= stored_size
+            src_ptr += b_size
+            src_left -= b_size
+        end
+        return dst_capacity - dst_left
+    end
+end