diff --git a/Project.toml b/Project.toml
index f09029a..4495ca0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -6,6 +6,7 @@ version = "0.9.5"
 [deps]
 AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95"
 Blosc = "a74b3585-a348-5f62-a45c-50e91977d574"
+CRC32c = "8bf52ea8-c179-5cab-976a-9e18b702a9bc"
 ChunkCodecCore = "0b6fb165-00bc-4d37-ab8b-79f91016dbe1"
 ChunkCodecLibZlib = "4c0bbee4-addc-4d73-81a0-b6caacae83c8"
 ChunkCodecLibZstd = "55437552-ac27-4d47-9aa3-63184e8fd398"
@@ -27,6 +28,7 @@ Blosc = "0.5, 0.6, 0.7"
 ChunkCodecCore = "1"
 ChunkCodecLibZlib = "1"
 ChunkCodecLibZstd = "1"
+CRC32c = "1.10, 1.11"
 DataStructures = "0.17, 0.18, 0.19"
 DateTimes64 = "1"
 DiskArrays = "0.4.2"
diff --git a/docs/src/tutorial.md b/docs/src/tutorial.md
index 6b9f6b3..ee70565 100644
--- a/docs/src/tutorial.md
+++ b/docs/src/tutorial.md
@@ -197,7 +197,7 @@ Order : C
 Read-Only : false
 Compressor : Zarr.BloscCompressor(0, 3, "zstd", 1)
 Filters : nothing
-Store type : Dictionary Storage
+Store type : Zarr.FormattedStore{2, '.', Zarr.DictStore}(Dictionary Storage)
 No. bytes : 400000000
 No. bytes stored : 2412289
 Storage ratio : 165.81761140559857
diff --git a/src/Codecs/Codecs.jl b/src/Codecs/Codecs.jl
new file mode 100644
index 0000000..ec6e620
--- /dev/null
+++ b/src/Codecs/Codecs.jl
@@ -0,0 +1,49 @@
+module Codecs
+
+using JSON: JSON
+
+"""
+    abstract type Codec
+
+The abstract supertype for all Zarr codecs
+
+## Interface
+
+All subtypes of `Codec` SHALL implement the following methods:
+
+- `zencode(a, c::Codec)`: compress the array `a` using the codec `c`.
+- `zdecode(a, c::Codec, T)`: decode the array `a` using the codec `c`
+  and return an array of type `T`.
+- `JSON.lower(c::Codec)`: return a JSON representation of the codec `c`, which
+  follows the Zarr specification for that codec.
+- `getCodec(::Type{<:Codec}, d::Dict)`: return a codec object from a given
+  dictionary `d` which contains the codec's parameters according to the Zarr spec.
+
+Subtypes of `Codec` MAY also implement the following methods:
+
+- `zencode!(encoded, data, c::Codec)`: encode the array `data` using the
+  codec `c` and store the result in the array `encoded`.
+- `zdecode!(data, encoded, c::Codec)`: decode the array `encoded`
+  using the codec `c` and store the result in the array `data`.
+
+Finally, an entry MUST be added to the `VN.codectypes` dictionary for each codec type where N is the
+Zarr format version.
+This must also follow the Zarr specification's name for that compressor. The name of the compressor
+is the key, and the value is the compressor type (e.g. `BloscCodec` or `NoCodec`).
+
+For example, the Blosc codec is named "blosc" in the Zarr spec, so the entry for [`BloscCodec`](@ref)
+must be added to `codectypes` as `codectypes["blosc"] = BloscCodec`.
+"""
+abstract type Codec end
+
+# Fallbacks: every method below errors until a concrete codec type overrides it.
+zencode(a, c::Codec) = error("Unimplemented")
+zencode!(encoded, data, c::Codec) = error("Unimplemented")
+zdecode(a, c::Codec, T::Type) = error("Unimplemented")
+zdecode!(data, encoded, c::Codec) = error("Unimplemented")
+JSON.lower(c::Codec) = error("Unimplemented")
+getCodec(::Type{<:Codec}, d::Dict) = error("Unimplemented")
+
+include("V3/V3.jl")
+
+end
diff --git a/src/Codecs/V3/V3.jl b/src/Codecs/V3/V3.jl
new file mode 100644
index 0000000..710946c
--- /dev/null
+++ b/src/Codecs/V3/V3.jl
@@ -0,0 +1,103 @@
+module V3Codecs
+
+import ..Codecs: zencode, zdecode, zencode!, zdecode!
+using CRC32c: CRC32c
+
+abstract type V3Codec{In,Out} end
+const codectypes = Dict{String, Type{<:V3Codec}}()  # Zarr v3 codec name => codec *type* (not an instance)
+
+@enum BloscCompressor begin
+    lz4
+    lz4hc
+    blosclz
+    zstd
+    snappy
+    zlib
+end
+
+@enum BloscShuffle begin
+    noshuffle
+    shuffle
+    bitshuffle
+end
+
+struct BloscCodec <: V3Codec{:bytes, :bytes}
+    cname::BloscCompressor
+    clevel::Int64
+    shuffle::BloscShuffle
+    typesize::UInt8
+    blocksize::UInt
+end
+name(::BloscCodec) = "blosc"
+
+struct BytesCodec <: V3Codec{:array, :bytes}
+end
+name(::BytesCodec) = "bytes"
+
+struct CRC32cCodec <: V3Codec{:bytes, :bytes}
+end
+name(::CRC32cCodec) = "crc32c"
+
+struct GzipCodec <: V3Codec{:bytes, :bytes}
+end
+name(::GzipCodec) = "gzip"
+
+
+#=
+zencode(a, c::Codec) = error("Unimplemented")
+zencode!(encoded, data, c::Codec) = error("Unimplemented")
+zdecode(a, c::Codec, T::Type) = error("Unimplemented")
+zdecode!(data, encoded, c::Codec) = error("Unimplemented")
+=#
+
+function crc32c_stream!(output::IO, input::IO; buffer = Vector{UInt8}(undef, 1024*32))  # copy `input` to `output`, returning the CRC32c of the copied bytes
+    hash::UInt32 = 0x00000000
+    while(bytesavailable(input) > 0)
+        sized_buffer = @view(buffer[1:min(length(buffer), bytesavailable(input))])  # never read more than is available
+        read!(input, sized_buffer)
+        write(output, sized_buffer)
+        hash = CRC32c.crc32c(sized_buffer, hash)  # fold this piece into the running checksum
+    end
+    return hash
+end
+function zencode!(encoded::Vector{UInt8}, data::Vector{UInt8}, c::CRC32cCodec)  # encode `data` into `encoded`
+    output = IOBuffer(encoded, read=false, write=true)
+    input = IOBuffer(data, read=true, write=false)
+    zencode!(output, input, c)
+    return take!(output)
+end
+function zencode!(output::IO, input::IO, c::CRC32cCodec)  # payload followed by a 4-byte checksum trailer
+    hash = crc32c_stream!(output, input)
+    write(output, htol(hash))  # trailer is written little-endian regardless of host byte order
+    return output
+end
+function zdecode!(data::Vector{UInt8}, encoded::Vector{UInt8}, c::CRC32cCodec)  # decode `encoded` into `data` (interface argument order)
+    output = IOBuffer(data, read=false, write=true)
+    input = IOBuffer(encoded, read=true, write=true)
+    zdecode!(output, input, c)
+    return take!(output)
+end
+function zdecode!(output::IOBuffer, input::IOBuffer, c::CRC32cCodec)  # strip and verify the 4-byte trailer
+    input_vec = take!(input)
+    truncated_input = IOBuffer(@view(input_vec[1:end-4]); read=true, write=false)  # payload without the trailer
+    hash = crc32c_stream!(output, truncated_input)
+    if length(input_vec) < 4 || input_vec[end-3:end] != reinterpret(UInt8, [htol(hash)])  # a too-short input has no trailer at all
+        throw(Base.IOError("CRC32c hash does not match", 0))  # IOError is not exported from Base and takes (msg, code)
+    end
+    return output
+end
+
+struct ShardingCodec{N} <: V3Codec{:array, :bytes}
+    chunk_shape::NTuple{N,Int}
+    codecs::Vector{V3Codec}
+    index_codecs::Vector{V3Codec}
+    index_location::Symbol
+end
+name(::ShardingCodec) = "sharding_indexed"
+
+struct TransposeCodec <: V3Codec{:array, :array}
+end
+name(::TransposeCodec) = "transpose"
+
+
+end
diff --git a/src/Compressors/Compressors.jl b/src/Compressors/Compressors.jl
index 1854128..c647eff 100644
--- a/src/Compressors/Compressors.jl
+++ b/src/Compressors/Compressors.jl
@@ -49,10 +49,13 @@ const compressortypes = Dict{Union{String,Nothing}, Type{<: Compressor}}()
 include("blosc.jl")
 include("zlib.jl")
 include("zstd.jl")
+include("v3.jl")
 
 # ## Fallback definitions for the compressor interface
 # Define fallbacks and generic methods for the compressor interface
-getCompressor(compdict::Dict) = getCompressor(compressortypes[compdict["id"]],compdict)
+getCompressor(compdict::Dict) = haskey(compdict, "id") ?
+    getCompressor(compressortypes[compdict["id"]], compdict) :
+    getCompressor(compressortypes[compdict["name"]], compdict["configuration"])
 getCompressor(::Nothing) = NoCompressor()
 
 # Compression when no filter is given
@@ -104,4 +107,4 @@ end
 
 JSON.lower(::NoCompressor) = nothing
 
-compressortypes[nothing] = NoCompressor
\ No newline at end of file
+compressortypes[nothing] = NoCompressor
diff --git a/src/Compressors/v3.jl b/src/Compressors/v3.jl
new file mode 100644
index 0000000..955c116
--- /dev/null
+++ b/src/Compressors/v3.jl
@@ -0,0 +1,58 @@
+"""
+    Compressor_v3{C} <: Compressor
+
+Wrapper marking the wrapped compressor `parent::C` as a Zarr v3 compressor.
+"""
+struct Compressor_v3{C} <: Compressor
+    parent::C
+end
+Base.parent(c::Compressor_v3) = c.parent
+
+function zuncompress(a, z::Compressor_v3, T)  # forwards to the wrapped compressor
+    zuncompress(a, parent(z), T)
+end
+
+function zuncompress!(data::DenseArray, compressed, z::Compressor_v3)  # forwards to the wrapped compressor
+    zuncompress!(data, compressed, parent(z))
+end
+
+function zcompress(a, z::Compressor_v3)  # forwards to the wrapped compressor
+    zcompress(a, parent(z))
+end
+
+
+function JSON.lower(c::Compressor_v3{BloscCompressor})
+    p = parent(c)
+    return Dict(
+        "name" => "blosc",
+        "configuration" => Dict(
+            "cname" => p.cname,
+            "clevel" => p.clevel,
+            "shuffle" => p.shuffle,
+# TODO: Evaluate if we can encode typesize
+#            "typesize" => p.typesize,
+            "blocksize" => p.blocksize
+        )
+    )
+end
+
+function JSON.lower(c::Compressor_v3{ZlibCompressor})
+    p = parent(c)
+    return Dict(
+        "name" => "gzip",
+        "configuration" => Dict(
+            "level" => p.clevel
+        )
+    )
+end
+
+function JSON.lower(c::Compressor_v3{ZstdCompressor})
+    p = parent(c)
+    return Dict(
+        "name" => "zstd",
+        "configuration" => Dict(
+            "level" => p.config.compressionlevel,
+            "checksum" => p.config.checksum
+        )
+    )
+end
diff --git a/src/Compressors/zstd.jl b/src/Compressors/zstd.jl
index c0e0254..6cd80a0 100644
--- a/src/Compressors/zstd.jl
+++ b/src/Compressors/zstd.jl
@@ -4,6 +4,7 @@ This file implements a Zstd compressor via ChunkCodecLibZstd.jl.
 
 =#
 
+
 using ChunkCodecLibZstd: ZstdEncodeOptions
 using ChunkCodecCore: encode, decode, decode!
 
@@ -51,4 +52,4 @@ function JSON.lower(z::ZstdCompressor)
     end
 end
 
-Zarr.compressortypes["zstd"] = ZstdCompressor
\ No newline at end of file
+Zarr.compressortypes["zstd"] = ZstdCompressor
diff --git a/src/Storage/Storage.jl b/src/Storage/Storage.jl
index 46c819a..2fb1328 100644
--- a/src/Storage/Storage.jl
+++ b/src/Storage/Storage.jl
@@ -108,8 +108,12 @@ function writeattrs(s::AbstractStore, p, att::Dict; indent_json::Bool= false)
   att
 end
 
-is_zgroup(s::AbstractStore, p) = isinitialized(s,_concatpath(p,".zgroup"))
-is_zarray(s::AbstractStore, p) = isinitialized(s,_concatpath(p,".zarray"))
+is_zarr3(s::AbstractStore, p) = isinitialized(s,_concatpath(p,"zarr.json"))
+is_zarr2(s::AbstractStore, p) = is_z2array(s, p) || is_z2group(s,p)
+is_zgroup(s::AbstractStore, p) = is_z2group(s,p)
+is_zarray(s::AbstractStore, p) = is_z2array(s,p)
+is_z2group(s::AbstractStore, p) = isinitialized(s,_concatpath(p,".zgroup"))
+is_z2array(s::AbstractStore, p) = isinitialized(s,_concatpath(p,".zarray"))
 
 isinitialized(s::AbstractStore, p, i::CartesianIndex)=isinitialized(s,p,citostring(i))
 isinitialized(s::AbstractStore, p, i) = isinitialized(s,_concatpath(p,i))
@@ -197,6 +201,7 @@ isemptysub(s::AbstractStore, p) = isempty(subkeys(s,p)) && isempty(subdirs(s,p))
 #during auto-check of storage format when doing zopen
 storageregexlist = Pair[]
 
+include("formattedstore.jl")
 include("directorystore.jl")
 include("dictstore.jl")
 include("s3store.jl")
diff --git a/src/Storage/formattedstore.jl b/src/Storage/formattedstore.jl
new file mode 100644
index 0000000..750ec6e
--- /dev/null
+++ b/src/Storage/formattedstore.jl
@@ -0,0 +1,230 @@
+# Default Zarr version
+const DV = 2
+
+# Default Zarr separator
+
+# Default Zarr v2 separator
+const DS2 = '.'
+# Default Zarr v3 separator
+const DS3 = '/'
+
+default_sep(version) = version == 2 ? DS2 :
+                       version == 3 ? DS3 :
+                       error("Unknown version: $version")
+const DS = default_sep(DV)
+
+# Chunk Key Encodings for Zarr v3
+# A Char is the separator for the default chunk key encoding
+abstract type ChunkKeyEncoding end
+struct V2ChunkKeyEncoding{SEP} <: ChunkKeyEncoding end
+separator(c::Char) = c
+separator(v2cke::V2ChunkKeyEncoding{SEP}) where SEP = SEP
+
+"""
+    FormattedStore{V,SEP,STORE <: AbstractStore} <: AbstractStore
+
+FormattedStore wraps an AbstractStore to indicate a specific Zarr format.
+The path of a chunk depends on the version and chunk key encoding.
+
+# Type Parameters
+
+- V: Zarr format version
+- SEP: Chunk key encoding or dimension separator.
+  SEP could be a `Char` or a subtype of `ChunkKeyEncoding`.
+- STORE: Type of AbstractStore wrapped
+
+# Chunk Path Formats
+
+## Zarr version 2
+
+### '.' dimension separator (default)
+
+Chunks are encoded as "1.2.3"
+
+### '/' dimension separator
+
+Chunks are encoded as "1/2/3"
+
+## Zarr version 3
+
+### '/' dimension separator (default)
+
+Chunks are encoded as "c/1/2/3"
+
+### '.' dimension separator
+
+Chunks are encoded as "c.1.2.3"
+
+### V2ChunkKeyEncoding{SEP}
+
+See Zarr version 2
+"""
+struct FormattedStore{V,SEP,STORE <: AbstractStore} <: AbstractStore
+    parent::STORE
+end
+FormattedStore(args...) = FormattedStore{DV,DS}(args...)
+FormattedStore(s::FormattedStore) = s
+FormattedStore{V}(args...) where V = FormattedStore{V, default_sep(V)}(args...)
+FormattedStore{V}(s::FormattedStore{<:Any,S}) where {V,S} = FormattedStore{V, S}(s)
+FormattedStore{<: Any, S}(args...) where S = FormattedStore{DV, S}(args...)
+FormattedStore{<: Any, S}(s::FormattedStore{V}) where {V,S} = FormattedStore{V, S}(s)
+function FormattedStore{V,S}(store::AbstractStore) where {V,S}
+    return FormattedStore{V,S,typeof(store)}(store)
+end
+function FormattedStore{V,S}(store::FormattedStore) where {V,S}  # re-wrap the inner store instead of nesting wrappers
+    p = parent(store)
+    return FormattedStore{V,S,typeof(p)}(p)
+end
+
+Base.parent(store::FormattedStore) = store.parent
+
+@inline citostring(i::CartesianIndex, version::Int, sep::Char=default_sep(version)) = (version == 3 ? "c$sep" : "" ) * join(reverse((i - oneunit(i)).I), sep)
+@inline citostring(::CartesianIndex{0}, version::Int, sep::Char=default_sep(version)) = (version == 3 ? "c" : "0" )  # v3 default encoding: a zero-dimensional chunk key is the bare prefix "c"
+@inline citostring(i::CartesianIndex, ::Int, ::Type{V2ChunkKeyEncoding{S}}) where S = citostring(i, 2, S)
+citostring(i::CartesianIndex, s::FormattedStore{V, S}) where {V,S} = citostring(i, V, S)
+
+Base.getindex(s::FormattedStore, p, i::CartesianIndex) = s[p, citostring(i,s)]
+Base.delete!(s::FormattedStore, p, i::CartesianIndex) = delete!(s, p, citostring(i,s))
+Base.setindex!(s::FormattedStore, v, p, i::CartesianIndex) = s[p, citostring(i,s)]=v
+
+isinitialized(s::FormattedStore, p, i::CartesianIndex) = isinitialized(s,p,citostring(i, s))
+
+"""
+- [`storagesize(d::AbstractStore, p::AbstractString)`](@ref storagesize)
+- [`subdirs(d::AbstractStore, p::AbstractString)`](@ref subdirs)
+- [`subkeys(d::AbstractStore, p::AbstractString)`](@ref subkeys)
+- [`isinitialized(d::AbstractStore, p::AbstractString)`](@ref isinitialized)
+- [`storefromstring(::Type{<: AbstractStore}, s, _)`](@ref storefromstring)
+- `Base.getindex(d::AbstractStore, i::AbstractString)`: return the data stored in key `i` as a Vector{UInt8}
+- `Base.setindex!(d::AbstractStore, v, i::AbstractString)`: write the values in `v` to the key `i` of the given store `d`
+"""
+
+storagesize(d::FormattedStore, p::AbstractString) = storagesize(parent(d), p)
+subdirs(d::FormattedStore, p::AbstractString) = subdirs(parent(d), p)
+subkeys(d::FormattedStore, p::AbstractString) = subkeys(parent(d), p)
+isinitialized(d::FormattedStore, p::AbstractString) = isinitialized(parent(d), p)
+storefromstring(::Type{FormattedStore{<: Any, <: Any, STORE}}, s, _) where STORE = FormattedStore{DV,DS}(storefromstring(STORE, s))  # NOTE(review): calls a 2-argument storefromstring — confirm it exists
+storefromstring(::Type{FormattedStore{V,S}}, s, _) where {V,S} = FormattedStore{DV,DS}(storefromstring(s))  # NOTE(review): V and S are unused here — confirm intent
+storefromstring(::Type{FormattedStore{V,S,STORE}}, s, _) where {V,S,STORE} = FormattedStore{V,S,STORE}(storefromstring(STORE, s))
+Base.getindex(d::FormattedStore, i::AbstractString) = getindex(parent(d), i)
+Base.setindex!(d::FormattedStore, v, i::AbstractString) = setindex!(parent(d), v, i)
+Base.delete!(d::FormattedStore, i::AbstractString) = delete!(parent(d), i)
+
+
+function Base.getproperty(store::FormattedStore{V,S}, sym::Symbol) where {V,S}
+    if sym == :dimension_separator
+        return S
+    elseif sym == :zarr_format
+        return V
+    elseif sym !== :parent && sym ∈ propertynames(getfield(store, :parent))
+        # Support forwarding of properties to parent; getfield avoids re-entering getproperty
+        return getproperty(getfield(store, :parent), sym)
+    else
+        getfield(store, sym)
+    end
+end
+function Base.propertynames(store::FormattedStore)  # virtual properties, own fields, then the wrapped store's properties
+    return (:dimension_separator, :zarr_format, fieldnames(typeof(store))..., propertynames(getfield(store, :parent))...)
+end
+
+
+"""
+    Zarr.set_dimension_separator(store::FormattedStore{V}, sep::Char)::FormattedStore{V,sep}
+
+Returns a FormattedStore of the same type with the same `zarr_format` parameter, `V`,
+but with a dimension separator of `sep`. Note that this does not mutate the original store.
+
+# Examples
+
+```
+julia> Zarr.set_dimension_separator(Zarr.FormattedStore{2, '.'}(Zarr.DictStore()), '/') |> typeof
+Zarr.FormattedStore{2, '/', Zarr.DictStore}
+```
+
+"""
+function set_dimension_separator(store::FormattedStore{V}, sep::Char) where V
+    return FormattedStore{V,sep}(store)
+end
+function set_dimension_separator(store::AbstractStore, sep::Char)
+    return FormattedStore{<: Any,sep}(store)
+end
+
+"""
+    set_zarr_format(::FormattedStore{<: Any, S}, zarr_format::Int)::FormattedStore{zarr_format,S}
+
+Returns a FormattedStore of the same type with the same `dimension_separator` parameter, `S`,
+but with the specified `zarr_format` parameter. Note that this does not mutate the original store.
+
+# Examples
+
+```
+julia> Zarr.set_zarr_format(Zarr.FormattedStore{2, '.'}(Zarr.DictStore()), 3) |> typeof
+Zarr.FormattedStore{3, '.', Zarr.DictStore}
+```
+
+"""
+function set_zarr_format(store::FormattedStore{<: Any, S}, zarr_format::Int) where S
+    return FormattedStore{zarr_format,S}(store)
+end
+function set_zarr_format(store::AbstractStore, zarr_format::Int)
+    return FormattedStore{zarr_format}(store)
+end
+
+dimension_separator(::AbstractStore) = DS
+dimension_separator(::FormattedStore{<: Any,S}) where S = S
+zarr_format(::AbstractStore) = DV
+zarr_format(::FormattedStore{V}) where V = V
+
+is_zgroup(s::FormattedStore{3}, p, metadata=nothing) =  # metadata is loaded only after zarr.json is known to exist
+    isinitialized(s,_concatpath(p,"zarr.json")) &&
+    (isnothing(metadata) ? getmetadata(s, p, false) : metadata).node_type == "group"
+is_zarray(s::FormattedStore{3}, p, metadata=nothing) =  # metadata is loaded only after zarr.json is known to exist
+    isinitialized(s,_concatpath(p,"zarr.json")) &&
+    (isnothing(metadata) ? getmetadata(s, p, false) : metadata).node_type == "array"
+
+getmetadata(s::FormattedStore{3}, p,fill_as_missing) = Metadata(String(maybecopy(s[p,"zarr.json"])),fill_as_missing)
+function writemetadata(s::FormattedStore{3}, p, m::Metadata; indent_json::Bool= false)  # serialise `m` to <p>/zarr.json and return `m`
+    met = IOBuffer()
+
+    if indent_json
+        JSON.print(met,m,4)
+    else
+        JSON.print(met,m)
+    end
+
+    s[p,"zarr.json"] = take!(met)
+    m
+end
+
+function getattrs(s::FormattedStore{3}, p)  # `p` is the node path whose zarr.json holds the attributes
+    md = s[p,"zarr.json"]
+    if md === nothing
+        error("zarr.json not found")
+    else
+        md = JSON.parse(replace(String(maybecopy(md)),": NaN,"=>": \"NaN\","))
+        return get(md, "attributes", Dict{String, Any}())
+    end
+end
+
+function writeattrs(s::FormattedStore{3}, p, att::Dict; indent_json::Bool= false)
+    # This is messy, we need to open zarr.json and replace the attributes section
+    md = s[p,"zarr.json"]
+    if md === nothing
+        error("zarr.json not found")
+    else
+        md = JSON.parse(replace(String(maybecopy(md)),": NaN,"=>": \"NaN\","))
+    end
+    md = Dict(md)
+    md["attributes"] = att
+
+    b = IOBuffer()
+
+    if indent_json
+        JSON.print(b,md,4)
+    else
+        JSON.print(b,md)
+    end
+
+    s[p,"zarr.json"] = take!(b)
+    att
+end
diff --git a/src/Storage/http.jl b/src/Storage/http.jl
index 9b68cb1..980284f 100644
--- a/src/Storage/http.jl
+++ b/src/Storage/http.jl
@@ -13,8 +13,8 @@ python package. In case you experience performance issues, one can try to use
 struct HTTPStore <: AbstractStore
     url::String
     allowed_codes::Set{Int}
+    HTTPStore(url, allowed_codes = Set((404,))) = new(url, allowed_codes)
 end
-HTTPStore(url) = HTTPStore(url,Set((404,)))
 
 function Base.getindex(s::HTTPStore, k::String)
     r = HTTP.request("GET",string(s.url,"/",k),status_exception = false,socket_type_tls=OpenSSL.SSLStream)
@@ -39,7 +39,21 @@ end
 
 push!(storageregexlist,r"^https://"=>HTTPStore)
 push!(storageregexlist,r"^http://"=>HTTPStore)
-storefromstring(::Type{<:HTTPStore}, s,_) = ConsolidatedStore(HTTPStore(s),""),""
+function storefromstring(::Type{<:HTTPStore}, s,_)
+    http_store = HTTPStore(s)
+    try
+        if http_store["", ".zmetadata"] !== nothing
+            http_store = ConsolidatedStore(http_store,"")
+        end
+        if is_zarray(http_store, "")
+            meta = getmetadata(http_store, "", false)
+            http_store = FormattedStore{meta.zarr_format, meta.dimension_separator}(http_store)
+        end
+    catch err
+        @warn "Additional metadata was not available for HTTPStore." exception=err
+ end + return http_store,"" +end """ missing_chunk_return_code!(s::HTTPStore, code::Union{Int,AbstractVector{Int}}) diff --git a/src/ZArray.jl b/src/ZArray.jl index b0ccfc0..bb20004 100644 --- a/src/ZArray.jl +++ b/src/ZArray.jl @@ -303,6 +303,7 @@ Creates a new empty zarr array with element type `T` and array dimensions `dims` * `path=""` directory name to store a persistent array. If left empty, an in-memory array will be created * `name=""` name of the zarr array, defaults to the directory name +* `zarr_format`=$(DV) Zarr format version (2 or 3) * `storagetype` determines the storage to use, current options are `DirectoryStore` or `DictStore` * `chunks=dims` size of the individual array chunks, must be a tuple of length `length(dims)` * `fill_value=nothing` value to represent missing values @@ -312,23 +313,33 @@ Creates a new empty zarr array with element type `T` and array dimensions `dims` * `attrs=Dict()` a dict containing key-value pairs with metadata attributes associated to the array * `writeable=true` determines if the array is opened in read-only or write mode * `indent_json=false` determines if indents are added to format the json files `.zarray` and `.zattrs`. This makes them more readable, but increases file size. +* `dimension_separator='.'` sets how chunks are encoded. The Zarr v2 default is '.' such that the first 3D chunk would be `0.0.0`. The Zarr v3 default is `/`. """ function zcreate(::Type{T}, dims::Integer...; name="", path=nothing, + zarr_format=DV, + dimension_separator=default_sep(zarr_format), kwargs... 
) where T + + if dimension_separator isa AbstractString + # Convert AbstractString to Char + dimension_separator = only(dimension_separator) + end + if path===nothing - store = DictStore() + store = FormattedStore{zarr_format, dimension_separator}(DictStore()) else - store = DirectoryStore(joinpath(path,name)) + store = FormattedStore{zarr_format, dimension_separator}(DirectoryStore(joinpath(path,name))) end - zcreate(T, store, dims...; kwargs...) + zcreate(T, store, dims...; zarr_format, kwargs...) end function zcreate(::Type{T},storage::AbstractStore, dims...; path = "", + zarr_format = DV, chunks=dims, fill_value=nothing, fill_as_missing=false, @@ -336,15 +347,27 @@ function zcreate(::Type{T},storage::AbstractStore, filters = filterfromtype(T), attrs=Dict(), writeable=true, - indent_json=false - ) where T + indent_json=false, + dimension_separator=nothing + ) where {T} + + if isnothing(dimension_separator) + dimension_separator = Zarr.dimension_separator(storage) + elseif dimension_separator != Zarr.dimension_separator(storage) + error("The dimension separator keyword value, $dimension_separator, + must agree with the dimension separator type parameter, $(Zarr.dimension_separator(storage))") + end length(dims) == length(chunks) || throw(DimensionMismatch("Dims must have the same length as chunks")) N = length(dims) C = typeof(compressor) + if fill_value === nothing && zarr_format == 3 + fill_value = zero(T) + end T2 = (fill_value === nothing || !fill_as_missing) ? 
T : Union{T,Missing} - metadata = Metadata{T2, N, C, typeof(filters)}( - 2, + metadata = Metadata{T2, N, C, typeof(filters), dimension_separator}( + zarr_format, + "array", dims, chunks, typestr(T), diff --git a/src/ZGroup.jl b/src/ZGroup.jl index be2b0d1..031d33f 100644 --- a/src/ZGroup.jl +++ b/src/ZGroup.jl @@ -20,10 +20,21 @@ function ZGroup(s::T,mode="r",path="";fill_as_missing=false) where T <: Abstract for d in subdirs(s,path) dshort = split(d,'/')[end] - m = zopen_noerr(s,mode,path=_concatpath(path,dshort),fill_as_missing=fill_as_missing) - if isa(m, ZArray) + subpath = _concatpath(path,dshort) + if is_zarr2(s, subpath) + # check for zarr2 first + elseif is_zarr3(s, subpath) + s = set_zarr_format(s, 3) + end + if is_zarray(s, subpath) + meta = getmetadata(s, subpath, false) + if dimension_separator(s) != meta.dimension_separator + s = set_dimension_separator(s, meta.dimension_separator) + end + m = zopen_noerr(s,mode,path=_concatpath(path,dshort),fill_as_missing=fill_as_missing) arrays[dshort] = m - elseif isa(m, ZGroup) + elseif is_zgroup(s, subpath) + m = zopen_noerr(s,mode,path=_concatpath(path,dshort),fill_as_missing=fill_as_missing) groups[dshort] = m end end @@ -37,9 +48,9 @@ end Works like `zopen` with the single difference that no error is thrown when the path or store does not point to a valid zarr array or group, but nothing -is returned instead. +is returned instead. 
""" -function zopen_noerr(s::AbstractStore, mode="r"; +function zopen_noerr(s::AbstractStore, mode="r"; consolidated = false, path="", lru = 0, @@ -116,8 +127,21 @@ function storefromstring(s, create=true) return storefromstring(t,s,create) end end - if create || isdir(s) - return DirectoryStore(s), "" + if create + return FormattedStore(DirectoryStore(s)), "" + elseif isdir(s) + # parse metadata to determine store kind + temp_store = DirectoryStore(s) + if is_zarr3(temp_store, "") + temp_store = set_zarr_format(temp_store, 3) + end + if is_zarray(temp_store, "") + meta = getmetadata(temp_store, "", false) + store = FormattedStore{meta.zarr_format, meta.dimension_separator}(temp_store) + else + store = FormattedStore(temp_store) + end + return store, "" else throw(ArgumentError("Path $s is not a directory.")) end @@ -129,7 +153,7 @@ end Create a new zgroup in the store `s` """ function zgroup(s::AbstractStore, path::String=""; attrs=Dict(), indent_json::Bool= false) - d = Dict("zarr_format"=>2) + d = Dict("zarr_format"=>DV) isemptysub(s, path) || error("Store is not empty") b = IOBuffer() diff --git a/src/Zarr.jl b/src/Zarr.jl index dbdeb9a..1783bdf 100644 --- a/src/Zarr.jl +++ b/src/Zarr.jl @@ -4,7 +4,9 @@ import JSON import Blosc include("metadata.jl") +include("metadata3.jl") include("Compressors/Compressors.jl") +include("Codecs/Codecs.jl") include("Storage/Storage.jl") include("Filters/Filters.jl") include("ZArray.jl") diff --git a/src/metadata.jl b/src/metadata.jl index 607d0bf..733018e 100644 --- a/src/metadata.jl +++ b/src/metadata.jl @@ -90,10 +90,20 @@ Each array requires essential configuration metadata to be stored, enabling corr interpretation of the stored data. This metadata is encoded using JSON and stored as the value of the “.zarray” key within an array store. 
+# Type Parameters
+* T - element type of the array
+* N - dimensionality of the array
+* C - compressor
+* F - filters
+* S - dimension separator
+
+# See Also
+
 https://zarr.readthedocs.io/en/stable/spec/v2.html#metadata
 """
-struct Metadata{T, N, C, F}
+struct Metadata{T, N, C, F, S}
     zarr_format::Int
+    node_type::String
     shape::Base.RefValue{NTuple{N, Int}}
     chunks::NTuple{N, Int}
     dtype::String # structured data types not yet supported
@@ -101,43 +111,85 @@ struct Metadata{T, N, C, F}
     fill_value::Union{T, Nothing}
     order::Char
     filters::F # not yet supported
-    function Metadata{T2, N, C, F}(zarr_format, shape, chunks, dtype, compressor,fill_value, order, filters) where {T2,N,C,F}
+    function Metadata{T2, N, C, F, S}(zarr_format, node_type, shape, chunks, dtype, compressor, fill_value, order, filters) where {T2,N,C,F,S}
         #We currently only support version
-        zarr_format == 2 || throw(ArgumentError("Zarr.jl currently only support v2 of the protocol"))
+        # zarr_format == 2 || throw(ArgumentError("Zarr.jl currently only support v2 of the protocol"))
+        zarr_format == 3 ? @warn("Zarr v3 support is experimental") :
+        zarr_format == 2 ? nothing :
+        throw(ArgumentError("Zarr.jl currently only supports v2 or v3 of the specification"))
         #Do some sanity checks to make sure we have a sane array
         any(<(0), shape) && throw(ArgumentError("Size must be positive"))
         any(<(1), chunks) && throw(ArgumentError("Chunk size must be >= 1 along each dimension"))
         order === 'C' || throw(ArgumentError("Currently only 'C' storage order is supported"))
-        new{T2, N, C, F}(zarr_format, Base.RefValue{NTuple{N,Int}}(shape), chunks, dtype, compressor,fill_value, order, filters)
+        new{T2, N, C, F, S}(zarr_format, node_type, Base.RefValue{NTuple{N,Int}}(shape), chunks, dtype, compressor,fill_value, order, filters)
+    end
+    function Metadata{T2, N, C, F}(
+        zarr_format,
+        node_type,
+        shape,
+        chunks,
+        dtype,
+        compressor,
+        fill_value,
+        order,
+        filters,
+        dimension_separator::Char = '.'
+    ) where {T2,N,C,F}
+        return Metadata{T2, N, C, F, dimension_separator}(
+            zarr_format,
+            node_type,
+            shape,
+            chunks,
+            dtype,
+            compressor,
+            fill_value,
+            order, filters  # the 5-parameter constructor takes nine arguments, `filters` last
+        )
     end
+
 end
 
+const DimensionSeparatedMetadata{S} = Metadata{<: Any, <: Any, <: Any, <: Any, S}
+
+function Base.getproperty(m::DimensionSeparatedMetadata{S}, name::Symbol) where S  # exposes type parameter S as `m.dimension_separator`
+    if name == :dimension_separator
+        return S
+    end
+    return getfield(m, name)
+end
+Base.propertynames(m::Metadata) = (fieldnames(Metadata)..., :dimension_separator)
+
 #To make unit tests pass with ref shape
 import Base.==
 function ==(m1::Metadata, m2::Metadata)
   m1.zarr_format == m2.zarr_format &&
+  m1.node_type == m2.node_type &&
   m1.shape[] == m2.shape[] &&
   m1.chunks == m2.chunks &&
   m1.dtype == m2.dtype &&
   m1.compressor == m2.compressor &&
   m1.fill_value == m2.fill_value &&
   m1.order == m2.order &&
-  m1.filters == m2.filters
+  m1.filters == m2.filters &&
+  m1.dimension_separator == m2.dimension_separator
 end
 
 
 "Construct Metadata based on your data"
 function Metadata(A::AbstractArray{T, N}, chunks::NTuple{N, Int};
         zarr_format::Integer=2,
+        node_type::String="array",
         compressor::C=BloscCompressor(),
         fill_value::Union{T, Nothing}=nothing,
         order::Char='C',
         filters::Nothing=nothing,
         fill_as_missing = false,
+        dimension_separator::Char = '.'
     ) where {T, N, C}
     T2 = (fill_value === nothing || !fill_as_missing) ? T : Union{T,Missing}
-    Metadata{T2, N, C, typeof(filters)}(
+    Metadata{T2, N, C, typeof(filters), dimension_separator}(
         zarr_format,
+        node_type,
         size(A),
         chunks,
         typestr(eltype(A)),
@@ -154,6 +206,13 @@ Metadata(s::Union{AbstractString, IO},fill_as_missing) = Metadata(JSON.parse(s),
 function Metadata(d::AbstractDict, fill_as_missing)
     # create a Metadata struct from it
 
+    if d["zarr_format"] == 3
+        return Metadata3(d, fill_as_missing)
+    end
+
+    # Zarr v2 metadata is only for arrays
+    node_type = "array"
+
     compdict = d["compressor"]
     if isnothing(compdict)
         # try the last filter, for Kerchunk compat
@@ -174,8 +233,11 @@ function Metadata(d::AbstractDict, fill_as_missing)
 
     TU = (fv === nothing || !fill_as_missing) ? T : Union{T,Missing}
 
-    Metadata{TU, N, C, F}(
+    S = only(get(d, "dimension_separator", '.'))
+
+    Metadata{TU, N, C, F, S}(
         d["zarr_format"],
+        node_type,
         NTuple{N, Int}(d["shape"]) |> reverse,
         NTuple{N, Int}(d["chunks"]) |> reverse,
         d["dtype"],
@@ -188,15 +250,20 @@ end
 
 "Describes how to lower Metadata to JSON, used in json(::Metadata)"
 function JSON.lower(md::Metadata)
+    if md.zarr_format == 3
+        return lower3(md)
+    end
     Dict{String, Any}(
         "zarr_format" => md.zarr_format,
+        "node_type" => md.node_type,
         "shape" => md.shape[] |> reverse,
         "chunks" => md.chunks |> reverse,
         "dtype" => md.dtype,
         "compressor" => md.compressor,
         "fill_value" => fill_value_encoding(md.fill_value),
         "order" => md.order,
-        "filters" => md.filters
+        "filters" => md.filters,
+        "dimension_separator" => md.dimension_separator
     )
 end
 
diff --git a/src/metadata3.jl b/src/metadata3.jl
new file mode 100644
index 0000000..1678927
--- /dev/null
+++ b/src/metadata3.jl
@@ -0,0 +1,305 @@
+"""
+Prototype Zarr version 3 support
+"""
+
+const typemap3 = Dict{String, DataType}()
+foreach([Bool, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float16, Float32, Float64]) do t
+    typemap3[lowercase(string(t))] = t
+end
+typemap3["complex64"] = ComplexF32
+typemap3["complex128"] = ComplexF64
+
+function typestr3(t::Type)
+    return something(findfirst(==(t), typemap3), lowercase(string(t)))  # reverse lookup so ComplexF32 maps to "complex64", matching typemap3
+end
+# TODO: Check raw types
+function typestr3(::Type{NTuple{N,UInt8}}) where {N}
+    return "r$(N*8)"
+end
+
+function typestr3(s::AbstractString, codecs=nothing)  # data_type name => Julia type; "r<bits>" names map to NTuple{bits÷8,UInt8}
+    if !haskey(typemap3, s)
+        if startswith(s, "r")
+            num_bits = tryparse(Int, s[2:end])
+            if isnothing(num_bits)
+                throw(ArgumentError("$s is not a known type"))
+            end
+            if mod(num_bits, 8) == 0
+                return NTuple{num_bits÷8,UInt8}
+            else
+                throw(ArgumentError("$s must describe a raw type with bit size that is a multiple of 8 bits"))
+            end
+        end
+    end
+    return typemap3[s]
+end
+
+function check_keys(d::AbstractDict, keys)  # throws ArgumentError on the first key in `keys` missing from `d`
+    for key in keys
+        if !haskey(d, key)
+            throw(ArgumentError("Zarr v3 metadata must have a key called $key"))
+        end
+    end
+end
+
+function Metadata3(d::AbstractDict, fill_as_missing)
+    check_keys(d, ("zarr_format", "node_type"))
+
+    zarr_format = d["zarr_format"]::Int
+
+    node_type = d["node_type"]::String
+    if node_type ∉ ("group", "array")
+        throw(ArgumentError("Unknown node_type of $node_type"))
+    end
+
+    zarr_format == 3 || throw(ArgumentError("Metadata3 only functions if zarr_format == 3"))
+
+    # Groups
+    if node_type == "group"
+        # Groups only need zarr_format and node_type
+        # Optionally they can have attributes
+        for key in keys(d)
+            if key ∉ ("zarr_format", "node_type", "attributes")
+                throw(ArgumentError("Zarr v3 group metadata cannot have a key called $key"))
+            end
+        end
+
+        return Metadata{Int,0,Nothing,Nothing,'/'}(zarr_format, node_type, (), (), "", nothing, 0, 'C', nothing)
+    end
+
+    # Array keys
+    mandatory_keys = [
+        "zarr_format",
+        "node_type",
+        "shape",
+        "data_type",
+        "chunk_grid",
+        "chunk_key_encoding",
+        "fill_value",
+        "codecs",
+    ]
+    optional_keys = [
+        "attributes",
+        "storage_transformers",
+        "dimension_names",
+    ]
+
+    check_keys(d, mandatory_keys)
+    for key in keys(d)
+        if key ∉ mandatory_keys && key ∉ optional_keys
+            throw(ArgumentError("Zarr v3 metadata cannot have a key called $key"))
+        end
+    end
+
+    # Shape
+    
shape = Int.(d["shape"]) + + # Datatype + data_type = d["data_type"]::String + + # Chunk Grid + chunk_grid = d["chunk_grid"] + if chunk_grid["name"] == "regular" + chunks = Int.(chunk_grid["configuration"]["chunk_shape"]) + if length(shape) != length(chunks) + throw(ArgumentError("Shape has rank $(length(shape)) which does not match the chunk_shape rank of $(length(chunk_shape))")) + end + else + throw(ArgumentError("Unknown chunk_grid of name, $(chunk_grid["name"])")) + end + + # Chunk Key Encoding + chunk_key_encoding = d["chunk_key_encoding"] + if chunk_key_encoding["name"] == "default" + elseif chunk_key_encoding["name"] == "v2" + else + throw(ArgumentError("Unknown chunk_key_encoding of name, $(chunk_key_encoding["name"])")) + end + + + # Codecs + compdict = nothing + + # For transpose codec permutation tracking + default_dim_perm = Tuple(1:length(shape)) + dim_perm = default_dim_perm + + codec_data_type = :array + + function check_codec_data_type(from, to) + codec_data_type == from || + throw(ArgumentError("$codec_name found by codec_data_type is $codec_data_type")) + codec_data_type = to + return nothing + end + + for codec in d["codecs"] + codec_name = codec["name"] + if codec_name == "bytes" + # array -> bytes + check_codec_data_type(:array, :bytes) + if haskey(codec, "configuration") + codec["configuration"]["endian"] == "little" || + throw(ArgumentError("Zarr.jl currently only supports little endian for the bytes codec")) + end + elseif codec_name == "zstd" + # bytes -> bytes + check_codec_data_type(:bytes, :bytes) + compdict = codec + elseif codec_name == "blosc" + # bytes -> bytes + check_codec_data_type(:bytes, :bytes) + compdict = codec + elseif codec_name == "gzip" + # bytes -> bytes + check_codec_data_type(:bytes, :bytes) + compdict = codec + elseif codec_name == "transpose" + # array -> array + check_codec_data_type(:array, :array) + _dim_order = codec["configuration"]["order"] + if _dim_order == "C" + @warn "Transpose codec dimension order of 
$_dim_order is deprecated" + _dim_order = 1:length(shape) + elseif _dim_order == "F" + @warn "Transpose codec dimension order of $_dim_order is deprecated" + _dim_order = reverse(1:length(shape)) + else + _dim_order = Int.(codec["configuration"]["order"]) .+ 1 + end + dim_perm = dim_perm[_dim_order] + elseif codec_name == "sharding_indexed" + # array -> bytes + check_codec_data_type(:array, :bytes) + throw(ArgumentError("Zarr.jl currently does not support the $(codec["name"]) codec")) + elseif codec_name == "crc32c" + # bytes -> bytes + check_codec_data_type(:bytes, :bytes) + throw(ArgumentError("Zarr.jl currently does not support the $(codec["name"]) codec")) + else + throw(ArgumentError("Zarr.jl currently does not support the $(codec["name"]) codec")) + end + end + + if dim_perm == default_dim_perm + order = 'C' + elseif dim_perm == reverse(default_dim_perm) + order = 'F' + else + throw(ArgumentError("Dimension permutation of $dim_perm is not implemented")) + end + + compressor = getCompressor(compdict) + + # Filters (NOT IMPLEMENTED) + filters = getfilters(d) + + # Type Parameters + T = typestr3(data_type) + N = length(shape) + C = typeof(compressor) + F = typeof(filters) + + fv = fill_value_decoding(d["fill_value"], T)::T + + TU = (fv === nothing || !fill_as_missing) ? T : Union{T,Missing} + + cke_configuration = get(chunk_key_encoding, "configuration") do + Dict{String,Any}() + end + # V2 uses '.' 
while default CKE uses '/' by default + if chunk_key_encoding["name"] == "v2" + separator = only(get(cke_configuration, "separator", '.')) + S = V2ChunkKeyEncoding{separator}() + elseif chunk_key_encoding["name"] == "default" + S = only(get(cke_configuration, "separator", '/')) + end + + Metadata{TU, N, C, F, S}( + zarr_format, + node_type, + NTuple{N, Int}(shape) |> reverse, + NTuple{N, Int}(chunks) |> reverse, + data_type, + compressor, + fv, + order, + filters, + ) +end + +function lower3(md::Metadata{T}) where T + md.zarr_format == 3 || throw(ArgumentError("lower3 only applies when zarr_format is 3")) + + mandatory_keys = [ + "zarr_format", + "node_type", + "shape", + "data_type", + "chunk_grid", + "chunk_key_encoding", + "fill_value", + "codecs", + ] + optional_keys = [ + "attributes", + "storage_transformers", + "dimension_names", + ] + + chunk_grid = Dict{String,Any}( + "name" => "regular", + "configuration" => Dict{String,Any}( + "chunk_shape" => md.chunks |> reverse + ) + ) + + chunk_key_encoding = Dict{String,Any}( + "name" => isa(md.dimension_separator, Char) ? "default" : + isa(md.dimension_separator, V2ChunkKeyEncoding) ? "v2" : + error("Unknown encoding for $(md.dimension_separator)"), + "configuration" => Dict{String,Any}( + "separator" => separator(md.dimension_separator) + ) + ) + + # TODO: Incorporate filters + codecs = Dict{String,Any}[] + + default_dim_perm = Tuple(0:length(md.shape[])-1) + + # Encode the order as a single transpose codec (array to array) + push!(codecs, + Dict{String,Any}( + "name" => "transpose", + "configuration" => Dict( + "order" => md.order == 'C' ? default_dim_perm : + md.order == 'F' ? 
reverse(default_dim_perm) : + error("Unable to encode order $(md.order)") + ) + ) + ) + + # Convert from array to bytes + push!(codecs, + Dict{String,Any}( + "name" => "bytes", + "configuration" => Dict{String, Any}( + "endian" => "little" + ) + ) + ) + # Compress bytes to bytes + push!(codecs, JSON.lower(Compressor_v3(md.compressor))) + + Dict{String, Any}( + "zarr_format" => md.zarr_format, + "node_type" => md.node_type, + "shape" => md.shape[] |> reverse, + "data_type" => typestr3(T), + "chunk_grid" => chunk_grid, + "chunk_key_encoding" => chunk_key_encoding, + "fill_value" => fill_value_encoding(md.fill_value)::T, + "codecs" => codecs + ) +end diff --git a/test/runtests.jl b/test/runtests.jl index c23c522..7eef9e8 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -15,11 +15,14 @@ CondaPkg.add("zarr"; version="2.*") @testset "fields" begin z = zzeros(Int64, 2, 3) @test z isa ZArray{Int64, 2, Zarr.BloscCompressor, - Zarr.DictStore} + Zarr.FormattedStore{2, '.', Zarr.DictStore}} + @test :a ∈ propertynames(z.storage) @test length(z.storage.a) === 3 @test length(z.storage.a["0.0"]) === 64 @test eltype(z.storage.a["0.0"]) === UInt8 + @test z.metadata.zarr_format === 2 + @test z.metadata.node_type === "array" @test z.metadata.shape[] === (2, 3) @test z.metadata.order === 'C' @test z.metadata.chunks === (2, 3) @@ -31,16 +34,17 @@ CondaPkg.add("zarr"; version="2.*") @test z.metadata.compressor.shuffle === 1 @test z.attrs == Dict{Any, Any}() @test z.writeable === true + @test z.metadata.dimension_separator === Zarr.DS + @test :dimension_separator ∈ propertynames(z.metadata) @test_throws ArgumentError zzeros(Int64,2,3, chunks = (0,1)) @test_throws ArgumentError zzeros(Int64,0,-1) - @test_throws ArgumentError Zarr.Metadata(zeros(2,2), (2,2), zarr_format = 3) @test_throws ArgumentError Zarr.Metadata(zeros(2,2), (2,2), order = 'F') end @testset "methods" begin z = zzeros(Int64, 2, 3) @test z isa ZArray{Int64, 2, Zarr.BloscCompressor, - Zarr.DictStore} + 
Zarr.FormattedStore{2, '.', Zarr.DictStore}} @test eltype(z) === Int64 @test ndims(z) === 2 @@ -60,7 +64,7 @@ CondaPkg.add("zarr"; version="2.*") compressor=Zarr.NoCompressor()) @test z.metadata.compressor === Zarr.NoCompressor() - @test z.storage === Zarr.DirectoryStore("$dir/$name") + @test z.storage === Zarr.FormattedStore{2 ,'.'}(Zarr.DirectoryStore("$dir/$name")) @test isdir("$dir/$name") @test ispath("$dir/$name/.zarray") @test ispath("$dir/$name/.zattrs") @@ -69,12 +73,15 @@ CondaPkg.add("zarr"; version="2.*") @test JSON.parsefile("$dir/$name/.zarray") == Dict{String, Any}( "dtype" => " nothing, - "shape" => [3, 2], + "shape" => Any[3, 2], "order" => "C", "zarr_format" => 2, - "chunks" => [3, 2], + "node_type" => "array", + "chunks" => Any[3, 2], "fill_value" => nothing, - "compressor" => nothing) + "compressor" => nothing, + "dimension_separator" => "." + ) # call gc to avoid unlink: operation not permitted (EPERM) on Windows # might be because files are left open # from https://github.com/JuliaLang/julia/blob/f6344d32d3ebb307e2b54a77e042559f42d2ebf6/stdlib/SharedArrays/test/runtests.jl#L146 diff --git a/test/storage.jl b/test/storage.jl index 9e4fac7..162d301 100644 --- a/test/storage.jl +++ b/test/storage.jl @@ -8,10 +8,39 @@ @test Zarr.normalize_path("/path/to/a") == "/path/to/a" end +@testset "Version and Dimension Separator" begin + v2cke_period = Zarr.V2ChunkKeyEncoding{'.'} + v2cke_slash = Zarr.V2ChunkKeyEncoding{'/'} + let ci = CartesianIndex() + @test Zarr.citostring(ci, 2, '.') == "0" + @test Zarr.citostring(ci, 2, '/') == "0" + @test Zarr.citostring(ci, 3, v2cke_period) == "0" + @test Zarr.citostring(ci, 3, v2cke_slash) == "0" + @test Zarr.citostring(ci, 3, '.') == "c.0" + @test Zarr.citostring(ci, 3, '/') == "c/0" + end + let ci = CartesianIndex(1,1,1) + @test Zarr.citostring(ci, 2, '.') == "0.0.0" + @test Zarr.citostring(ci, 2, '/') == "0/0/0" + @test Zarr.citostring(ci, 3, v2cke_period) == "0.0.0" + @test Zarr.citostring(ci, 3, v2cke_slash) == 
"0/0/0" + @test Zarr.citostring(ci, 3, '.') == "c.0.0.0" + @test Zarr.citostring(ci, 3, '/') == "c/0/0/0" + end + let ci = CartesianIndex(1,3,5) + @test Zarr.citostring(ci, 2, '.') == "4.2.0" + @test Zarr.citostring(ci, 2, '/') == "4/2/0" + @test Zarr.citostring(ci, 3, v2cke_period) == "4.2.0" + @test Zarr.citostring(ci, 3, v2cke_slash) == "4/2/0" + @test Zarr.citostring(ci, 3, '.') == "c.4.2.0" + @test Zarr.citostring(ci, 3, '/') == "c/4/2/0" + end +end + """ Function to test the interface of AbstractStore. Every complete implementation should pass this test. """ -function test_store_common(ds) +function test_store_common(ds::Zarr.AbstractStore) @test !Zarr.is_zgroup(ds,"") ds[".zgroup"]=rand(UInt8,50) @test haskey(ds,".zgroup") @@ -31,17 +60,23 @@ function test_store_common(ds) @test Zarr.subdirs(ds,"bar") == String[] #Test getindex and setindex data = rand(UInt8,50) - ds["bar/0.0.0"] = data + V = Zarr.zarr_format(ds) + S = Zarr.dimension_separator(ds) + first_ci_str = Zarr.citostring(CartesianIndex(1,1,1), V, S) + second_ci_str = Zarr.citostring(CartesianIndex(2,1,1), V, S) + ds["bar/" * first_ci_str] = data @test ds["bar/0.0.0"]==data @test Zarr.storagesize(ds,"bar")==50 - @test Zarr.isinitialized(ds,"bar/0.0.0") - @test !Zarr.isinitialized(ds,"bar/0.0.1") + @test Zarr.isinitialized(ds,"bar/" * first_ci_str) + @test !Zarr.isinitialized(ds,"bar/" * second_ci_str) Zarr.writeattrs(ds,"bar",Dict("a"=>"b")) @test Zarr.getattrs(ds,"bar")==Dict("a"=>"b") - delete!(ds,"bar/0.0.0") - @test !Zarr.isinitialized(ds,"bar",CartesianIndex((0,0,0))) - @test !Zarr.isinitialized(ds,"bar/0.0.0") - ds["bar/0.0.0"] = data + delete!(ds,"bar/" * first_ci_str) + @test !Zarr.isinitialized(ds,"bar",CartesianIndex((1,1,1))) + @test !Zarr.isinitialized(ds,"bar/" * first_ci_str) + ds["bar/" * first_ci_str] = data + @test !Zarr.isinitialized(ds, "bar", CartesianIndex(0,0,0)) + @test Zarr.isinitialized(ds, "bar", CartesianIndex(1,1,1)) #Add tests for empty storage @test 
Zarr.isemptysub(ds,"ba") @test Zarr.isemptysub(ds,"ba/") @@ -151,6 +186,7 @@ end @testset "Minio S3 storage" begin + @info "Testing Minio S3 storage" A = fill(1.0, 30, 20) chunks = (5,10) metadata = Zarr.Metadata(A, chunks; fill_value=-1.5) @@ -171,6 +207,7 @@ end end @testset "AWS S3 Storage" begin + @info "Testing AWS S3 storage" Zarr.AWSS3.AWS.global_aws_config(Zarr.AWSS3.AWS.AWSConfig(creds=nothing, region="us-west-2")) S3, p = Zarr.storefromstring("s3://mur-sst/zarr-v1") @test Zarr.is_zgroup(S3, p) @@ -183,6 +220,7 @@ end end @testset "GCS Storage" begin + @info "Testing GCS storage" for s in ( "gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/highresSST-present/r1i1p1f1/6hrPlev/psl/gn/v20170706", "https://storage.googleapis.com/cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/highresSST-present/r1i1p1f1/6hrPlev/psl/gn/v20170706", @@ -204,6 +242,7 @@ end end @testset "HTTP Storage" begin + @info "Testing HTTP Storage" s = Zarr.DictStore() g = zgroup(s, attrs = Dict("groupatt"=>5)) a = zcreate(Int,g,"a1",10,20,chunks=(5,5),attrs=Dict("arratt"=>2.5)) @@ -239,6 +278,7 @@ end end @testset "Zip Storage" begin + @info "Testing Zip Storage" s = Zarr.DictStore() g = zgroup(s, attrs = Dict("groupatt"=>5)) a = zcreate(Int,g,"a1",10,20,chunks=(5,5),attrs=Dict("arratt"=>2.5)) @@ -259,4 +299,5 @@ end Zarr.writezip(io, ds) Zarr.ZipStore(take!(io)) end + @info "Finished testing ZipStore" end