Skip to content

format and clean up code #10

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions docs/src/index.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@

## ZarrDatasets


See the [documentation of JuliaGeo/CommonDataModel.jl](https://juliageo.org/CommonDataModel.jl/stable/) for the full documentation of the API. As a quick reference, here is an example of how to create and read a Zarr file store.

### Create a Zarr file store
Expand Down Expand Up @@ -39,15 +37,10 @@ data_units = ds["varname"].attrib["units"]
```



```@autodocs
Modules = [ZarrDatasets]
```





### Differences between Zarr and NetCDF files

* All metadata (in particular attributes) is stored in JSON files for the Zarr format with the following implications:
Expand Down
7 changes: 5 additions & 2 deletions src/ZarrDatasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,16 @@ import DiskArrays:
haschunks

import CommonDataModel as CDM
import JSON

using DataStructures
using Zarr
import JSON

export ZarrDataset
export defDim, defVar, defGroup

include("types.jl")
include("dataset.jl")
include("variable.jl")

export ZarrDataset
end
184 changes: 81 additions & 103 deletions src/dataset.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
# Base interface

CDM.name(v::ZarrDataset) = Zarr.zname(v.zgroup)
Base.keys(ds::ZarrDataset) = keys(ds.zgroup.arrays)
Base.haskey(ds::ZarrDataset,varname::SymbolOrString) = haskey(ds.zgroup.arrays,String(varname))
Base.haskey(ds::ZarrDataset, varname::SymbolOrString) = haskey(ds.zgroup.arrays, String(varname))

# CommonDataModel.jl interface

function CDM.variable(ds::ZarrDataset,varname::SymbolOrString)
CDM.name(v::ZarrDataset) = Zarr.zname(v.zgroup)
function CDM.variable(ds::ZarrDataset, varname::SymbolOrString)
zarray = ds.zgroup.arrays[String(varname)]
ZarrVariable{eltype(zarray),ndims(zarray),typeof(zarray),typeof(ds)}(zarray,ds)
ZarrVariable{eltype(zarray),ndims(zarray),typeof(zarray),typeof(ds)}(zarray, ds)
end

# Names of the dimensions recorded in `ds.dimensions`, returned as a tuple of strings.
CDM.dimnames(ds::ZarrDataset) = Tuple(String.(keys(ds.dimensions)))
# Length stored for `dimname`; indexes `ds.dimensions` directly with the name converted to a Symbol.
CDM.dim(ds::ZarrDataset, dimname::SymbolOrString) = ds.dimensions[Symbol(dimname)]
# Variable names are the keys of the arrays held by the underlying Zarr group.
CDM.varnames(ds::ZarrDataset) = keys(ds.zgroup.arrays)
# Attribute names are the keys of the group's attribute dictionary.
CDM.attribnames(ds::ZarrDataset) = keys(ds.zgroup.attrs)
# Value of the group attribute `name` (looked up by its String form).
CDM.attrib(ds::ZarrDataset, name::SymbolOrString) = ds.zgroup.attrs[String(name)]

# function CDM.unlimited(ds::ZarrDataset)
# ul = ds.unlimited
Expand All @@ -18,85 +24,59 @@
# return ul
# end

# Return the length of dimension `dimname`, looking first in `ds.dimensions`
# and then, recursively, in the parent dataset. Calls `error` when neither
# `ds` nor any of its ancestors defines the dimension.
function _dim(ds::ZarrDataset,dimname::SymbolOrString)
dimlen = get(ds.dimensions,Symbol(dimname),nothing)

# found on this dataset
if !isnothing(dimlen)
return dimlen
end

# not defined here: defer to the parent dataset, if there is one
if ds.parentdataset !== nothing
return _dim(ds.parentdataset,dimname)
end

error("dimension $dimname is not defined")
end

CDM.dim(ds::ZarrDataset,dimname::SymbolOrString) = ds.dimensions[Symbol(dimname)]

function CDM.defDim(ds::ZarrDataset,dimname::SymbolOrString,dimlen)
function CDM.defDim(ds::ZarrDataset, dimname::SymbolOrString, dimlen)
dn = Symbol(dimname)
@assert !haskey(ds.dimensions,dn)
@assert !haskey(ds.dimensions, dn)
ds.dimensions[dn] = dimlen
end

CDM.varnames(ds::ZarrDataset) = keys(ds.zgroup.arrays)

CDM.attribnames(ds::ZarrDataset) = keys(ds.zgroup.attrs)
CDM.attrib(ds::ZarrDataset,name::SymbolOrString) = ds.zgroup.attrs[String(name)]

function CDM.defAttrib(ds::ZarrDataset,name::SymbolOrString,value)
function CDM.defAttrib(ds::ZarrDataset, name::SymbolOrString, value)
@assert iswritable(ds)
ds.zgroup.attrs[String(name)] = value

storage = ds.zgroup.storage
io = IOBuffer()
JSON.print(io, ds.zgroup.attrs)
storage[ds.zgroup.path,".zattrs"] = take!(io)
storage[ds.zgroup.path, ".zattrs"] = take!(io)
end

# groups

function CDM.defGroup(ds::ZarrDataset,groupname::SymbolOrString; attrib = Dict())
function CDM.defGroup(ds::ZarrDataset, groupname::SymbolOrString; attrib=Dict())
_attrib = Dict{String,Any}(attrib)
zg = zgroup(ds.zgroup,String(groupname),attrs = _attrib)
zg = zgroup(ds.zgroup, String(groupname), attrs=_attrib)
dimensions = OrderedDict{Symbol,Int}()
return ZarrDataset(ds,zg,dimensions,ds.iswritable,ds.maskingvalue)
return ZarrDataset(ds, zg, dimensions, ds.iswritable, ds.maskingvalue)
end

CDM.groupnames(ds::ZarrDataset) = keys(ds.zgroup.groups)

function CDM.group(ds::ZarrDataset,groupname::SymbolOrString)
function CDM.group(ds::ZarrDataset, groupname::SymbolOrString)
dimensions = OrderedDict{Symbol,Int}()
zg = ds.zgroup.groups[String(groupname)]
return ZarrDataset(ds,zg,dimensions,ds.iswritable,ds.maskingvalue)
return ZarrDataset(ds, zg, dimensions, ds.iswritable, ds.maskingvalue)
end



# Plain field accessors exposing the dataset's stored state through the CDM interface.
# `parentdataset` may be `nothing` (the constructors default it to `nothing`).
CDM.parentdataset(ds::ZarrDataset) = ds.parentdataset
CDM.iswritable(ds::ZarrDataset) = ds.iswritable
CDM.maskingvalue(ds::ZarrDataset) = ds.maskingvalue


"""
ds = ZarrDataset(url::AbstractString,mode = "r";
_omitcode = [404,403],
maskingvalue = missing)
ZarrDataset(zg::Zarr.ZGroup; _omitcode, maskingvalue)
ZarrDataset(f::Function,url::AbstractString,mode = "r";
maskingvalue = missing)
ZarrDataset(url::AbstractString, mode = "r"; kw...)
ZarrDataset(zg::Zarr.ZGroup; kw...)
ZarrDataset(f::Function, url::AbstractString, mode = "r"; kw...)

Open the zarr dataset at the url or path `url`. The mode can only be `"r"` (read-only)
or `"c"` (create). `ds` supports the API of the
[JuliaGeo/CommonDataModel.jl](https://github.com/JuliaGeo/CommonDataModel.jl).
The experimental `_omitcode` keyword defines which HTTP error codes should be
treated as missing chunks. For compatibility with Python's Zarr, the HTTP error 403
(permission denied) is also treated as a missing chunk in addition to 404 (not
found).

The parameter `maskingvalue` defines which special value should be used
as a replacement for fill values. The default is `missing`.
# Keywords

- `_omitcode`: *experimental*, defines which HTTP error codes should be treated as
missing chunks. For compatibility with Python's Zarr, the HTTP error 403
(permission denied) is also treated as a missing chunk in addition to 404 (not
found). Defaults to `[404, 403]`.
- `maskingvalue`: defines which special value should be used as a replacement
for fill values. Defaults to `missing`.
- `attrib`: Attributes, defaults to `Dict()`.

Example:

Expand Down Expand Up @@ -128,70 +108,59 @@
end # implicit call to close(ds)
```
"""
function ZarrDataset(url::AbstractString,mode = "r";
parentdataset = nothing,
_omitcode = [404,403],
maskingvalue = missing,
attrib = Dict(),
)

dimensions = OrderedDict{Symbol,Int}()

function ZarrDataset(url::AbstractString, mode="r";
parentdataset=nothing,
_omitcode=[404, 403],
maskingvalue=missing,
attrib=Dict(),
)
zg = if mode == "r"
zg = Zarr.zopen(url,mode)
zg = Zarr.zopen(url, mode)
elseif mode == "c"
store = Zarr.DirectoryStore(url)
zg = zgroup(store, "",attrs = Dict{String,Any}(attrib))
zg = zgroup(store, "", attrs=Dict{String,Any}(attrib))
end
ZarrDataset(zg; mode, parentdataset, _omitcode, maskingvalue, attrib)
end

function ZarrDataset(store::Zarr.AbstractStore,mode = "r";
parentdataset = nothing,
_omitcode = [404,403],
maskingvalue = missing,
attrib = Dict(),
)
function ZarrDataset(store::Zarr.AbstractStore, mode="r";
parentdataset=nothing,
_omitcode=[404, 403],
maskingvalue=missing,
attrib=Dict(),
)
return ZarrDataset(zopen(store, mode); mode, parentdataset, _omitcode, maskingvalue, attrib)
end

function ZarrDataset(zg::Zarr.ZGroup;
mode = "r",
parentdataset = nothing,
_omitcode = [404,403],
maskingvalue = missing,
attrib = Dict(),
)

mode="r",
parentdataset=nothing,
_omitcode=[404, 403],
maskingvalue=missing,
attrib=Dict(),
)
dimensions = ZarrDatasets.OrderedDict{Symbol,Int}()
iswritable = false
if (zg.storage isa Zarr.HTTPStore) ||
(zg.storage isa Zarr.ConsolidatedStore{Zarr.HTTPStore})
@debug "omit chunks on HTTP error" _omitcode
Zarr.missing_chunk_return_code!(zg.storage,_omitcode)
end
(zg.storage isa Zarr.ConsolidatedStore{Zarr.HTTPStore})
@debug "omit chunks on HTTP error" _omitcode
Zarr.missing_chunk_return_code!(zg.storage, _omitcode)

Check warning on line 144 in src/dataset.jl

View check run for this annotation

Codecov / codecov/patch

src/dataset.jl#L143-L144

Added lines #L143 - L144 were not covered by tests
end

for (varname,zarray) in zg.arrays
for (dimname,dimlen) in zip(reverse(zarray.attrs["_ARRAY_DIMENSIONS"]),size(zarray))
dn = Symbol(dimname)
if haskey(dimensions,dn)
@assert dimensions[dn] == dimlen
else
dimensions[dn] = dimlen
end
for (varname, zarray) in zg.arrays
for (dimname, dimlen) in zip(reverse(zarray.attrs["_ARRAY_DIMENSIONS"]), size(zarray))
dn = Symbol(dimname)
if haskey(dimensions, dn)
@assert dimensions[dn] == dimlen

Check warning on line 151 in src/dataset.jl

View check run for this annotation

Codecov / codecov/patch

src/dataset.jl#L151

Added line #L151 was not covered by tests
else
dimensions[dn] = dimlen
end
end
end

return ZarrDataset(parentdataset, zg, dimensions, mode == "r" ? false : zg.writeable, maskingvalue)

end


ZarrDataset(fnames::AbstractArray{<:AbstractString,N}, args...; kwargs...) where N =
MFDataset(ZarrDataset,fnames, args...; kwargs...)


function ZarrDataset(f::Function,args...; kwargs...)
ZarrDataset(fnames::AbstractArray{<:AbstractString,N}, args...; kwargs...) where {N} =
MFDataset(ZarrDataset, fnames, args...; kwargs...)
function ZarrDataset(f::Function, args...; kwargs...)
ds = ZarrDataset(args...; kwargs...)
try
f(ds)
Expand All @@ -200,7 +169,16 @@
end
end

export ZarrDataset
export defDim
export defVar
export defGroup
# Resolve the length of dimension `dimname` for `ds`.
#
# The dataset's own `dimensions` table is consulted first; when the name is
# absent there, the lookup is repeated on `ds.parentdataset`. If the chain of
# parents is exhausted without a match, an error is raised.
function _dim(ds::ZarrDataset, dimname::SymbolOrString)
    len = get(ds.dimensions, Symbol(dimname), nothing)
    len === nothing || return len

    parentds = ds.parentdataset
    parentds === nothing && error("dimension $dimname is not defined")

    return _dim(parentds, dimname)
end
Loading
Loading