Skip to content

Commit c6b726d

Browse files
authored
Merge pull request #10 from JuliaGeo/format
format and clean up code
2 parents 52830ce + b018c6f commit c6b726d

File tree

13 files changed

+284
-279
lines changed

13 files changed

+284
-279
lines changed

docs/make.jl

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,7 @@ makedocs(;
1010
canonical="https://juliageo.github.io/ZarrDatasets.jl",
1111
assets=String[],
1212
),
13-
pages=[
14-
"Home" => "index.md",
15-
],
13+
pages=["Home" => "index.md"],
1614
)
1715

18-
deploydocs(;
19-
repo="github.com/JuliaGeo/ZarrDatasets.jl",
20-
)
16+
deploydocs(; repo="github.com/JuliaGeo/ZarrDatasets.jl")

docs/src/index.md

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
1-
21
## ZarrDatasets
32

4-
53
See the [documentation of JuliaGeo/CommonDataModel.jl](https://juliageo.org/CommonDataModel.jl/stable/) for the full documentation of the API. As a quick reference, here is an example of how to create and read a Zarr file store.
64

75
### Create a Zarr file store
@@ -39,15 +37,10 @@ data_units = ds["varname"].attrib["units"]
3937
```
4038

4139

42-
4340
```@autodocs
4441
Modules = [ZarrDatasets]
4542
```
4643

47-
48-
49-
50-
5144
### Differences between Zarr and NetCDF files
5245

5346
* All metadata (in particular attributes) is stored in JSON files for the Zarr format with the following implications:

src/ZarrDatasets.jl

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
11
module ZarrDatasets
22

3-
import Base:
4-
checkbounds,
5-
getindex,
6-
setindex!,
7-
size
3+
import Base: checkbounds, getindex, setindex!, size
84

95
import CommonDataModel:
106
CFVariable,
@@ -26,18 +22,19 @@ import CommonDataModel:
2622
parentdataset,
2723
variable
2824

29-
import DiskArrays:
30-
eachchunk,
31-
haschunks
25+
import DiskArrays: eachchunk, haschunks
3226

3327
import CommonDataModel as CDM
28+
using JSON: JSON
29+
3430
using DataStructures
3531
using Zarr
36-
import JSON
32+
33+
export ZarrDataset
34+
export defDim, defVar, defGroup
3735

3836
include("types.jl")
3937
include("dataset.jl")
4038
include("variable.jl")
4139

42-
export ZarrDataset
4340
end

src/dataset.jl

Lines changed: 96 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,22 @@
1+
# Base interface
12

2-
CDM.name(v::ZarrDataset) = Zarr.zname(v.zgroup)
33
Base.keys(ds::ZarrDataset) = keys(ds.zgroup.arrays)
4-
Base.haskey(ds::ZarrDataset,varname::SymbolOrString) = haskey(ds.zgroup.arrays,String(varname))
4+
function Base.haskey(ds::ZarrDataset, varname::SymbolOrString)
5+
haskey(ds.zgroup.arrays, String(varname))
6+
end
57

6-
function CDM.variable(ds::ZarrDataset,varname::SymbolOrString)
8+
# CommonDataModel.jl interface methods
9+
10+
CDM.name(v::ZarrDataset) = Zarr.zname(v.zgroup)
11+
function CDM.variable(ds::ZarrDataset, varname::SymbolOrString)
712
zarray = ds.zgroup.arrays[String(varname)]
8-
ZarrVariable{eltype(zarray),ndims(zarray),typeof(zarray),typeof(ds)}(zarray,ds)
13+
ZarrVariable{eltype(zarray),ndims(zarray),typeof(zarray),typeof(ds)}(zarray, ds)
914
end
10-
1115
CDM.dimnames(ds::ZarrDataset) = Tuple(String.(keys(ds.dimensions)))
16+
CDM.dim(ds::ZarrDataset, dimname::SymbolOrString) = ds.dimensions[Symbol(dimname)]
17+
CDM.varnames(ds::ZarrDataset) = keys(ds.zgroup.arrays)
18+
CDM.attribnames(ds::ZarrDataset) = keys(ds.zgroup.attrs)
19+
CDM.attrib(ds::ZarrDataset, name::SymbolOrString) = ds.zgroup.attrs[String(name)]
1220

1321
# function CDM.unlimited(ds::ZarrDataset)
1422
# ul = ds.unlimited
@@ -18,67 +26,40 @@ CDM.dimnames(ds::ZarrDataset) = Tuple(String.(keys(ds.dimensions)))
1826
# return ul
1927
# end
2028

21-
function _dim(ds::ZarrDataset,dimname::SymbolOrString)
22-
dimlen = get(ds.dimensions,Symbol(dimname),nothing)
23-
24-
if !isnothing(dimlen)
25-
return dimlen
26-
end
27-
28-
if ds.parentdataset !== nothing
29-
return _dim(ds.parentdataset,dimname)
30-
end
31-
32-
error("dimension $dimname is not defined")
33-
end
34-
35-
CDM.dim(ds::ZarrDataset,dimname::SymbolOrString) = ds.dimensions[Symbol(dimname)]
36-
37-
function CDM.defDim(ds::ZarrDataset,dimname::SymbolOrString,dimlen)
29+
function CDM.defDim(ds::ZarrDataset, dimname::SymbolOrString, dimlen)
3830
dn = Symbol(dimname)
39-
@assert !haskey(ds.dimensions,dn)
31+
@assert !haskey(ds.dimensions, dn)
4032
ds.dimensions[dn] = dimlen
4133
end
4234

43-
CDM.varnames(ds::ZarrDataset) = keys(ds.zgroup.arrays)
44-
45-
CDM.attribnames(ds::ZarrDataset) = keys(ds.zgroup.attrs)
46-
CDM.attrib(ds::ZarrDataset,name::SymbolOrString) = ds.zgroup.attrs[String(name)]
47-
48-
function CDM.defAttrib(ds::ZarrDataset,name::SymbolOrString,value)
35+
function CDM.defAttrib(ds::ZarrDataset, name::SymbolOrString, value)
4936
@assert iswritable(ds)
5037
ds.zgroup.attrs[String(name)] = value
5138

5239
storage = ds.zgroup.storage
5340
io = IOBuffer()
5441
JSON.print(io, ds.zgroup.attrs)
55-
storage[ds.zgroup.path,".zattrs"] = take!(io)
42+
storage[ds.zgroup.path, ".zattrs"] = take!(io)
5643
end
5744

5845
# groups
59-
60-
function CDM.defGroup(ds::ZarrDataset,groupname::SymbolOrString; attrib = Dict())
46+
function CDM.defGroup(ds::ZarrDataset, groupname::SymbolOrString; attrib=Dict())
6147
_attrib = Dict{String,Any}(attrib)
62-
zg = zgroup(ds.zgroup,String(groupname),attrs = _attrib)
48+
zg = zgroup(ds.zgroup, String(groupname); attrs=_attrib)
6349
dimensions = OrderedDict{Symbol,Int}()
64-
return ZarrDataset(ds,zg,dimensions,ds.iswritable,ds.maskingvalue)
50+
return ZarrDataset(ds, zg, dimensions, ds.iswritable, ds.maskingvalue)
6551
end
66-
6752
CDM.groupnames(ds::ZarrDataset) = keys(ds.zgroup.groups)
68-
69-
function CDM.group(ds::ZarrDataset,groupname::SymbolOrString)
53+
function CDM.group(ds::ZarrDataset, groupname::SymbolOrString)
7054
dimensions = OrderedDict{Symbol,Int}()
7155
zg = ds.zgroup.groups[String(groupname)]
72-
return ZarrDataset(ds,zg,dimensions,ds.iswritable,ds.maskingvalue)
56+
return ZarrDataset(ds, zg, dimensions, ds.iswritable, ds.maskingvalue)
7357
end
7458

75-
76-
7759
CDM.parentdataset(ds::ZarrDataset) = ds.parentdataset
7860
CDM.iswritable(ds::ZarrDataset) = ds.iswritable
7961
CDM.maskingvalue(ds::ZarrDataset) = ds.maskingvalue
8062

81-
8263
"""
8364
ds = ZarrDataset(url::AbstractString,mode = "r";
8465
_omitcode = [404,403],
@@ -90,13 +71,17 @@ CDM.maskingvalue(ds::ZarrDataset) = ds.maskingvalue
9071
Open the zarr dataset at the url or path `url`. The mode can be `"r"` (read-only),
9172
`"w"` (write), or `"c"` (create). `ds` supports the API of the
9273
[JuliaGeo/CommonDataModel.jl](https://github.com/JuliaGeo/CommonDataModel.jl).
93-
The experimental `_omitcode` allows to define which HTTP error code should be used
94-
for missing chunks. For compatibility with python's Zarr, the HTTP error 403
95-
(permission denied) is also used to missing chunks in addition to 404 (not
96-
found).
9774
98-
The parameter `maskingvalue` allows to define which special value should be used
99-
as replacement for fill values. The default is `missing`.
75+
# Keywords
76+
77+
- `_omitcode`: *experimental*, allows defining which HTTP error code should be used
78+
for missing chunks. For compatibility with python's Zarr, the HTTP error 403
79+
(permission denied) is also used to indicate missing chunks in addition to 404 (not
80+
found). Defaults to `[404, 403]`.
81+
- `maskingvalue`: defines which special
82+
value should be used as replacement for fill values.
83+
Defaults to `missing`.
84+
- `attrib`: Attributes, defaults to `Dict()`
10085
10186
Example:
10287
@@ -128,72 +113,80 @@ zos1 = ZarrDataset(url) do ds
128113
end # implicit call to close(ds)
129114
```
130115
"""
131-
function ZarrDataset(url::AbstractString, mode = "r";
132-
parentdataset = nothing,
133-
_omitcode = [404,403],
134-
maskingvalue = missing,
135-
attrib = Dict(),
136-
)
137-
116+
function ZarrDataset(
117+
url::AbstractString,
118+
mode="r";
119+
parentdataset=nothing,
120+
_omitcode=[404, 403],
121+
maskingvalue=missing,
122+
attrib=Dict(),
123+
)
138124
dimensions = OrderedDict{Symbol,Int}()
139125

140126
zg = if mode in ("w", "r")
141127
zg = Zarr.zopen(url, mode)
142128
elseif mode == "c"
143129
store = Zarr.DirectoryStore(url)
144-
zg = zgroup(store, "", attrs = Dict{String,Any}(attrib))
130+
zg = zgroup(store, ""; attrs=Dict{String,Any}(attrib))
145131
else
146132
throw(ArgumentError("mode must be \"r\", \"w\" or \"c\", got $mode"))
147133
end
148134
ZarrDataset(zg; mode, parentdataset, _omitcode, maskingvalue, attrib)
149135
end
150136

151-
function ZarrDataset(store::Zarr.AbstractStore, mode = "r";
152-
parentdataset = nothing,
153-
_omitcode = [404,403],
154-
maskingvalue = missing,
155-
attrib = Dict(),
156-
)
157-
return ZarrDataset(zopen(store, mode); mode, parentdataset, _omitcode, maskingvalue, attrib)
137+
function ZarrDataset(
138+
store::Zarr.AbstractStore,
139+
mode="r";
140+
parentdataset=nothing,
141+
_omitcode=[404, 403],
142+
maskingvalue=missing,
143+
attrib=Dict(),
144+
)
145+
return ZarrDataset(
146+
zopen(store, mode); mode, parentdataset, _omitcode, maskingvalue, attrib
147+
)
158148
end
159149

160-
function ZarrDataset(zg::Zarr.ZGroup;
161-
mode = "r",
162-
parentdataset = nothing,
163-
_omitcode = [404,403],
164-
maskingvalue = missing,
165-
attrib = Dict(),
166-
)
167-
150+
function ZarrDataset(
151+
zg::Zarr.ZGroup;
152+
mode="r",
153+
parentdataset=nothing,
154+
_omitcode=[404, 403],
155+
maskingvalue=missing,
156+
attrib=Dict(),
157+
)
168158
dimensions = ZarrDatasets.OrderedDict{Symbol,Int}()
169159
iswritable = false
170160
if (zg.storage isa Zarr.HTTPStore) ||
171161
(zg.storage isa Zarr.ConsolidatedStore{Zarr.HTTPStore})
172-
@debug "omit chunks on HTTP error" _omitcode
173-
Zarr.missing_chunk_return_code!(zg.storage,_omitcode)
174-
end
162+
@debug "omit chunks on HTTP error" _omitcode
163+
Zarr.missing_chunk_return_code!(zg.storage, _omitcode)
164+
end
175165

176-
for (varname,zarray) in zg.arrays
177-
for (dimname,dimlen) in zip(reverse(zarray.attrs["_ARRAY_DIMENSIONS"]),size(zarray))
178-
dn = Symbol(dimname)
179-
if haskey(dimensions,dn)
180-
@assert dimensions[dn] == dimlen
181-
else
182-
dimensions[dn] = dimlen
183-
end
166+
for (varname, zarray) in zg.arrays
167+
for (dimname, dimlen) in
168+
zip(reverse(zarray.attrs["_ARRAY_DIMENSIONS"]), size(zarray))
169+
dn = Symbol(dimname)
170+
if haskey(dimensions, dn)
171+
@assert dimensions[dn] == dimlen
172+
else
173+
dimensions[dn] = dimlen
174+
end
184175
end
185176
end
186177

187-
return ZarrDataset(parentdataset, zg, dimensions, mode == "r" ? false : zg.writeable, maskingvalue)
188-
178+
return ZarrDataset(
179+
parentdataset, zg, dimensions, mode == "r" ? false : zg.writeable, maskingvalue
180+
)
189181
end
190182

183+
function ZarrDataset(
184+
fnames::AbstractArray{<:AbstractString,N}, args...; kwargs...
185+
) where {N}
186+
MFDataset(ZarrDataset, fnames, args...; kwargs...)
187+
end
191188

192-
ZarrDataset(fnames::AbstractArray{<:AbstractString,N}, args...; kwargs...) where N =
193-
MFDataset(ZarrDataset,fnames, args...; kwargs...)
194-
195-
196-
function ZarrDataset(f::Function,args...; kwargs...)
189+
function ZarrDataset(f::Function, args...; kwargs...)
197190
ds = ZarrDataset(args...; kwargs...)
198191
try
199192
f(ds)
@@ -202,7 +195,18 @@ function ZarrDataset(f::Function,args...; kwargs...)
202195
end
203196
end
204197

205-
export ZarrDataset
206-
export defDim
207-
export defVar
208-
export defGroup
198+
# Utility functions
199+
200+
function _dim(ds::ZarrDataset, dimname::SymbolOrString)
201+
dimlen = get(ds.dimensions, Symbol(dimname), nothing)
202+
203+
if !isnothing(dimlen)
204+
return dimlen
205+
end
206+
207+
if ds.parentdataset !== nothing
208+
return _dim(ds.parentdataset, dimname)
209+
end
210+
211+
error("dimension $dimname is not defined")
212+
end

src/types.jl

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,21 @@
11

2-
struct ZarrVariable{T,N,TZA <: AbstractArray{T,N},TZG} <: CDM.AbstractVariable{T,N}
2+
"""
3+
ZarrVariable <: CDM.AbstractVariable
4+
5+
ZarrVariable(zarray, parentdataset)
6+
7+
A CommonDataModel.jl compatible zarr variable.
8+
9+
Usually generated with `CommonDataModel.defVar(::ZarrDataset, args...; kw...)`
10+
"""
11+
struct ZarrVariable{T,N,TZA<:AbstractArray{T,N},TZG} <: CDM.AbstractVariable{T,N}
312
zarray::TZA
413
parentdataset::TZG
514
end
615

7-
struct ZarrDataset{TDS <: Union{CDM.AbstractDataset,Nothing},Tmaskingvalue,TZ} <: CDM.AbstractDataset
16+
# Documented in dataset.jl
17+
struct ZarrDataset{TDS<:Union{CDM.AbstractDataset,Nothing},Tmaskingvalue,TZ} <:
18+
CDM.AbstractDataset
819
parentdataset::TDS
920
zgroup::TZ
1021
dimensions::OrderedDict{Symbol,Int}

0 commit comments

Comments
 (0)