Skip to content

Commit f8a6e0d

Browse files
fill value and write support
1 parent 10f96ce commit f8a6e0d

File tree

9 files changed

+163
-27
lines changed

9 files changed

+163
-27
lines changed

Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ version = "0.1.0"
77
CommonDataModel = "1fbeeb36-5f17-413c-809b-666fb144f157"
88
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
99
DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
10+
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
1011
Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99"
1112

1213
[compat]

docs/src/index.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ Modules = [ZarrDatasets]
88

99
### Differences between Zarr and NetCDF files
1010

11-
* All metadata is stored in JSON files for Zarr with the following implications:
11+
* All metadata (in particular attributes) is stored in JSON files for the Zarr format with the following implications:
1212
* JSON does not distinguish between integers and real numbers. They are all considered as generic numbers. Whole numbers are loaded as `Int64` and decimal numbers as `Float64`. It is not possible to store the number `1.0` as a real number.
1313
* The order of keys in a JSON document is undefined. It is therefore not possible to have a consistent ordering of the attributes or variables.
14+
* The JSON standard does not allow the special floating-point values `NaN`, `+Inf` and `-Inf` (see https://github.com/capnproto/capnproto/issues/261).

src/ZarrDatasets.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ import CommonDataModel:
1313
attrib,
1414
attribnames,
1515
dataset,
16+
defAttrib,
17+
defVar,
18+
defDim,
1619
dim,
1720
dimnames,
1821
iswritable,
@@ -29,6 +32,7 @@ import DiskArrays:
2932
import CommonDataModel as CDM
3033
using DataStructures
3134
using Zarr
35+
import JSON
3236

3337
include("types.jl")
3438
include("dataset.jl")

src/dataset.jl

Lines changed: 47 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,7 @@ function CDM.variable(ds::ZarrDataset,varname::SymbolOrString)
88
ZarrVariable{eltype(zarray),ndims(zarray),typeof(zarray),typeof(ds)}(zarray,ds)
99
end
1010

11-
CDM.dimnames(ds::ZarrDataset) = Tuple(
12-
sort(
13-
unique(
14-
reduce(vcat,
15-
(collect(dimnames(variable(ds,vn))) for vn in keys(ds)),
16-
init = String[]
17-
))))
11+
CDM.dimnames(ds::ZarrDataset) = Tuple(String.(keys(ds.dimensions)))
1812

1913
# function CDM.unlimited(ds::ZarrDataset)
2014
# ul = ds.unlimited
@@ -33,31 +27,35 @@ CDM.dimnames(ds::ZarrDataset) = Tuple(
3327
# return nothing
3428
# end
3529

36-
function CDM.dim(ds::ZarrDataset,dimname::SymbolOrString)
30+
CDM.dim(ds::ZarrDataset,dimname::SymbolOrString) = ds.dimensions[Symbol(dimname)]
3731

38-
for vn in keys(ds)
39-
v = variable(ds,vn)
40-
dn = dimnames(v)
41-
i = findfirst(==(dimname),dn)
42-
if !isnothing(i)
43-
return size(v,i)
44-
end
45-
end
46-
error("dimension $dimname not found")
32+
"""
    CDM.defDim(ds::ZarrDataset, dimname::SymbolOrString, dimlen)

Register the dimension `dimname` with length `dimlen` in `ds.dimensions`
and return `dimlen`.

The name is stored as a `Symbol`.

# Throws
- `ArgumentError` if a dimension with this name is already registered.
"""
function CDM.defDim(ds::ZarrDataset, dimname::SymbolOrString, dimlen)
    dn = Symbol(dimname)
    # Explicit check instead of `@assert`: this validates caller input, and
    # assertions are a debugging aid that may be disabled.
    if haskey(ds.dimensions, dn)
        throw(ArgumentError("dimension $(dimname) is already defined"))
    end
    ds.dimensions[dn] = dimlen
    return dimlen
end
4837

4938
CDM.varnames(ds::ZarrDataset) = keys(ds.zgroup.arrays)
5039

5140
CDM.attribnames(ds::ZarrDataset) = keys(ds.zgroup.attrs)
5241
CDM.attrib(ds::ZarrDataset,name::SymbolOrString) = ds.zgroup.attrs[String(name)]
5342

43+
"""
    CDM.defAttrib(ds::ZarrDataset, name::SymbolOrString, value)

Set the global attribute `name` of `ds` to `value` and write the attributes
back to the store.

The value is first stored in `ds.zgroup.attrs`; the complete attribute
collection is then serialized with `JSON.print` and assigned to the `.zattrs`
entry of the group's storage. The serialized bytes are returned.

# Throws
- `ArgumentError` if `ds` is not writable.
"""
function CDM.defAttrib(ds::ZarrDataset, name::SymbolOrString, value)
    # Explicit check instead of `@assert`, which may be disabled.
    iswritable(ds) || throw(ArgumentError("dataset is not writable"))

    zg = ds.zgroup
    zg.attrs[String(name)] = value

    # Re-serialize all attributes: `.zattrs` is rewritten as a whole.
    io = IOBuffer()
    JSON.print(io, zg.attrs)
    bytes = take!(io)
    zg.storage[zg.path, ".zattrs"] = bytes
    return bytes
end
5452

5553
CDM.groupnames(ds::ZarrDataset) = keys(ds.zgroup.groups)
5654
CDM.group(ds::ZarrDataset,name::SymbolOrString) = ZarrDataset(ds.zgroup.groups,String(name),ds)
5755

5856

5957
CDM.parentdataset(ds::ZarrDataset) = ds.parentdataset
60-
CDM.iswritable(ds::ZarrDataset) = false
58+
CDM.iswritable(ds::ZarrDataset) = ds.iswritable
6159
CDM.maskingvalue(ds::ZarrDataset) = ds.maskingvalue
6260

6361

@@ -108,12 +106,38 @@ end # implicit call to close(ds)
108106
function ZarrDataset(url::AbstractString, mode = "r";
                     parentdataset = nothing,
                     _omitcode = 404,
                     maskingvalue = missing,
                     attrib = Dict(),
                     )
    # Supported modes:
    #   "r": open an existing store read-only via `Zarr.zopen` and collect the
    #        dimensions from the `_ARRAY_DIMENSIONS` attribute of every array
    #   "c": create a new group in a `Zarr.DirectoryStore` at `url`, with
    #        `attrib` as global attributes; the dataset is writable
    # Any other mode is rejected with an `ArgumentError` (previously this fell
    # through with `zg` undefined and failed with an `UndefVarError`).

    dimensions = OrderedDict{Symbol,Int}()

    if mode == "r"
        writable = false
        zg = Zarr.zopen(url, mode)
        if (zg.storage isa Zarr.HTTPStore) ||
           (zg.storage isa Zarr.ConsolidatedStore{Zarr.HTTPStore})
            @debug "omit chunks on HTTP error" _omitcode
            Zarr.missing_chunk_return_code!(zg.storage, _omitcode)
        end

        for (varname, zarray) in zg.arrays
            # `_ARRAY_DIMENSIONS` is stored in reverse order relative to
            # `size(zarray)`, hence the `reverse` (same as `CDM.dimnames`).
            arraydims = reverse(zarray.attrs["_ARRAY_DIMENSIONS"])
            for (dimname, dimlen) in zip(arraydims, size(zarray))
                dn = Symbol(dimname)
                if haskey(dimensions, dn)
                    # A shared dimension must have the same length in every
                    # array; this validates file content, so no `@assert`.
                    if dimensions[dn] != dimlen
                        throw(DimensionMismatch(
                            "dimension $(dimname) has length $(dimlen) in " *
                            "variable $(varname) but length " *
                            "$(dimensions[dn]) in another variable"))
                    end
                else
                    dimensions[dn] = dimlen
                end
            end
        end
    elseif mode == "c"
        writable = true
        store = Zarr.DirectoryStore(url)
        zg = zgroup(store, "", attrs = Dict(attrib))
    else
        throw(ArgumentError(
            "unsupported mode \"$(mode)\": use \"r\" (read) or \"c\" (create)"))
    end

    return ZarrDataset(zg, parentdataset, dimensions, writable, maskingvalue)
end
118142

119143

src/types.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,7 @@ end
77
struct ZarrDataset{TZ,TP,Tmaskingvalue} <: CDM.AbstractDataset
88
zgroup::TZ
99
parentdataset::TP
10+
dimensions::OrderedDict{Symbol,Int}
11+
iswritable::Bool
1012
maskingvalue::Tmaskingvalue
1113
end

src/variable.jl

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,61 @@ CDM.name(v::ZarrVariable) = Zarr.zname(v.zarray)
1010
CDM.dimnames(v::ZarrVariable) = Tuple(reverse(v.zarray.attrs["_ARRAY_DIMENSIONS"]))
1111
CDM.dataset(v::ZarrVariable) = v.parentdataset
1212

13-
CDM.attribnames(v::ZarrVariable) = filter(!=("_ARRAY_DIMENSIONS"),keys(v.zarray.attrs))
14-
CDM.attrib(v::ZarrVariable,name::SymbolOrString) = v.zarray.attrs[String(name)]
13+
"""
    CDM.attribnames(v::ZarrVariable)

Return the attribute names of `v`.

The `_ARRAY_DIMENSIONS` key of `v.zarray.attrs` is left out, and `_FillValue`
is added when the array metadata has a fill value (`fill_value` is not
`nothing`).
"""
function CDM.attribnames(v::ZarrVariable)
    visible = filter(k -> k != "_ARRAY_DIMENSIONS", keys(v.zarray.attrs))
    fillvalue = v.zarray.metadata.fill_value
    isnothing(fillvalue) || push!(visible, "_FillValue")
    return visible
end
20+
21+
"""
    CDM.attrib(v::ZarrVariable, name::SymbolOrString)

Return the value of the attribute `name` of `v`.

`_FillValue` is answered from the array metadata (`fill_value`) when that is
not `nothing`; every other request is looked up in `v.zarray.attrs`.
"""
function CDM.attrib(v::ZarrVariable, name::SymbolOrString)
    key = String(name)
    if key == "_FillValue"
        fillvalue = v.zarray.metadata.fill_value
        if !isnothing(fillvalue)
            return fillvalue
        end
    end
    return v.zarray.attrs[key]
end
27+
28+
"""
    CDM.defAttrib(v::ZarrVariable, name::SymbolOrString, value)

Set the attribute `name` of the variable `v` to `value` and write the
attributes back to the store.

The value is first stored in `v.zarray.attrs`; the complete attribute
collection is then serialized with `JSON.print` and assigned to the `.zattrs`
entry of the array's storage. The serialized bytes are returned.

# Throws
- `ArgumentError` if the dataset of `v` is not writable.
- `ArgumentError` if `name` is `_FillValue`, which `CDM.attrib` serves from
  the array metadata rather than from the attributes.
"""
function CDM.defAttrib(v::ZarrVariable, name::SymbolOrString, value)
    # Explicit checks instead of `@assert`, which may be disabled.
    iswritable(dataset(v)) || throw(ArgumentError("dataset is not writable"))

    key = String(name)
    if key == "_FillValue"
        throw(ArgumentError(
            "the attribute _FillValue cannot be set with defAttrib"))
    end

    za = v.zarray
    za.attrs[key] = value

    # Re-serialize all attributes: `.zattrs` is rewritten as a whole.
    io = IOBuffer()
    JSON.print(io, za.attrs)
    bytes = take!(io)
    za.storage[za.path, ".zattrs"] = bytes
    return bytes
end
1539

1640

1741
# DiskArray methods
1842
eachchunk(v::ZarrVariable) = eachchunk(v.zarray)
1943
haschunks(v::ZarrVariable) = haschunks(v.zarray)
2044
eachchunk(v::CFVariable{T,N,<:ZarrVariable}) where {T,N} = eachchunk(v.var)
2145
haschunks(v::CFVariable{T,N,<:ZarrVariable}) where {T,N} = haschunks(v.var)
46+
47+
48+
"""
    CDM.defVar(ds::ZarrDataset, name::SymbolOrString, vtype::DataType,
               dimensionnames; chunksizes = nothing, attrib = Dict(), kwargs...)

Create a Zarr array called `name` with element type `vtype` in the group of
`ds` and return it wrapped in a `ZarrVariable`.

# Arguments
- `dimensionnames`: names of the dimensions of the variable. Each must
  already be present in `ds.dimensions` (see `defDim`); the lengths found
  there give the array size.

# Keywords
- `chunksizes`: chunk size per dimension; defaults to the full array size.
- `attrib`: attributes of the variable, as key/value pairs. Keys are
  converted to `String`.
- `kwargs...`: forwarded to `zcreate`.

# Throws
- `ArgumentError` if `ds` is not writable.
- `KeyError` if a dimension name is not defined in `ds`.
"""
function CDM.defVar(ds::ZarrDataset, name::SymbolOrString, vtype::DataType,
                    dimensionnames;
                    chunksizes = nothing, attrib = Dict(), kwargs...)
    # Explicit check instead of `@assert`, which may be disabled.
    iswritable(ds) || throw(ArgumentError("dataset is not writable"))

    # Copy into a `Dict{String,Any}`: `Dict(attrib)` would keep a narrow
    # value type (e.g. `Dict{String,String}`) and the insertion of
    # `_ARRAY_DIMENSIONS` below would then fail.
    _attrib = Dict{String,Any}(String(k) => val for (k, val) in attrib)
    # Stored in reverse order; `CDM.dimnames` reverses it again on read.
    _attrib["_ARRAY_DIMENSIONS"] =
        String[String(dn) for dn in reverse(dimensionnames)]

    _size = ntuple(length(dimensionnames)) do i
        ds.dimensions[Symbol(dimensionnames[i])]
    end

    zarray = zcreate(
        vtype, ds.zgroup, String(name), _size...;
        chunks = something(chunksizes, _size),
        attrs = _attrib,
        kwargs...
    )

    # Take the element type from the array, as `CDM.variable` does, so that
    # both code paths build the same wrapper type.
    return ZarrVariable{eltype(zarray),ndims(zarray),typeof(zarray),typeof(ds)}(
        zarray, ds)
end

test/runtests.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ using ZarrDatasets
44
@testset "ZarrDatasets.jl" begin
55
include("test_cdm.jl")
66
include("test_multifile.jl")
7+
include("test_write.jl")
78
end

test/test_cdm.jl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
using CommonDataModel: iswritable, attribnames, parentdataset, load!, dataset
1+
using CommonDataModel:
2+
attribnames,
3+
dataset,
4+
iswritable,
5+
load!,
6+
parentdataset
27
using Dates
38
using DiskArrays
49
using NCDatasets

test/test_write.jl

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
using ZarrDatasets
2+
using ZarrDatasets:
3+
defDim,
4+
defVar,
5+
defAttrib
6+
using Zarr
7+
using DataStructures
8+
9+
data = rand(Int32,3,5)
10+
11+
fname = tempname()
12+
mkdir(fname)
13+
gattrib = Dict{String,Any}("title" => "this is the title")
14+
ds = ZarrDataset(fname,"c",attrib = gattrib)
15+
16+
defDim(ds,"lon",3)
17+
defDim(ds,"lat",5)
18+
19+
attrib = Dict{String,Any}(
20+
"units" => "m/s",
21+
"long_name" => "test",
22+
)
23+
24+
25+
varname = "var2"
26+
dimensionnames = ("lon","lat")
27+
vtype = Int32
28+
29+
zv = defVar(ds,varname,vtype,dimensionnames, attrib = attrib)
30+
zv[:,:] = data
31+
zv.attrib["lala"] = 12
32+
zv.attrib["standard_name"] = "test"
33+
ds.attrib["history"] = "test"
34+
close(ds)
35+
36+
ds = ZarrDataset(fname)
37+
38+
zv = ds[varname]
39+
40+
@test zv.attrib["lala"] == 12
41+
@test zv.attrib["standard_name"] == "test"
42+
@test ds.attrib["history"] == "test"
43+
44+
@test zv[:,:] == data
45+
46+
io = IOBuffer()
47+
show(io,ds)
48+
str = String(take!(io))
49+
@test occursin("Global",str)

0 commit comments

Comments
 (0)