Skip to content

Commit 94433fa

Browse files
Merge pull request #35 from lupemba/subtype-AbstractDiskArray
Make AbstractVariable a subtype of AbstractDiskArray
2 parents f0d1211 + f31d810 commit 94433fa

22 files changed

+409
-168
lines changed

.github/workflows/IntegrationTest.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ jobs:
2323
- {user: JuliaGeo, repo: GRIBDatasets.jl}
2424
- {user: Alexander-Barth, repo: TIFFDatasets.jl}
2525
- {user: JuliaGeo, repo: ZarrDatasets.jl}
26+
- {user: eumetsat, repo: MetopDatasets.jl}
2627

2728
steps:
2829
- uses: actions/checkout@v5

Project.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,13 @@ keywords = ["netcdf", "GRIB", "climate and forecast conventions", "oceanography"
44
license = "MIT"
55
desc = "CommonDataModel is a module that defines types common to NetCDF and GRIB data"
66
authors = ["Alexander Barth <barth.alexander@gmail.com>"]
7-
version = "0.3.10"
7+
version = "0.4.0"
88

99
[deps]
1010
CFTime = "179af706-886a-5703-950a-314cd64e0468"
1111
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
1212
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
13+
DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
1314
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
1415
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
1516
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
@@ -18,6 +19,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
1819
CFTime = "0.1.1, 0.2"
1920
DataStructures = "0.17, 0.18, 0.19"
2021
Dates = "1"
22+
DiskArrays = "0.4.15"
2123
Preferences = "1.3"
2224
Printf = "1"
2325
Statistics = "1"

src/CommonDataModel.jl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,20 @@ using Dates
77
using Printf
88
using Preferences
99
using DataStructures
10+
import DiskArrays:
11+
AbstractDiskArray,
12+
AbstractSubDiskArray,
13+
subarray,
14+
writeblock!,
15+
readblock!,
16+
ChunkStyle,
17+
haschunks,
18+
eachchunk,
19+
Unchunked,
20+
Chunked,
21+
GridChunks
22+
23+
import DiskArrays
1024
import Base:
1125
LogicalIndex,
1226
checkbounds,

src/attribute.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ function delAttrib(ds::Union{AbstractDataset,AbstractVariable},name::SymbolOrStr
3535
end
3636

3737

38-
attribs(ds::Union{AbstractDataset,AbstractVariable}) =
38+
attribs(ds::Union{AbstractDataset,AbstractVariable, SubVariable}) =
3939
OrderedDict((dn,attrib(ds,dn)) for dn in attribnames(ds))
4040

4141

src/cfconventions.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ variable `ncv` with the
1717
standard name modifier `modifier`. It can be used for example to access
1818
related variables such as status flags.
1919
"""
20-
function ancillaryvariables(ncv::CFVariable,modifier)
20+
function ancillaryvariables(ncv::Union{CFVariable, SubVariable{<:Any,<:Any,<:CFVariable}},modifier)
2121
ds = dataset(ncv)
2222
varname = name(ncv)
2323

@@ -44,7 +44,7 @@ allowmissing(x::AbstractArray{T}) where {T} = convert(AbstractArray{Union{T, Mis
4444

4545

4646

47-
function _filter(ncv::AbstractVariable, indices...; accepted_status_flags = nothing)
47+
function _filter(ncv::Union{AbstractVariable,SubVariable}, indices...; accepted_status_flags = nothing)
4848
data = allowmissing(ncv[indices...])
4949

5050
if (accepted_status_flags != nothing)
@@ -99,13 +99,13 @@ good_data = NCDatasets.filter(ds["data"],:,:, accepted_status_flags = ["good_dat
9999
```
100100
101101
"""
102-
filter(ncv::AbstractVariable, indices::TIndices...; kwargs...) =
102+
filter(ncv::Union{AbstractVariable,SubVariable}, indices::TIndices...; kwargs...) =
103103
_filter(ncv, indices...; kwargs...)
104104

105-
filter(ncv::AbstractVariable, indices::Union{Vector{<:Integer}, Array{<:CartesianIndex}}...; kwargs...) =
105+
filter(ncv::Union{AbstractVariable,SubVariable}, indices::Union{Vector{<:Integer}, Array{<:CartesianIndex}}...; kwargs...) =
106106
_filter(ncv, indices...; kwargs...)
107107

108-
filter(ncv::AbstractVariable, indices::BitVector; kwargs...) =
108+
filter(ncv::Union{AbstractVariable,SubVariable}, indices::BitVector; kwargs...) =
109109
_filter(ncv, indices...; kwargs...)
110110

111111
"""
@@ -127,7 +127,7 @@ v = ncv[:]
127127
close(ds)
128128
```
129129
"""
130-
function coord(v::AbstractVariable,standard_name)
130+
function coord(v::Union{AbstractVariable,SubVariable},standard_name)
131131
matches = Dict(
132132
"time" => [r".*since.*"],
133133
# It is great to have choice!

src/cfvariable.jl

Lines changed: 47 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ ds = NCDataset("foo.nc");
8484
close(ds)
8585
```
8686
"""
87-
function cfvariable(ds,
87+
function cfvariable(ds,
8888
varname;
8989
_v = variable(ds,varname),
9090
attrib = _v.attrib,
@@ -441,56 +441,83 @@ end
441441
return CFinvtransform(data,fv,inv_scale_factor,minus_offset,time_origin,inv_time_factor,maskingvalue,DT)
442442
end
443443

444+
## Define for DiskArrays
445+
@inline function CFinvtransformdata(data::AbstractDiskArray{T,N},fv,scale_factor,add_offset,time_origin,time_factor,maskingvalue,DT) where {T,N}
446+
data_materialized = Array(data)
447+
return CFinvtransformdata(data_materialized,fv,scale_factor,add_offset,time_origin,time_factor,maskingvalue,DT)
448+
end
449+
450+
@inline function CFinvtransformdata(
451+
data::AbstractDiskArray{T,N},fv::Tuple{},scale_factor::Nothing,
452+
add_offset::Nothing,time_origin::Nothing,time_factor::Nothing,maskingvalue,::Type{T}) where {T,N}
453+
# no transformation necessary (avoid allocation)
454+
return data
455+
end
456+
444457

445458

446459
# this function is necessary to avoid "iterating" over a single character in Julia 1.0 (fixed in Julia 1.3)
447460
# https://discourse.julialang.org/t/broadcasting-and-single-characters/16836
448461
#@inline CFtransformdata(data::Char,fv,scale_factor,add_offset,time_origin,time_factor,DTcast) = CFtransform_missing(data,fv)
449462
#@inline CFinvtransformdata(data::Char,fv,scale_factor,add_offset,time_origin,time_factor,DT) = CFtransform_replace_missing(data,fv)
450463

451-
function Base.getindex(v::CFVariable, indexes::TIndices...)
452-
data = parent(v)[indexes...]
453-
return CFtransformdata(data,fill_and_missing_values(v),scale_factor(v),add_offset(v),
454-
time_origin(v),time_factor(v),maskingvalue(v),eltype(v))
464+
function DiskArrays.readblock!(v::CFVariable{T, N},
465+
aout,
466+
indexes::Vararg{OrdinalRange, N}) where {T, N}
467+
468+
parent_var = parent(v)
469+
data = similar(aout, eltype(parent_var))
470+
DiskArrays.readblock!(parent_var, data, indexes...)
471+
472+
CFtransformdata!(aout, data,fill_and_missing_values(v),scale_factor(v),add_offset(v),
473+
time_origin(v),time_factor(v),maskingvalue(v))
474+
475+
476+
return nothing
455477
end
456478

457-
function Base.setindex!(v::CFVariable,data::Array{Missing,N},indexes::TIndices...) where N
458-
parent(v)[indexes...] = fill(fillvalue(v),size(data))
479+
480+
function DiskArrays.writeblock!(v::CFVariable{T, N}, data::Array{Missing,N}, indexes::Vararg{OrdinalRange, N}) where {T, N}
481+
parent(v)[indexes...] .= fillvalue(v)
459482
end
460483

461-
function Base.setindex!(v::CFVariable,data::Missing,indexes::TIndices...)
462-
parent(v)[indexes...] = fillvalue(v)
484+
function DiskArrays.writeblock!(v::CFVariable{T, N}, data::Missing, indexes::Vararg{OrdinalRange, N}) where {T, N}
485+
parent(v)[indexes...] .= fillvalue(v)
463486
end
464487

465-
function Base.setindex!(v::CFVariable,data::Union{T,Array{T}},indexes::TIndices...) where T <: Union{AbstractCFDateTime,DateTime,Missing}
466488

489+
function DiskArrays.writeblock!(v::CFVariable{T, N}, data::Union{DT,Array{DT}}, indexes::Vararg{OrdinalRange, N}) where {T, N, DT <: Union{AbstractCFDateTime,DateTime,Missing}}
467490
if calendar(v) !== nothing
468491
# can throw a conversion error if the calendar attribute already exists and
469492
# is incompatible with the provided data
470-
parent(v)[indexes...] = CFinvtransformdata(
493+
data_transformed = CFinvtransformdata(
471494
data,fill_and_missing_values(v),scale_factor(v),add_offset(v),
472495
time_origin(v),time_factor(v),
473496
maskingvalue(v),
474497
eltype(parent(v)))
498+
499+
DiskArrays.writeblock!(parent(v), data_transformed, indexes...)
500+
475501
return data
476502
end
477503

478504
@error "Time units and calendar must be defined during defVar and cannot change"
479505
end
480506

507+
function DiskArrays.writeblock!(v::CFVariable{T,N}, data, indexes::Vararg{OrdinalRange, N}) where {T, N}
481508

482-
function Base.setindex!(v::CFVariable,data,indexes::TIndices...)
483-
parent(v)[indexes...] = CFinvtransformdata(
484-
data,fill_and_missing_values(v),
485-
scale_factor(v),add_offset(v),
486-
time_origin(v),time_factor(v),
487-
maskingvalue(v),
488-
eltype(parent(v)))
509+
data_transformed = CFinvtransformdata(
510+
data,fill_and_missing_values(v),
511+
scale_factor(v),add_offset(v),
512+
time_origin(v),time_factor(v),
513+
maskingvalue(v),
514+
eltype(parent(v)))
489515

490-
return data
516+
DiskArrays.writeblock!(parent(v), data_transformed, indexes...)
491517
end
492518

493519

520+
494521
# can be overridden for a faster implementation
495522
function boundsParentVar(ds,varname)
496523
for vn in varnames(ds)
@@ -526,7 +553,7 @@ function _getattrib(ds,v,parentname,attribname,default)
526553
end
527554
end
528555

529-
function _isrelated(v1::AbstractVariable,v2::AbstractVariable)
556+
function _isrelated(v1::Union{AbstractVariable,SubVariable},v2::Union{AbstractVariable,SubVariable})
530557
dimnames(v1) ⊆ dimnames(v2)
531558
end
532559

src/dataset.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ function Base.getindex(ds::AbstractDataset,varname::SymbolOrString)
171171
end
172172

173173

174-
function Base.setindex!(ds::AbstractDataset,data::AbstractVariable,varname::SymbolOrString)
174+
function Base.setindex!(ds::AbstractDataset,data::Union{AbstractVariable, SubVariable},varname::SymbolOrString)
175175
return defVar(ds, varname, data)
176176
end
177177

@@ -202,7 +202,7 @@ julia> data = varbyattrib(ds, standard_name = "longitude")[1][:]
202202
```
203203
204204
"""
205-
function varbyattrib(ds::Union{AbstractDataset,AbstractVariable}; kwargs...)
205+
function varbyattrib(ds::Union{AbstractDataset,AbstractVariable, SubVariable}; kwargs...)
206206
# Start with an empty list of variables
207207
varlist = []
208208

src/defer.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,9 @@ variable(dds::DeferDataset,varname::Symbol) = variable(dds,string(varname))
111111
dataset(dv::DeferVariable{T,N,TDS}) where {T,N,TDS} =
112112
DeferDataset(TDS,dv.r.filename,dv.r.mode; dv.r.args...)
113113

114-
function Base.getindex(dv::DeferVariable,indexes::Union{Int,Colon,AbstractRange{<:Integer}}...)
114+
function DiskArrays.readblock!(dv::DeferVariable{T, N}, aout,indexes::Vararg{OrdinalRange, N}) where {T, N}
115115
Variable(dv) do v
116-
return v[indexes...]
116+
aout .= v[indexes...]
117117
end
118118
end
119119

src/groupby.jl

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ function Base.broadcasted(::GroupedVariableStyle,f::Function,A::GroupedVariable{
279279
A.v,A.coordname,A.group_fun,A.groupmap,A.dim,map_fun)
280280
end
281281

282-
function GroupedVariable(v::TV,coordname,group_fun::TF,groupmap,dim,map_fun::TM) where TV <: AbstractVariable where {TF,TM}
282+
function GroupedVariable(v::TV,coordname,group_fun::TF,groupmap,dim,map_fun::TM) where TV <: Union{AbstractVariable,SubVariable} where {TF,TM}
283283
TGM = typeof(groupmap)
284284

285285
#TG = Base.return_types(selectdim,(TV,Int,Int,))[1]
@@ -361,7 +361,7 @@ close(ds)
361361
```
362362
363363
"""
364-
function groupby(v::AbstractVariable,(coordname,group_fun)::Pair{<:SymbolOrString,TF}) where TF
364+
function groupby(v::Union{AbstractVariable,SubVariable},(coordname,group_fun)::Pair{<:SymbolOrString,TF}) where TF
365365
# for NCDatasets 0.12
366366
c = v[String(coordname)][:]
367367
class = group_fun.(c)
@@ -462,6 +462,9 @@ Base.BroadcastStyle(::Type{<:ReducedGroupedVariable}) = ReducedGroupedVariableSt
462462
Base.BroadcastStyle(::DefaultArrayStyle,::ReducedGroupedVariableStyle) = ReducedGroupedVariableStyle()
463463
Base.BroadcastStyle(::ReducedGroupedVariableStyle,::DefaultArrayStyle) = ReducedGroupedVariableStyle()
464464

465+
Base.BroadcastStyle(::DiskArrays.ChunkStyle,::ReducedGroupedVariableStyle) = ReducedGroupedVariableStyle()
466+
Base.BroadcastStyle(::ReducedGroupedVariableStyle,::DiskArrays.ChunkStyle) = ReducedGroupedVariableStyle()
467+
465468
function Base.similar(bc::Broadcasted{ReducedGroupedVariableStyle}, ::Type{ElType}) where ElType
466469
# Scan the inputs for the ReducedGroupedVariable:
467470
A = find_gv(ReducedGroupedVariable,bc)
@@ -587,3 +590,16 @@ function dataset(gr::ReducedGroupedVariable)
587590
gr.reduce_fun,
588591
)
589592
end
593+
594+
# ReducedGroupedVariable is a Variable and therefore an AbstractDiskArray.
595+
# getindex is overloaded for ReducedGroupedVariable, so readblock! for
596+
# ReducedGroupedVariable is defined to delegate to getindex.
597+
# An alternative solution is to remove getindex definitions
598+
# and then implement the read logic in readblock!
599+
function DiskArrays.readblock!(gr::ReducedGroupedVariable{T,N},
600+
aout,
601+
indexes::Vararg{OrdinalRange, N}) where {T,N}
602+
603+
aout .= Base.getindex(gr,indexes...)
604+
return aout
605+
end

src/memory_dataset.jl

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,18 @@ end
7070

7171
Base.parent(v::MemoryVariable) = v.data
7272
Base.size(v::MemoryVariable) = size(parent(v))
73-
Base.getindex(v::MemoryVariable,ij::TIndices...) = parent(v)[ij...]
74-
function Base.setindex!(v::MemoryVariable,data,ij...)
73+
74+
function DiskArrays.readblock!(v::MemoryVariable{T, N},
75+
aout,
76+
indexes::Vararg{OrdinalRange, N}) where {T, N}
77+
78+
aout .= parent(v)[indexes...]
79+
end
80+
81+
82+
function DiskArrays.writeblock!(v::MemoryVariable{T, N}, data, indexes::Vararg{OrdinalRange, N}) where {T, N}
7583
sz = size(v)
76-
parent(v)[ij...] = data
84+
parent(v)[indexes...] = data
7785

7886
root = _root(v)
7987
for idim = findall(size(v) .> sz)
@@ -83,6 +91,19 @@ function Base.setindex!(v::MemoryVariable,data,ij...)
8391
return data
8492
end
8593

94+
function DiskArrays.writeblock!(v::MemoryVariable{T, 0}, data) where {T}
95+
sz = size(v)
96+
parent(v)[] = data[]
97+
98+
root = _root(v)
99+
for idim = findall(size(v) .> sz)
100+
dname = v.dimnames[idim]
101+
grow_unlimited_dimension(v.parent_dataset,dname,size(v,idim))
102+
end
103+
return data
104+
end
105+
106+
86107
CDM.load!(v::MemoryVariable,buffer,ij...) = buffer .= view(parent(v),ij...)
87108
CDM.name(v::Union{MemoryVariable,MemoryDataset}) = v.name
88109
CDM.dimnames(v::MemoryVariable) = v.dimnames

0 commit comments

Comments
 (0)