Skip to content

Make AbstractVariable a subtype of AbstractDiskArray #35

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@ keywords = ["netcdf", "GRIB", "climate and forecast conventions", "oceanography"
license = "MIT"
desc = "CommonDataModel is a module that defines types common to NetCDF and GRIB data"
authors = ["Alexander Barth <[email protected]>"]
version = "0.3.9"
version = "0.4.0"

[deps]
CFTime = "179af706-886a-5703-950a-314cd64e0468"
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Expand All @@ -18,6 +19,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
CFTime = "0.1.1, 0.2"
DataStructures = "0.17, 0.18"
Dates = "1"
DiskArrays = "0.4.13"
Preferences = "1.3"
Printf = "1"
Statistics = "1"
Expand Down
14 changes: 14 additions & 0 deletions src/CommonDataModel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,20 @@ using Dates
using Printf
using Preferences
using DataStructures
import DiskArrays:
AbstractDiskArray,
AbstractSubDiskArray,
subarray,
writeblock!,
readblock!,
ChunkStyle,
haschunks,
eachchunk,
Unchunked,
Chunked,
GridChunks

import DiskArrays
import Base:
LogicalIndex,
checkbounds,
Expand Down
2 changes: 1 addition & 1 deletion src/attribute.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ function delAttrib(ds::Union{AbstractDataset,AbstractVariable},name::SymbolOrStr
end


attribs(ds::Union{AbstractDataset,AbstractVariable}) =
attribs(ds::Union{AbstractDataset,AbstractVariable, SubVariable}) =
OrderedDict((dn,attrib(ds,dn)) for dn in attribnames(ds))


Expand Down
81 changes: 62 additions & 19 deletions src/cfvariable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@
close(ds)
```
"""
function cfvariable(ds,
function cfvariable(ds,
varname;
_v = variable(ds,varname),
attrib = _v.attrib,
Expand Down Expand Up @@ -441,56 +441,99 @@
return CFinvtransform(data,fv,inv_scale_factor,minus_offset,time_origin,inv_time_factor,maskingvalue,DT)
end

## Define for DiskArrays
@inline function CFinvtransformdata(data::AbstractDiskArray{T,N},fv,scale_factor,add_offset,time_origin,time_factor,maskingvalue,DT) where {T,N}
data_materialized = Array(data)
return CFinvtransformdata(data_materialized,fv,scale_factor,add_offset,time_origin,time_factor,maskingvalue,DT)

Check warning on line 447 in src/cfvariable.jl

View check run for this annotation

Codecov / codecov/patch

src/cfvariable.jl#L445-L447

Added lines #L445 - L447 were not covered by tests
end

@inline function CFinvtransformdata(

Check warning on line 450 in src/cfvariable.jl

View check run for this annotation

Codecov / codecov/patch

src/cfvariable.jl#L450

Added line #L450 was not covered by tests
data::AbstractDiskArray{T,N},fv::Tuple{},scale_factor::Nothing,
add_offset::Nothing,time_origin::Nothing,time_factor::Nothing,maskingvalue,::Type{T}) where {T,N}
# no transformation necessary (avoid allocation)
return data

Check warning on line 454 in src/cfvariable.jl

View check run for this annotation

Codecov / codecov/patch

src/cfvariable.jl#L454

Added line #L454 was not covered by tests
end



# this function is necessary to avoid "iterating" over a single character in Julia 1.0 (fixed Julia 1.3)
# https://discourse.julialang.org/t/broadcasting-and-single-characters/16836
#@inline CFtransformdata(data::Char,fv,scale_factor,add_offset,time_origin,time_factor,DTcast) = CFtransform_missing(data,fv)
#@inline CFinvtransformdata(data::Char,fv,scale_factor,add_offset,time_origin,time_factor,DT) = CFtransform_replace_missing(data,fv)

function Base.getindex(v::CFVariable, indexes::TIndices...)
data = parent(v)[indexes...]
return CFtransformdata(data,fill_and_missing_values(v),scale_factor(v),add_offset(v),
time_origin(v),time_factor(v),maskingvalue(v),eltype(v))
function DiskArrays.readblock!(v::CFVariable{T, N},
aout,
indexes::Vararg{OrdinalRange, N}) where {T, N}

parent_var = parent(v)
data = similar(aout, eltype(parent_var))
DiskArrays.readblock!(parent_var, data, indexes...)

aout .= CFtransformdata(data,fill_and_missing_values(v),scale_factor(v),add_offset(v),
time_origin(v),time_factor(v),maskingvalue(v),eltype(v))


return nothing
end

function Base.setindex!(v::CFVariable,data::Array{Missing,N},indexes::TIndices...) where N
parent(v)[indexes...] = fill(fillvalue(v),size(data))

function DiskArrays.writeblock!(v::CFVariable{T, N}, data::Array{Missing,N}, indexes::Vararg{OrdinalRange, N}) where {T, N}
if N == 0
parent(v)[] = fill(fillvalue(v),size(data))

Check warning on line 482 in src/cfvariable.jl

View check run for this annotation

Codecov / codecov/patch

src/cfvariable.jl#L482

Added line #L482 was not covered by tests
else
DiskArrays.writeblock!(parent(v), fill(fillvalue(v),size(data)), indexes...)
end
end

function Base.setindex!(v::CFVariable,data::Missing,indexes::TIndices...)
parent(v)[indexes...] = fillvalue(v)
function DiskArrays.writeblock!(v::CFVariable{T, N}, data::Missing, indexes::Vararg{OrdinalRange, N}) where {T, N}
if N == 0
parent(v)[] = fillvalue(v)

Check warning on line 490 in src/cfvariable.jl

View check run for this annotation

Codecov / codecov/patch

src/cfvariable.jl#L488-L490

Added lines #L488 - L490 were not covered by tests
else
parent(v)[indexes...] = fillvalue(v)

Check warning on line 492 in src/cfvariable.jl

View check run for this annotation

Codecov / codecov/patch

src/cfvariable.jl#L492

Added line #L492 was not covered by tests
end
end

function Base.setindex!(v::CFVariable,data::Union{T,Array{T}},indexes::TIndices...) where T <: Union{AbstractCFDateTime,DateTime,Missing}

function DiskArrays.writeblock!(v::CFVariable{T, N}, data::Union{DT,Array{DT}}, indexes::Vararg{OrdinalRange, N}) where {T, N, DT <: Union{AbstractCFDateTime,DateTime,Missing}}
if calendar(v) !== nothing
# can throw an convertion error if calendar attribute already exists and
# is incompatible with the provided data
parent(v)[indexes...] = CFinvtransformdata(
data_transformed = CFinvtransformdata(
data,fill_and_missing_values(v),scale_factor(v),add_offset(v),
time_origin(v),time_factor(v),
maskingvalue(v),
eltype(parent(v)))
if N==0
parent(v)[] = data_transformed

Check warning on line 507 in src/cfvariable.jl

View check run for this annotation

Codecov / codecov/patch

src/cfvariable.jl#L507

Added line #L507 was not covered by tests
else
DiskArrays.writeblock!(parent(v), data_transformed, indexes...)
end

return data
end

@error "Time units and calendar must be defined during defVar and cannot change"
end

function DiskArrays.writeblock!(v::CFVariable{T,N}, data, indexes::Vararg{OrdinalRange, N}) where {T, N}

data_transformed = CFinvtransformdata(
data,fill_and_missing_values(v),
scale_factor(v),add_offset(v),
time_origin(v),time_factor(v),
maskingvalue(v),
eltype(parent(v)))

function Base.setindex!(v::CFVariable,data,indexes::TIndices...)
parent(v)[indexes...] = CFinvtransformdata(
data,fill_and_missing_values(v),
scale_factor(v),add_offset(v),
time_origin(v),time_factor(v),
maskingvalue(v),
eltype(parent(v)))
if N == 0
parent(v)[] = data_transformed
else
DiskArrays.writeblock!(parent(v), data_transformed, indexes...)
end

return data
end



# can be implemented overridden for faster implementation
function boundsParentVar(ds,varname)
for vn in varnames(ds)
Expand Down
2 changes: 1 addition & 1 deletion src/dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ function Base.getindex(ds::AbstractDataset,varname::SymbolOrString)
end


function Base.setindex!(ds::AbstractDataset,data::AbstractVariable,varname::SymbolOrString)
function Base.setindex!(ds::AbstractDataset,data::Union{AbstractVariable, SubVariable},varname::SymbolOrString)
return defVar(ds, varname, data)
end

Expand Down
4 changes: 2 additions & 2 deletions src/defer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,9 @@ variable(dds::DeferDataset,varname::Symbol) = variable(dds,string(varname))
dataset(dv::DeferVariable{T,N,TDS}) where {T,N,TDS} =
DeferDataset(TDS,dv.r.filename,dv.r.mode; dv.r.args...)

function Base.getindex(dv::DeferVariable,indexes::Union{Int,Colon,AbstractRange{<:Integer}}...)
function DiskArrays.readblock!(dv::DeferVariable{T, N}, aout,indexes::Vararg{OrdinalRange, N}) where {T, N}
Variable(dv) do v
return v[indexes...]
aout .= v[indexes...]
end
end

Expand Down
16 changes: 16 additions & 0 deletions src/groupby.jl
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,9 @@ Base.BroadcastStyle(::Type{<:ReducedGroupedVariable}) = ReducedGroupedVariableSt
Base.BroadcastStyle(::DefaultArrayStyle,::ReducedGroupedVariableStyle) = ReducedGroupedVariableStyle()
Base.BroadcastStyle(::ReducedGroupedVariableStyle,::DefaultArrayStyle) = ReducedGroupedVariableStyle()

Base.BroadcastStyle(::DiskArrays.ChunkStyle,::ReducedGroupedVariableStyle) = ReducedGroupedVariableStyle()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ReducedGroupedVariable use different broadcasting than DiskArrays. This might lead to some confusion.

Base.BroadcastStyle(::ReducedGroupedVariableStyle,::DiskArrays.ChunkStyle) = ReducedGroupedVariableStyle()

function Base.similar(bc::Broadcasted{ReducedGroupedVariableStyle}, ::Type{ElType}) where ElType
# Scan the inputs for the ReducedGroupedVariable:
A = find_gv(ReducedGroupedVariable,bc)
Expand Down Expand Up @@ -587,3 +590,16 @@ function dataset(gr::ReducedGroupedVariable)
gr.reduce_fun,
)
end

# ReducedGroupedVariable is a Variable and therefore an AbstractDiskArray.
# getindex is overloaded for ReducedGroupedVariable and therefore
# ReducedGroupedVariable is defined to use getindex.
# An alternative solution is to remove getindex definitions
# and then implement the read logic in readblock!
function DiskArrays.readblock!(gr::ReducedGroupedVariable{T,N},
aout,
indexes::Vararg{OrdinalRange, N}) where {T,N}

aout .= Base.getindex(gr,indexes...)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently Base.getindex is defined for ReducedGroupedVariable so the DiskArray.jl implementation is not used. An alternative would be to use the DiskArray getindex and implement more logic in readblock!. This is more ReducedGroupedVariable and I can not see any benefit of doing it right now.

return aout
end
20 changes: 17 additions & 3 deletions src/memory_dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,23 @@ end

Base.parent(v::MemoryVariable) = v.data
Base.size(v::MemoryVariable) = size(parent(v))
Base.getindex(v::MemoryVariable,ij::TIndices...) = parent(v)[ij...]
function Base.setindex!(v::MemoryVariable,data,ij...)

function DiskArrays.readblock!(v::MemoryVariable{T, N},
aout,
indexes::Vararg{OrdinalRange, N}) where {T, N}

aout .= parent(v)[indexes...]
end


function DiskArrays.writeblock!(v::MemoryVariable{T, N}, data, indexes::Vararg{OrdinalRange, N}) where {T, N}
sz = size(v)
parent(v)[ij...] = data

if N == 0
parent(v)[] = data[]
else
parent(v)[indexes...] = data
end

root = _root(v)
for idim = findall(size(v) .> sz)
Expand All @@ -83,6 +96,7 @@ function Base.setindex!(v::MemoryVariable,data,ij...)
return data
end


CDM.load!(v::MemoryVariable,buffer,ij...) = buffer .= view(parent(v),ij...)
CDM.name(v::Union{MemoryVariable,MemoryDataset}) = v.name
CDM.dimnames(v::MemoryVariable) = v.dimnames
Expand Down
42 changes: 35 additions & 7 deletions src/multifile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -181,12 +181,33 @@
Base.parent(v::MFVariable) = v.var
Base.parent(v::MFCFVariable) = v.var
Base.Array(v::MFVariable) = Array(parent(v))
Base.getindex(v::MFVariable,indexes::TIndices...) = getindex(parent(v),indexes...)
Base.setindex!(v::MFVariable,data,indexes::TIndices...) = setindex!(parent(v),data,indexes...)

function DiskArrays.readblock!(v::MFVariable{T, N}, aout,indexes::Vararg{OrdinalRange, N}) where {T, N}
aout .= parent(v)[indexes...]
end
function DiskArrays.readblock!(v::MFCFVariable{T, N}, aout, indexes::Vararg{OrdinalRange, N}) where {T, N}
aout .= v.cfvar[indexes...]
end

function DiskArrays.writeblock!(v::MFVariable{T,N}, data, indexes::Vararg{OrdinalRange, N}) where {T, N}
if N == 0
parent(v)[] = data[]

Check warning on line 194 in src/multifile.jl

View check run for this annotation

Codecov / codecov/patch

src/multifile.jl#L192-L194

Added lines #L192 - L194 were not covered by tests
else
parent(v)[indexes...] = data

Check warning on line 196 in src/multifile.jl

View check run for this annotation

Codecov / codecov/patch

src/multifile.jl#L196

Added line #L196 was not covered by tests
end
end

function DiskArrays.writeblock!(v::MFCFVariable{T,N}, data, indexes::Vararg{OrdinalRange, N}) where {T, N}
if N == 0
v.cfvar[] = data[]

Check warning on line 202 in src/multifile.jl

View check run for this annotation

Codecov / codecov/patch

src/multifile.jl#L202

Added line #L202 was not covered by tests
else
v.cfvar[indexes...] = data
end
end

Base.size(v::MFVariable) = size(parent(v))
Base.size(v::MFCFVariable) = size(parent(v))
Base.getindex(v::MFCFVariable,ind::TIndices...) = v.cfvar[ind...]
Base.setindex!(v::MFCFVariable,data,ind::TIndices...) = v.cfvar[ind...] = data

function Base.cat(vs::AbstractVariable...; dims::Integer)
CatArrays.CatArray(dims,vs...)
end
Expand Down Expand Up @@ -288,10 +309,17 @@
storage,chunksizes = chunking(v1)

if storage == :contiguous
return (:chunked, size(v1))
else
return storage,chunksizes
storage = :chunked
chunksizes = size(v1)
end

if ndims(v) == (length(chunksizes)+1)
cat_dim = v.var.dim
chunksizes = (chunksizes[1:(cat_dim-1)]...,1,chunksizes[cat_dim:end]...)
end

@assert ndims(v) == length(chunksizes)
return storage, chunksizes
end

deflate(v::MFVariable) = deflate(v.ds.ds[1][name(v)])
Expand Down
2 changes: 1 addition & 1 deletion src/select.jl
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ function coordinate_value(v::AbstractVariable,name_coord::Symbol)
end


function coordinate_names(v::AbstractVariable)
function coordinate_names(v::Union{AbstractVariable,SubVariable})
ds = dataset(v)
dimension_names = dimnames(v)

Expand Down
Loading
Loading