Skip to content

Make AbstractVariable a subtype of AbstractDiskArray #35

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ version = "0.3.9"
CFTime = "179af706-886a-5703-950a-314cd64e0468"
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Expand All @@ -18,6 +19,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
CFTime = "0.1.1, 0.2"
DataStructures = "0.17, 0.18"
Dates = "1"
DiskArrays = "0.4.12"
Preferences = "1.3"
Printf = "1"
Statistics = "1"
Expand Down
15 changes: 15 additions & 0 deletions src/CommonDataModel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,21 @@ using Dates
using Printf
using Preferences
using DataStructures
import DiskArrays:
AbstractDiskArray,
AbstractSubDiskArray,
subarray,
writeblock!,
readblock!,
ChunkStyle,
haschunks,
eachchunk,
Unchunked,
Chunked,
GridChunks,
_replace_colon

import DiskArrays
import Base:
LogicalIndex,
checkbounds,
Expand Down
2 changes: 1 addition & 1 deletion src/attribute.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ function delAttrib(ds::Union{AbstractDataset,AbstractVariable},name::SymbolOrStr
end


attribs(ds::Union{AbstractDataset,AbstractVariable}) =
attribs(ds::Union{AbstractDataset,AbstractVariable, SubVariable}) =
OrderedDict((dn,attrib(ds,dn)) for dn in attribnames(ds))


Expand Down
25 changes: 15 additions & 10 deletions src/cfvariable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -448,22 +448,30 @@ end
#@inline CFtransformdata(data::Char,fv,scale_factor,add_offset,time_origin,time_factor,DTcast) = CFtransform_missing(data,fv)
#@inline CFinvtransformdata(data::Char,fv,scale_factor,add_offset,time_origin,time_factor,DT) = CFtransform_replace_missing(data,fv)

function Base.getindex(v::CFVariable, indexes::TIndices...)
function DiskArrays.readblock!(v::CFVariable{T, N},
aout,
indexes::Vararg{OrdinalRange, N}) where {T, N}

data = parent(v)[indexes...]
return CFtransformdata(data,fill_and_missing_values(v),scale_factor(v),add_offset(v),
time_origin(v),time_factor(v),maskingvalue(v),eltype(v))

aout .= CFtransformdata(data,fill_and_missing_values(v),scale_factor(v),add_offset(v),
time_origin(v),time_factor(v),maskingvalue(v),eltype(v))


return nothing
end

function Base.setindex!(v::CFVariable,data::Array{Missing,N},indexes::TIndices...) where N

# Write a block made only of `missing` values: the selected region of
# `parent(v)` is overwritten with an array of `fillvalue(v)` that has the
# same size as `data`.
function DiskArrays.writeblock!(v::CFVariable{T, N}, data::Array{Missing,N}, indexes::Vararg{OrdinalRange, N}) where {T, N}
    return setindex!(parent(v), fill(fillvalue(v), size(data)), indexes...) |>
        _ -> fill(fillvalue(v), size(data))
end

function Base.setindex!(v::CFVariable,data::Missing,indexes::TIndices...)
# Write a single `missing`: the selected region of `parent(v)` is assigned
# `fillvalue(v)`, and that fill value is returned.
function DiskArrays.writeblock!(v::CFVariable{T, N}, data::Missing, indexes::Vararg{OrdinalRange, N}) where {T, N}
    fv = fillvalue(v)
    setindex!(parent(v), fv, indexes...)
    return fv
end

function Base.setindex!(v::CFVariable,data::Union{T,Array{T}},indexes::TIndices...) where T <: Union{AbstractCFDateTime,DateTime,Missing}

function DiskArrays.writeblock!(v::CFVariable{T, N}, data::Union{DT,Array{DT}}, indexes::Vararg{OrdinalRange, N}) where {T, N, DT <: Union{AbstractCFDateTime,DateTime,Missing}}
if calendar(v) !== nothing
# can throw a conversion error if the calendar attribute already exists and
# is incompatible with the provided data
Expand All @@ -478,16 +486,13 @@ function Base.setindex!(v::CFVariable,data::Union{T,Array{T}},indexes::TIndices.
@error "Time units and calendar must be defined during defVar and cannot change"
end


function Base.setindex!(v::CFVariable,data,indexes::TIndices...)
function DiskArrays.writeblock!(v::CFVariable{T,N}, data, indexes::Vararg{OrdinalRange, N}) where {T, N}
parent(v)[indexes...] = CFinvtransformdata(
data,fill_and_missing_values(v),
scale_factor(v),add_offset(v),
time_origin(v),time_factor(v),
maskingvalue(v),
eltype(parent(v)))

return data
end


Expand Down
2 changes: 1 addition & 1 deletion src/dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ function Base.getindex(ds::AbstractDataset,varname::SymbolOrString)
end


function Base.setindex!(ds::AbstractDataset,data::AbstractVariable,varname::SymbolOrString)
function Base.setindex!(ds::AbstractDataset,data::Union{AbstractVariable, SubVariable},varname::SymbolOrString)
return defVar(ds, varname, data)
end

Expand Down
4 changes: 2 additions & 2 deletions src/defer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,9 @@ variable(dds::DeferDataset,varname::Symbol) = variable(dds,string(varname))
dataset(dv::DeferVariable{T,N,TDS}) where {T,N,TDS} =
DeferDataset(TDS,dv.r.filename,dv.r.mode; dv.r.args...)

function Base.getindex(dv::DeferVariable,indexes::Union{Int,Colon,AbstractRange{<:Integer}}...)
function DiskArrays.readblock!(dv::DeferVariable{T, N}, aout,indexes::Vararg{OrdinalRange, N}) where {T, N}
Variable(dv) do v
return v[indexes...]
aout .= v[indexes...]
end
end

Expand Down
3 changes: 3 additions & 0 deletions src/groupby.jl
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,9 @@ Base.BroadcastStyle(::Type{<:ReducedGroupedVariable}) = ReducedGroupedVariableSt
Base.BroadcastStyle(::DefaultArrayStyle,::ReducedGroupedVariableStyle) = ReducedGroupedVariableStyle()
Base.BroadcastStyle(::ReducedGroupedVariableStyle,::DefaultArrayStyle) = ReducedGroupedVariableStyle()

Base.BroadcastStyle(::DiskArrays.ChunkStyle,::ReducedGroupedVariableStyle) = ReducedGroupedVariableStyle()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ReducedGroupedVariable uses a different broadcasting mechanism than DiskArrays, which might lead to some confusion.

Base.BroadcastStyle(::ReducedGroupedVariableStyle,::DiskArrays.ChunkStyle) = ReducedGroupedVariableStyle()

function Base.similar(bc::Broadcasted{ReducedGroupedVariableStyle}, ::Type{ElType}) where ElType
# Scan the inputs for the ReducedGroupedVariable:
A = find_gv(ReducedGroupedVariable,bc)
Expand Down
21 changes: 18 additions & 3 deletions src/memory_dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,24 @@ end

Base.parent(v::MemoryVariable) = v.data
Base.size(v::MemoryVariable) = size(parent(v))
Base.getindex(v::MemoryVariable,ij::TIndices...) = parent(v)[ij...]
function Base.setindex!(v::MemoryVariable,data,ij...)

# Read the block selected by `indexes` from the array backing `v`
# (`parent(v)`) into the preallocated buffer `aout`, and return `aout`.
function DiskArrays.readblock!(v::MemoryVariable{T, N},
                               aout,
                               indexes::Vararg{OrdinalRange, N}) where {T, N}
    # Broadcast from a view rather than from `parent(v)[indexes...]`: plain
    # indexing would first allocate a temporary copy of the block. This is
    # the same approach `CDM.load!` uses for `MemoryVariable`.
    aout .= view(parent(v), indexes...)
    return aout
end


# Zero-dimensional case: there are no indices, so the single element held by
# `data` is stored into the single element of `parent(v)`.
function DiskArrays.writeblock!(v::MemoryVariable{T, 0}, data, indexes::Vararg{OrdinalRange, 0}) where {T}
    setindex!(parent(v), data[])
    return data
end


function DiskArrays.writeblock!(v::MemoryVariable{T, N}, data, indexes::Vararg{OrdinalRange, N}) where {T, N}
sz = size(v)
parent(v)[ij...] = data
parent(v)[indexes...] = data

root = _root(v)
for idim = findall(size(v) .> sz)
Expand All @@ -83,6 +97,7 @@ function Base.setindex!(v::MemoryVariable,data,ij...)
return data
end


CDM.load!(v::MemoryVariable,buffer,ij...) = buffer .= view(parent(v),ij...)
CDM.name(v::Union{MemoryVariable,MemoryDataset}) = v.name
CDM.dimnames(v::MemoryVariable) = v.dimnames
Expand Down
34 changes: 27 additions & 7 deletions src/multifile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -181,12 +181,25 @@ maskingvalue(mfds::MFDataset) = maskingvalue(mfds.ds[1])
Base.parent(v::MFVariable) = v.var
Base.parent(v::MFCFVariable) = v.var
Base.Array(v::MFVariable) = Array(parent(v))
Base.getindex(v::MFVariable,indexes::TIndices...) = getindex(parent(v),indexes...)
Base.setindex!(v::MFVariable,data,indexes::TIndices...) = setindex!(parent(v),data,indexes...)

# Read the block selected by `indexes` from `parent(v)` into `aout`.
function DiskArrays.readblock!(v::MFVariable{T, N}, aout, indexes::Vararg{OrdinalRange, N}) where {T, N}
    block = getindex(parent(v), indexes...)
    aout .= block
    return aout
end
# Read the block selected by `indexes` through the wrapped `v.cfvar`
# into `aout`.
function DiskArrays.readblock!(v::MFCFVariable{T, N}, aout, indexes::Vararg{OrdinalRange, N}) where {T, N}
    block = getindex(v.cfvar, indexes...)
    aout .= block
    return aout
end

# Forward the write of `data` at `indexes` to `parent(v)`; returns `data`.
function DiskArrays.writeblock!(v::MFVariable{T,N}, data, indexes::Vararg{OrdinalRange, N}) where {T, N}
    setindex!(parent(v), data, indexes...)
    return data
end

# Forward the write of `data` at `indexes` to the wrapped `v.cfvar`;
# returns `data`.
function DiskArrays.writeblock!(v::MFCFVariable{T,N}, data, indexes::Vararg{OrdinalRange, N}) where {T, N}
    setindex!(v.cfvar, data, indexes...)
    return data
end

Base.size(v::MFVariable) = size(parent(v))
Base.size(v::MFCFVariable) = size(parent(v))
Base.getindex(v::MFCFVariable,ind::TIndices...) = v.cfvar[ind...]
Base.setindex!(v::MFCFVariable,data,ind::TIndices...) = v.cfvar[ind...] = data

function Base.cat(vs::AbstractVariable...; dims::Integer)
CatArrays.CatArray(dims,vs...)
end
Expand Down Expand Up @@ -288,10 +301,17 @@ function chunking(v::MFVariable)
storage,chunksizes = chunking(v1)

if storage == :contiguous
return (:chunked, size(v1))
else
return storage,chunksizes
storage = :chunked
chunksizes = size(v1)
end

if ndims(v) == (length(chunksizes)+1)
cat_dim = v.var.dim
chunksizes = (chunksizes[1:(cat_dim-1)]...,1,chunksizes[cat_dim:end]...)
end

@assert ndims(v) == length(chunksizes)
return storage, chunksizes
end

deflate(v::MFVariable) = deflate(v.ds.ds[1][name(v)])
Expand Down
130 changes: 55 additions & 75 deletions src/subvariable.jl
Original file line number Diff line number Diff line change
@@ -1,73 +1,68 @@

Base.parent(v::SubVariable) = v.parent
Base.parentindices(v::SubVariable) = v.indices
Base.size(v::SubVariable) = _shape_after_slice(size(parent(v)),v.indices...)
# Print a SubVariable as a "View:" line listing its parent indices, followed
# by the parent variable shown one nesting level deeper. Any error raised
# while printing is logged and replaced by a short placeholder text.
function Base.show(io::IO, v::SubVariable)
    level = get(io, :level, 0)
    indent = " " ^ level
    delim = " × "
    try
        println(io, indent, "View: ", join(parentindices(v), delim))
        nested_io = IOContext(io, :level => level + 1)
        show(nested_io, parent(v))
    catch err
        @warn "error in show" err
        print(io, "SubVariable (dataset closed)")
    end
end
Base.show(io::IO,::MIME"text/plain",v::SubVariable) = show(io,v)

DiskArrays.subarray(v::SubVariable) = v.v

# Property access for a SubVariable:
#  * real fields of the SubVariable are returned directly;
#  * `:var` is taken from the parent and wrapped in a view using the same
#    indices as this SubVariable;
#  * any other name is forwarded to the parent variable.
function Base.getproperty(sub_var::SubVariable, name::Symbol)
    if hasfield(typeof(sub_var), name)
        return getfield(sub_var, name)
    end

    parent_var = parent(sub_var)
    if name == :var
        return view(parent_var.var, sub_indices(sub_var)...)
    end
    return Base.getproperty(parent_var, name)
end

function dimnames(v::SubVariable)
dimension_names = dimnames(parent(v))
return dimension_names[map(i -> !(i isa Integer),collect(v.indices))]
return dimension_names[map(i -> !(i isa Integer),collect(parentindices(v)))]
end

name(v::SubVariable) = name(parent(v))

attribnames(v::SubVariable) = attribnames(parent(v))
attrib(v::SubVariable,name::SymbolOrString) = attrib(parent(v),name)
defAttrib(v::SubVariable,name::SymbolOrString,data) = defAttrib(parent(v),name,data)

function SubVariable(A::AbstractVariable,indices...)
var = nothing
if hasproperty(A,:var)
if hasmethod(SubVariable,Tuple{typeof(A.var),typeof.(indices)...})
var = SubVariable(A.var,indices...)
end
end

T = eltype(A)
N = length(size_getindex(A,indices...))
return SubVariable{T,N,typeof(A),typeof(indices),typeof(A.attrib),typeof(var)}(
A,indices,A.attrib,var)
# Load the data selected by the view `v` by indexing its parent.
# `sub_indices(v)` is the tuple of per-dimension indices, so it must be
# splatted: indexing with the tuple itself passes a single (invalid) index.
materialize(v::SubVariable) = parent(v)[sub_indices(v)...]
sub_indices(v::SubVariable) = DiskArrays.subarray(v).indices

# Select, from `indices_subvariable` (one entry per dimension of
# `parent_var`), the entries that belong to the dimensions of `selected_var`,
# in the dimension order of `selected_var`. Dimensions are matched by name.
function map_indices(parent_var::AbstractVariable, selected_var::AbstractVariable,
                     indices_subvariable)
    selected_dims = dimnames(selected_var)
    parent_dims = dimnames(parent_var)

    # position within the parent's dimensions of each selected dimension
    positions = [findfirst(==(d), parent_dims) for d in selected_dims]
    return indices_subvariable[positions]
end

SubVariable(A::AbstractVariable{T,N}) where T where N = SubVariable(A,ntuple(i -> :,N)...)

# recursive calls so that the compiler can infer the types via inline-ing
# and constant propagation
_subsub(indices,i,l) = indices
_subsub(indices,i,l,ip,rest...) = _subsub((indices...,ip[i[l]]),i,l+1,rest...)
_subsub(indices,i,l,ip::Number,rest...) = _subsub((indices...,ip),i,l,rest...)
_subsub(indices,i,l,ip::Colon,rest...) = _subsub((indices...,i[l]),i,l+1,rest...)

#=
j = subsub(parentindices,indices)
## getting the related var also returns a SubVariable
function Base.getindex(sub_var::SubVariable,n::Union{CFStdName,SymbolOrString})
parent_var = parent(sub_var)
selected_var = parent_var[n]

Compute the tuple of indices `j` so that
`A[parentindices...][indices...] = A[j...]` for any array `A` and any tuple of
valid indices `parentindices` and `indices`
=#
subsub(parentindices,indices) = _subsub((),indices,1,parentindices...)

materialize(v::SubVariable) = parent(v)[v.indices...]

"""
collect always returns an array.
Even if the result of the indexing is a scalar, it is wrapped
into a zero-dimensional array.
"""
function collect(v::SubVariable{T,N}) where T where N
if N == 0
A = Array{T,0}(undef,())
A[] = parent(v)[v.indices...]
return A
else
return parent(v)[v.indices...]
end
end

Base.Array(v::SubVariable) = collect(v)

function Base.view(v::SubVariable,indices::Union{<:Integer,Colon,AbstractVector{<:Integer}}...)
sub_indices = subsub(v.indices,indices)
SubVariable(parent(v),sub_indices...)
indices_selected = map_indices(parent_var, selected_var, sub_indices(sub_var))
return view(selected_var, indices_selected...)
end

"""
Expand Down Expand Up @@ -96,27 +91,12 @@ close(ds)
```

"""
Base.view(v::AbstractVariable,indices::Union{<:Integer,Colon,AbstractVector{<:Integer}}...) = SubVariable(v,indices...)
Base.view(v::SubVariable,indices::CartesianIndex) = view(v,indices.I...)
Base.view(v::SubVariable,indices::CartesianIndices) = view(v,indices.indices...)

Base.getindex(v::SubVariable,indices::Union{Int,Colon,AbstractRange{<:Integer}}...) = materialize(view(v,indices...))

Base.getindex(v::SubVariable,indices::CartesianIndex) = getindex(v,indices.I...)
Base.getindex(v::SubVariable,indices::CartesianIndices) =
getindex(v,indices.indices...)

function Base.setindex!(v::SubVariable,data,indices...)
sub_indices = subsub(v.indices,indices)
parent(v)[sub_indices...] = data
function Base.view(a::AbstractVariable,i...)
i2 = DiskArrays._replace_colon.(size(a), i) # TODO improve
return SubVariable(SubArray(a, i2))
end

Base.setindex!(v::SubVariable,data,indices::CartesianIndex) =
setindex!(v,data,indices.I...)
Base.setindex!(v::SubVariable,data,indices::CartesianIndices) =
setindex!(v,data,indices.indices...)


Base.view(a::AbstractVariable, i::CartesianIndices) = view(a, i.indices...)

dimnames(ds::SubDataset) = dimnames(ds.ds)
defDim(ds::SubDataset,name::SymbolOrString,len) = defDim(ds.ds,name,len)
Expand Down Expand Up @@ -176,7 +156,7 @@ groupname(ds::SubDataset) = groupname(ds.ds)


function dataset(v::SubVariable)
indices = (;((Symbol(d),i) for (d,i) in zip(dimnames(parent(v)),v.indices))...)
indices = (;((Symbol(d),i) for (d,i) in zip(dimnames(parent(v)), sub_indices(v)))...)
return SubDataset(dataset(parent(v)),indices)
end

Expand Down
Loading
Loading