JuliaGeo · lupemba · May 3, 2025 · Jun 19, 2025 · Jun 20, 2025 · Jun 23, 2025
diff --git a/.github/workflows/IntegrationTest.yml b/.github/workflows/IntegrationTest.yml
@@ -23,6 +23,7 @@ jobs:
           - {user: JuliaGeo,        repo: GRIBDatasets.jl}
           - {user: Alexander-Barth, repo: TIFFDatasets.jl}
           - {user: JuliaGeo,        repo: ZarrDatasets.jl}
+          - {user: eumetsat,        repo: MetopDatasets.jl}
 
     steps:
       - uses: actions/checkout@v4

diff --git a/Project.toml b/Project.toml
@@ -4,12 +4,13 @@ keywords = ["netcdf", "GRIB", "climate and forecast conventions", "oceanography"
 license = "MIT"
 desc = "CommonDataModel is a module that defines types common to NetCDF and GRIB data"
 authors = ["Alexander Barth <[email protected]>"]
-version = "0.3.9"
+version = "0.4.0"
 
 [deps]
 CFTime = "179af706-886a-5703-950a-314cd64e0468"
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
+DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
 Preferences = "21216c6a-2e73-6563-6e65-726566657250"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
@@ -18,6 +19,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 CFTime = "0.1.1, 0.2"
 DataStructures = "0.17, 0.18"
 Dates = "1"
+DiskArrays = "0.4.15"
 Preferences = "1.3"
 Printf = "1"
 Statistics = "1"

diff --git a/README.md b/README.md
@@ -18,7 +18,7 @@ This package contains abstracts type definition for loading and manipulating GRI
 
 
 Features include:
-* query and edit metadata of arrays and datasets 
+* query and edit metadata of arrays and datasets
 * virtually concatenating multiple files along a given dimension and merging virtually different datasets
 * create a virtual subset (`view`) by indices or by values of coordinate variables (`CommonDataModel.select`, `CommonDataModel.@select`)
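-* group, map and reduce a variable (`CommonDataModel.groupby`, `CommonDataModel.@groupby`) and rolling reductions like running means `CommonDataModel.rolling`)
+* group, map and reduce a variable (`CommonDataModel.groupby`, `CommonDataModel.@groupby`) and rolling reductions like running means (`CommonDataModel.rolling`)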

diff --git a/docs/Project.toml b/docs/Project.toml
@@ -4,12 +4,17 @@ CommonDataModel = "1fbeeb36-5f17-413c-809b-666fb144f157"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
-GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a"
 IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
 Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
 NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
 [compat]
+CairoMakie = "0.10"
+Dates = "1"
+Documenter = "1"
+Downloads = "1"
+IntervalSets = "0.7"
+Literate = "2"
 NCDatasets = "0.14"
-GLMakie = "0.8"
+Statistics = "1"
diff --git a/docs/make.jl b/docs/make.jl
@@ -1,3 +1,6 @@
+using Pkg
+Pkg.activate(@__DIR__)
+
 using Documenter: Documenter, makedocs, deploydocs
 using CommonDataModel
 using Literate
@@ -6,7 +9,7 @@ Literate.markdown(
     "docs/src/tutorial1.jl","docs/src",
     execute = true,
     documenter = true,
-    # We add the credit to Literate.jl the footer
+    # We add the credit to Literate.jl in the footer
     credit = false,
 )
 
@@ -32,6 +35,6 @@ makedocs(;
     ],
 )
 
-deploydocs(;
+deploydocs(
     repo="github.com/JuliaGeo/CommonDataModel.jl",
 )
diff --git a/src/CommonDataModel.jl b/src/CommonDataModel.jl
@@ -7,6 +7,20 @@ using Dates
 using Printf
 using Preferences
 using DataStructures
+import DiskArrays: 
+    AbstractDiskArray, 
+    AbstractSubDiskArray, 
+    subarray,
+    writeblock!,
+    readblock!,
+    ChunkStyle,
+    haschunks,
+    eachchunk,
+    Unchunked,
+    Chunked,
+    GridChunks
+
+import DiskArrays
 import Base:
     LogicalIndex,
     checkbounds,

diff --git a/src/attribute.jl b/src/attribute.jl
@@ -35,7 +35,7 @@ function delAttrib(ds::Union{AbstractDataset,AbstractVariable},name::SymbolOrStr
 end
 
 
-attribs(ds::Union{AbstractDataset,AbstractVariable}) =
+attribs(ds::Union{AbstractDataset,AbstractVariable, SubVariable}) =
     OrderedDict((dn,attrib(ds,dn)) for dn in attribnames(ds))
 
 

diff --git a/src/cfconventions.jl b/src/cfconventions.jl
@@ -17,7 +17,7 @@ variable `ncv` with the
 standard name modifier `modifier`. It can be used for example to access
 related variable like status flags.
 """
-function ancillaryvariables(ncv::CFVariable,modifier)
+function ancillaryvariables(ncv::Union{CFVariable, SubVariable{<:Any,<:Any,<:CFVariable}},modifier)
     ds = dataset(ncv)
     varname = name(ncv)
 
@@ -44,7 +44,7 @@ allowmissing(x::AbstractArray{T}) where {T} = convert(AbstractArray{Union{T, Mis
 
 
 
-function _filter(ncv::AbstractVariable, indices...; accepted_status_flags = nothing)
+function _filter(ncv::Union{AbstractVariable,SubVariable}, indices...; accepted_status_flags = nothing)
     data = allowmissing(ncv[indices...])
 
     if (accepted_status_flags != nothing)
@@ -99,13 +99,13 @@ good_data = NCDatasets.filter(ds["data"],:,:, accepted_status_flags = ["good_dat
 ```
 
 """
-filter(ncv::AbstractVariable, indices::TIndices...; kwargs...) =
+filter(ncv::Union{AbstractVariable,SubVariable}, indices::TIndices...; kwargs...) =
     _filter(ncv, indices...; kwargs...)
 
-filter(ncv::AbstractVariable, indices::Union{Vector{<:Integer}, Array{<:CartesianIndex}}...; kwargs...) =
+filter(ncv::Union{AbstractVariable,SubVariable}, indices::Union{Vector{<:Integer}, Array{<:CartesianIndex}}...; kwargs...) =
     _filter(ncv, indices...; kwargs...)
 
-filter(ncv::AbstractVariable, indices::BitVector; kwargs...) =
-    _filter(ncv, indices...; kwargs...)
+filter(ncv::Union{AbstractVariable,SubVariable}, indices::BitVector; kwargs...) =
+    _filter(ncv, indices; kwargs...)  # pass the mask whole; splatting it would yield one Bool argument per element
 
 """
@@ -127,7 +127,7 @@ v = ncv[:]
 close(ds)
 ```
 """
-function coord(v::AbstractVariable,standard_name)
+function coord(v::Union{AbstractVariable,SubVariable},standard_name)
     matches = Dict(
         "time" => [r".*since.*"],
         # It is great to have choice!

diff --git a/src/cfvariable.jl b/src/cfvariable.jl
@@ -84,7 +84,7 @@ ds = NCDataset("foo.nc");
 close(ds)
 ```
 """
-function cfvariable(ds,
+function cfvariable(ds, 
                     varname;
                     _v = variable(ds,varname),
                     attrib = _v.attrib,
@@ -441,56 +441,83 @@ end
     return CFinvtransform(data,fv,inv_scale_factor,minus_offset,time_origin,inv_time_factor,maskingvalue,DT)
 end
 
+## Define for DiskArrays
+@inline function CFinvtransformdata(data::AbstractDiskArray{T,N},fv,scale_factor,add_offset,time_origin,time_factor,maskingvalue,DT) where {T,N}
+    # Load the disk array into an in-memory `Array`, then call CFinvtransformdata again on that copy.
+    return CFinvtransformdata(Array(data),fv,scale_factor,add_offset,time_origin,time_factor,maskingvalue,DT)
+end
+
+# Disk-array identity case: no fill values, scale factor, offset, time origin or
+# time factor, and the requested type equals the element type T, so `data` is
+# returned unchanged (avoids an allocation).
+@inline CFinvtransformdata(
+    data::AbstractDiskArray{T,N},fv::Tuple{},scale_factor::Nothing,
+    add_offset::Nothing,time_origin::Nothing,time_factor::Nothing,maskingvalue,::Type{T}) where {T,N} = data
+
 
 
 # this function is necessary to avoid "iterating" over a single character in Julia 1.0 (fixed Julia 1.3)
 # https://discourse.julialang.org/t/broadcasting-and-single-characters/16836
 #@inline CFtransformdata(data::Char,fv,scale_factor,add_offset,time_origin,time_factor,DTcast) = CFtransform_missing(data,fv)
 #@inline CFinvtransformdata(data::Char,fv,scale_factor,add_offset,time_origin,time_factor,DT) = CFtransform_replace_missing(data,fv)
 
-function Base.getindex(v::CFVariable, indexes::TIndices...)
-    data = parent(v)[indexes...]
-    return CFtransformdata(data,fill_and_missing_values(v),scale_factor(v),add_offset(v),
-                           time_origin(v),time_factor(v),maskingvalue(v),eltype(v))
+function DiskArrays.readblock!(v::CFVariable{T, N},
+    aout,
+    indexes::Vararg{OrdinalRange, N}) where {T, N}
+    # Read the requested block of the parent variable into a buffer shaped like `aout`.
+    src = parent(v)
+    raw = similar(aout, eltype(src))
+    DiskArrays.readblock!(src, raw, indexes...)
+
+    # Transform the raw values with `CFtransformdata` and store them in `aout`.
+    aout .= CFtransformdata(
+        raw,fill_and_missing_values(v),scale_factor(v),add_offset(v),
+        time_origin(v),time_factor(v),maskingvalue(v),eltype(v))
+    return nothing
 end
 
-function Base.setindex!(v::CFVariable,data::Array{Missing,N},indexes::TIndices...) where N
-    parent(v)[indexes...] = fill(fillvalue(v),size(data))
+# Writing `missing` (array or scalar): assign the variable's fill value to the indexed block of the parent.
+function DiskArrays.writeblock!(v::CFVariable{T, N}, data::Array{Missing,N}, indexes::Vararg{OrdinalRange, N}) where {T, N}
+    parent(v)[indexes...] .= fillvalue(v)
 end
 
-function Base.setindex!(v::CFVariable,data::Missing,indexes::TIndices...)
-    parent(v)[indexes...] = fillvalue(v)
+function DiskArrays.writeblock!(v::CFVariable{T, N}, data::Missing, indexes::Vararg{OrdinalRange, N}) where {T, N}
+    parent(v)[indexes...] .= fillvalue(v)
 end
 
-function Base.setindex!(v::CFVariable,data::Union{T,Array{T}},indexes::TIndices...) where T <: Union{AbstractCFDateTime,DateTime,Missing}
 
+function DiskArrays.writeblock!(v::CFVariable{T, N}, data::Union{DT,Array{DT}}, indexes::Vararg{OrdinalRange, N}) where {T, N, DT <: Union{AbstractCFDateTime,DateTime,Missing}}
     if calendar(v) !== nothing
-        # can throw an convertion error if calendar attribute already exists and
+        # can throw a conversion error if calendar attribute already exists and
         # is incompatible with the provided data
-        parent(v)[indexes...] = CFinvtransformdata(
+        data_transformed = CFinvtransformdata(
             data,fill_and_missing_values(v),scale_factor(v),add_offset(v),
             time_origin(v),time_factor(v),
             maskingvalue(v),
             eltype(parent(v)))
+        # Write the inverse-transformed block through the parent variable.
+        DiskArrays.writeblock!(parent(v), data_transformed, indexes...)
+
         return data
     end
 
     @error "Time units and calendar must be defined during defVar and cannot change"
 end
 
+function DiskArrays.writeblock!(v::CFVariable{T,N}, data, indexes::Vararg{OrdinalRange, N}) where {T, N}
 
-function Base.setindex!(v::CFVariable,data,indexes::TIndices...)
-    parent(v)[indexes...] = CFinvtransformdata(
-        data,fill_and_missing_values(v),
-        scale_factor(v),add_offset(v),
-        time_origin(v),time_factor(v),
-        maskingvalue(v),
-        eltype(parent(v)))
+    # Inverse-transform `data` for storage (target type: eltype of the parent),
+    # then hand the resulting block to the parent's `writeblock!`.
+    encoded = CFinvtransformdata(
+        data,fill_and_missing_values(v),scale_factor(v),add_offset(v),
+        time_origin(v),time_factor(v),maskingvalue(v),
+        eltype(parent(v)))
 
-    return data
+    return DiskArrays.writeblock!(parent(v), encoded, indexes...)
 end
 
 
+
-# can be implemented overridden for faster implementation
+# can be overridden for a faster implementation
 function boundsParentVar(ds,varname)
     for vn in varnames(ds)
@@ -526,7 +553,7 @@ function _getattrib(ds,v,parentname,attribname,default)
     end
 end
 
-function _isrelated(v1::AbstractVariable,v2::AbstractVariable)
+function _isrelated(v1::Union{AbstractVariable,SubVariable},v2::Union{AbstractVariable,SubVariable})
     dimnames(v1) ⊆ dimnames(v2)
 end
 

diff --git a/src/dataset.jl b/src/dataset.jl
@@ -171,7 +171,7 @@ function Base.getindex(ds::AbstractDataset,varname::SymbolOrString)
 end
 
 
-function Base.setindex!(ds::AbstractDataset,data::AbstractVariable,varname::SymbolOrString)
+function Base.setindex!(ds::AbstractDataset,data::Union{AbstractVariable, SubVariable},varname::SymbolOrString)
     return defVar(ds, varname, data)
 end
 
@@ -202,7 +202,7 @@ julia> data = varbyattrib(ds, standard_name = "longitude")[1][:]
 ```
 
 """
-function varbyattrib(ds::Union{AbstractDataset,AbstractVariable}; kwargs...)
+function varbyattrib(ds::Union{AbstractDataset,AbstractVariable, SubVariable}; kwargs...)
     # Start with an empty list of variables
     varlist = []
 
@@ -385,6 +385,15 @@ end
     end
 end
 
+# Property names are the struct fields plus `:attrib` and `:dim`; datasets
+# additionally list `:group`. Dispatch picks the variant (no runtime `isa` test).
+# `private` is accepted to match the `Base.propertynames` signature but is unused.
+@inline Base.propertynames(ds::AbstractDataset,private::Bool=false) =
+    (fieldnames(typeof(ds))...,:attrib,:dim,:group)
+
+@inline Base.propertynames(ds::AbstractVariable,private::Bool=false) =
+    (fieldnames(typeof(ds))...,:attrib,:dim)
+
 
 for (item_color,default) in (
     (:section_color, :red),

diff --git a/src/defer.jl b/src/defer.jl
@@ -111,9 +111,9 @@ variable(dds::DeferDataset,varname::Symbol) = variable(dds,string(varname))
 dataset(dv::DeferVariable{T,N,TDS}) where {T,N,TDS} =
     DeferDataset(TDS,dv.r.filename,dv.r.mode; dv.r.args...)
 
-function Base.getindex(dv::DeferVariable,indexes::Union{Int,Colon,AbstractRange{<:Integer}}...)
+function DiskArrays.readblock!(dv::DeferVariable{T, N}, aout,indexes::Vararg{OrdinalRange, N}) where {T, N}
     Variable(dv) do v
-        return v[indexes...]
+        aout .= v[indexes...]
     end
 end
 

diff --git a/src/groupby.jl b/src/groupby.jl
@@ -279,7 +279,7 @@ function Base.broadcasted(::GroupedVariableStyle,f::Function,A::GroupedVariable{
         A.v,A.coordname,A.group_fun,A.groupmap,A.dim,map_fun)
 end
 
-function GroupedVariable(v::TV,coordname,group_fun::TF,groupmap,dim,map_fun::TM) where TV <: AbstractVariable where {TF,TM}
+function GroupedVariable(v::TV,coordname,group_fun::TF,groupmap,dim,map_fun::TM) where TV <: Union{AbstractVariable,SubVariable} where {TF,TM}
     TGM = typeof(groupmap)
 
     #TG = Base.return_types(selectdim,(TV,Int,Int,))[1]
@@ -361,7 +361,7 @@ close(ds)
 ```
 
 """
-function groupby(v::AbstractVariable,(coordname,group_fun)::Pair{<:SymbolOrString,TF}) where TF
+function groupby(v::Union{AbstractVariable,SubVariable},(coordname,group_fun)::Pair{<:SymbolOrString,TF}) where TF
     # for NCDatasets 0.12
     c = v[String(coordname)][:]
     class = group_fun.(c)
@@ -462,6 +462,9 @@ Base.BroadcastStyle(::Type{<:ReducedGroupedVariable}) = ReducedGroupedVariableSt
 Base.BroadcastStyle(::DefaultArrayStyle,::ReducedGroupedVariableStyle) = ReducedGroupedVariableStyle()
 Base.BroadcastStyle(::ReducedGroupedVariableStyle,::DefaultArrayStyle) = ReducedGroupedVariableStyle()
 
+Base.BroadcastStyle(::DiskArrays.ChunkStyle,::ReducedGroupedVariableStyle) = ReducedGroupedVariableStyle()
+Base.BroadcastStyle(::ReducedGroupedVariableStyle,::DiskArrays.ChunkStyle) = ReducedGroupedVariableStyle()
+
 function Base.similar(bc::Broadcasted{ReducedGroupedVariableStyle}, ::Type{ElType})  where ElType
     # Scan the inputs for the ReducedGroupedVariable:
     A = find_gv(ReducedGroupedVariable,bc)
@@ -587,3 +590,16 @@ function dataset(gr::ReducedGroupedVariable)
         gr.reduce_fun,
     )
 end
+
+# ReducedGroupedVariable is a variable and therefore an AbstractDiskArray.
+# getindex is already overloaded for ReducedGroupedVariable, so readblock!
+# is defined here to delegate to that getindex method.
+# An alternative solution is to remove the getindex definitions
+# and implement the read logic directly in readblock!.
+function DiskArrays.readblock!(
+    gr::ReducedGroupedVariable{T,N}, aout, indexes::Vararg{OrdinalRange, N}
+) where {T,N}
+    # Index `gr` through its `getindex` overload and copy the result into `aout`.
+    aout .= gr[indexes...]
+    return aout
+end
-Original file line number
+Diff line change
@@ Expand Up @@
     end
-    attribs(ds::Union{AbstractDataset,AbstractVariable}) =
+    attribs(ds::Union{AbstractDataset,AbstractVariable, SubVariable}) =
         OrderedDict((dn,attrib(ds,dn)) for dn in attribnames(ds))
@@ Expand Down @@