@@ -13,7 +13,7 @@ using DiskArrays: DiskArrays, GridChunks
1313using Glob: glob
1414using DimensionalData: DimensionalData as DD
1515
16- export Dataset, Cube, open_dataset, to_dataset, savecube, savedataset
16+ export Dataset, Cube, open_dataset, to_dataset, savecube, savedataset, open_mfdataset
1717
1818"""
1919 Dataset object which stores an `OrderedDict` of YAXArrays with Symbol keys.
@@ -253,7 +253,7 @@ function collectdims(g)
253253 varnames = get_varnames (g)
254254 foreach (varnames) do k
255255 d = get_var_dims (g, k)
256- v = get_var_handle (g, k)
256+ v = get_var_handle (g, k, persist = false )
257257 for (len, dname) in zip (size (v), d)
258258 if ! occursin (" bnd" , dname) && ! occursin (" bounds" , dname)
259259 datts = if dname in varnames
@@ -277,7 +277,7 @@ function toaxis(dimname, g, offs, len)
277277 if ! haskey (g, dimname)
278278 return DD. rebuild (DD. name2dim (axname), 1 : len)
279279 end
280- ar = get_var_handle (g, dimname)
280+ ar = get_var_handle (g, dimname, persist = false )
281281 aratts = get_var_attrs (g, dimname)
282282 if match (r" ^(days)|(hours)|(seconds)|(months) since" ,lowercase (get (aratts," units" ," " ))) != = nothing
283283 tsteps = try
@@ -337,6 +337,63 @@ open_mfdataset(g::AbstractString; kwargs...) = open_mfdataset(_glob(g); kwargs..
# Open every path in `g` with `open_dataset` (forwarding all keyword arguments)
# and hand the resulting datasets to `merge_datasets`.
function open_mfdataset(g::Vector{<:AbstractString}; kwargs...)
    datasets = map(g) do path
        open_dataset(path; kwargs...)
    end
    return merge_datasets(datasets)
end
339339
"""
    merge_new_axis(alldatasets, firstcube, var, mergedim)

Stack the variable `var` of every dataset in `alldatasets` along a dimension that the
variable does not have yet, and return the result as a `YAXArray`.

# Arguments
- `alldatasets`: the opened datasets. The `.data` field of the mapped result is read, so
  this is expected to be a dimension-wrapped array (e.g. a `DimArray`) of datasets.
- `firstcube`: the cube for `var` from the first dataset; it supplies the existing
  dimensions and the properties (deep-copied) of the result.
- `var`: key of the variable in each dataset's `cubes`.
- `mergedim`: the dimension that is appended as the last dimension of the result.
"""
function merge_new_axis(alldatasets, firstcube, var, mergedim)
    # Keep the coordinate values the caller attached to `mergedim`; only fall back to a
    # plain 1:n index when the dimension carries no lookup values.
    newdim = if DD.lookup(mergedim) isa DD.NoLookup
        DD.rebuild(mergedim, 1:length(alldatasets))
    else
        DD.rebuild(mergedim, DD.val(mergedim))
    end
    # Unwrap to the plain array of disk arrays before stacking.
    alldiskarrays = map(ds -> ds.cubes[var].data, alldatasets).data
    newda = diskstack(alldiskarrays)
    # `DD.dims` is the DimensionalData accessor for an array's dimensions.
    newdims = (DD.dims(firstcube)..., newdim)
    return YAXArray(newdims, newda, deepcopy(firstcube.properties))
end
"""
    merge_existing_axis(alldatasets, firstcube, var, mergedim)

Concatenate the variable `var` of every dataset in `alldatasets` along `mergedim`, a
dimension the variable already has, and return the result as a `YAXArray`.

# Arguments
- `alldatasets`: the opened datasets. The `.data` field of the mapped result is read, so
  this is expected to be a dimension-wrapped array (e.g. a `DimArray`) of datasets.
- `firstcube`: the cube for `var` from the first dataset; it supplies the dimension
  layout and the properties (deep-copied) of the result.
- `var`: key of the variable in each dataset's `cubes`.
- `mergedim`: the dimension to concatenate along.
"""
function merge_existing_axis(alldatasets, firstcube, var, mergedim)
    # The merged axis values are the per-dataset axis values, concatenated in order.
    allaxvals = map(ds -> DD.dims(ds.cubes[var], mergedim).val, alldatasets)
    newdim = DD.rebuild(mergedim, reduce(vcat, allaxvals))
    # Unwrap to the plain array first (as `merge_new_axis` does) so that the reshape
    # below operates on an ordinary array of disk arrays.
    alldiskarrays = map(ds -> ds.cubes[var].data, alldatasets).data
    istack = DD.dimnum(firstcube, mergedim)
    # Lay the arrays out along position `istack`; every other position has length 1.
    newshape = ntuple(i -> i == istack ? length(alldiskarrays) : 1, ndims(firstcube))
    newda = DiskArrays.ConcatDiskArray(reshape(alldiskarrays, newshape))
    newdims = Base.setindex(firstcube.axes, newdim, istack)
    return YAXArray(newdims, newda, deepcopy(firstcube.properties))
end
358+
"""
    open_mfdataset(files::DD.DimVector{<:AbstractString}; kwargs...)

Open every dataset path in `files` and concatenate the datasets along the dimension that
`files` is indexed by. All keyword arguments are forwarded to `open_dataset`.

Use this method when the generic glob-based version of `open_mfdataset` fails or is too
slow.

If the variables already have the dimension, they are concatenated along it. For example,
to join a list of annual NetCDF files along the `Ti` dimension:

````julia
files = ["1990.nc","1991.nc","1992.nc"]
open_mfdataset(DD.DimArray(files,DD.Ti()))
````

If the variables do not have the dimension yet, the dimension given in the input
argument is added as a new one:

````julia
files = ["a.nc","b.nc","c.nc"]
open_mfdataset(DD.DimArray(files,DD.Dim{:NewDim}(["a","b","c"])))
````
"""
function open_mfdataset(vec::DD.DimVector{<:AbstractString}; kwargs...)
    alldatasets = open_dataset.(vec; kwargs...)
    template = first(alldatasets)
    mergedim = only(DD.dims(alldatasets))
    # The variables of the first dataset determine which variables are merged.
    merged = map(collect(keys(template.cubes))) do var
        cube = template.cubes[var]
        # Pick the merge strategy by whether the cube already has the merge dimension.
        merger = DD.dims(cube, mergedim) === nothing ? merge_new_axis : merge_existing_axis
        var => merger(alldatasets, cube, var, mergedim)
    end
    Dataset(; merged...)
end
396+
340397
341398"""
342399open_dataset(g; driver=:all)
@@ -345,44 +402,46 @@ Open the dataset at `g` with the given `driver`.
The default driver searches the available drivers and tries to detect a usable one from the filename extension.
346403"""
function open_dataset(g; driver = :all)
    handle = YAXArrayBase.to_dataset(g, driver = driver)
    # All reads below happen inside the callback that `open_dataset_handle` invokes.
    YAXArrayBase.open_dataset_handle(handle) do ds
        if isempty(get_varnames(ds))
            throw(ArgumentError("Group does not contain datasets."))
        end
        dimlist = collectdims(ds)
        updimnames = map(uppercase, string.(keys(dimlist)))
        # Keep only data variables: drop bounds variables and anything whose name
        # matches a dimension name (case-insensitively).
        varlist = filter(get_varnames(ds)) do vn
            upname = uppercase(vn)
            isbounds = occursin("BNDS", upname) || occursin("BOUNDS", upname)
            isdim = any(isequal(upname), updimnames)
            !(isbounds || isdim)
        end
        allcubes = OrderedDict{Symbol,YAXArray}()
        cfkeys = ["missing_value", "scale_factor", "add_offset"]
        for vname in varlist
            vardims = get_var_dims(ds, vname)
            iax = Tuple(dimlist[vd].ax for vd in vardims)
            offs = [dimlist[vd].offs for vd in vardims]
            # A non-zero offset means only a sub-range of the stored variable is used.
            subs = if all(iszero, offs)
                nothing
            else
                ntuple(i -> (offs[i]+1):(offs[i]+length(iax[i])), length(offs))
            end
            ar = get_var_handle(ds, vname, persist = true)
            att = get_var_attrs(ds, vname)
            if subs !== nothing
                ar = view(ar, subs...)
            end
            # Fall back to the variable name when the attributes carry no "name".
            haskey(att, "name") || (att["name"] = vname)
            atts = propfromattr(att)
            # Wrap in a CFDiskArray when any of these attributes is present.
            if any(in(keys(atts)), cfkeys)
                ar = CFDiskArray(ar, atts)
            end
            allcubes[Symbol(vname)] = YAXArray(iax, ar, atts, cleaner = CleanMe[])
        end
        rawgatts = YAXArrayBase.get_global_attrs(ds)
        gatts = Dict{String,Any}(string(k) => v for (k, v) in rawgatts)
        sdimlist = Dict(DD.name(entry.ax) => entry.ax for entry in values(dimlist))
        Dataset(allcubes, sdimlist, gatts)
    end
end
387446# Base.getindex(x::Dataset; kwargs...) = subsetcube(x; kwargs...)
388447YAXDataset (; kwargs... ) = Dataset (YAXArrays. YAXDefaults. cubedir[]; kwargs... )
0 commit comments