@@ -13,7 +13,7 @@ using DiskArrays: DiskArrays, GridChunks
13
13
using Glob: glob
14
14
using DimensionalData: DimensionalData as DD
15
15
16
- export Dataset, Cube, open_dataset, to_dataset, savecube, savedataset
16
+ export Dataset, Cube, open_dataset, to_dataset, savecube, savedataset, open_mfdataset
17
17
18
18
"""
19
19
Dataset object which stores an `OrderedDict` of YAXArrays with Symbol keys.
@@ -253,7 +253,7 @@ function collectdims(g)
253
253
varnames = get_varnames (g)
254
254
foreach (varnames) do k
255
255
d = get_var_dims (g, k)
256
- v = get_var_handle (g, k)
256
+ v = get_var_handle (g, k, persist = false )
257
257
for (len, dname) in zip (size (v), d)
258
258
if ! occursin (" bnd" , dname) && ! occursin (" bounds" , dname)
259
259
datts = if dname in varnames
@@ -277,7 +277,7 @@ function toaxis(dimname, g, offs, len)
277
277
if ! haskey (g, dimname)
278
278
return DD. rebuild (DD. name2dim (axname), 1 : len)
279
279
end
280
- ar = get_var_handle (g, dimname)
280
+ ar = get_var_handle (g, dimname, persist = false )
281
281
aratts = get_var_attrs (g, dimname)
282
282
if match (r" ^(days)|(hours)|(seconds)|(months) since" ,lowercase (get (aratts," units" ," " ))) != = nothing
283
283
tsteps = try
@@ -337,6 +337,63 @@ open_mfdataset(g::AbstractString; kwargs...) = open_mfdataset(_glob(g); kwargs..
337
337
function open_mfdataset(g::Vector{<:AbstractString}; kwargs...)
    # Open every path with the same keyword arguments, then combine the
    # resulting datasets with `merge_datasets`.
    datasets = map(g) do path
        open_dataset(path; kwargs...)
    end
    return merge_datasets(datasets)
end
339
339
340
"""
    merge_new_axis(alldatasets, firstcube, var, mergedim)

Combine the variable `var` of every dataset in `alldatasets` along a dimension
that `firstcube` does not have yet and return the result as a `YAXArray`.

The new dimension is appended after the existing dimensions of `firstcube`.
If `mergedim` carries explicit values (one per dataset) they are kept as the
values of the new dimension; otherwise the datasets are numbered
`1:length(alldatasets)`. Properties are deep-copied from `firstcube`.
"""
function merge_new_axis(alldatasets, firstcube, var, mergedim)
    # Honour values supplied by the caller (e.g. `Dim{:NewDim}(["a","b","c"])`);
    # only fall back to a plain counter when the dimension has no usable values.
    vals = DD.val(mergedim)
    hasvalues =
        vals isa AbstractArray &&
        !(vals isa DD.NoLookup) &&
        length(vals) == length(alldatasets)
    newdim = if hasvalues
        DD.rebuild(mergedim, vals)
    else
        DD.rebuild(mergedim, 1:length(alldatasets))
    end
    # `alldatasets` is expected to be a DimArray here; `.data` unwraps the plain
    # array of per-dataset data handles before stacking.
    alldiskarrays = map(ds -> ds.cubes[var].data, alldatasets).data
    newda = diskstack(alldiskarrays)
    # `DD.dims` (not `DD.dim`) returns the tuple of dimensions of the cube.
    newdims = (DD.dims(firstcube)..., newdim)
    return YAXArray(newdims, newda, deepcopy(firstcube.properties))
end
347
"""
    merge_existing_axis(alldatasets, firstcube, var, mergedim)

Concatenate the variable `var` of every dataset in `alldatasets` along
`mergedim`, a dimension that `firstcube` already has, and return a `YAXArray`.

The values of `mergedim` from all datasets are joined with `vcat` to form the
new axis. Properties are deep-copied from `firstcube`.
"""
function merge_existing_axis(alldatasets, firstcube, var, mergedim)
    # Join the per-dataset axis values into the values of the merged axis.
    axisvalues = map(alldatasets) do ds
        DD.dims(ds.cubes[var], mergedim).val
    end
    mergedaxis = DD.rebuild(mergedim, reduce(vcat, axisvalues))

    chunks = map(alldatasets) do ds
        ds.cubes[var].data
    end
    # Lay the per-dataset arrays out along the position of `mergedim` only;
    # every other dimension of the block layout has extent 1.
    axisindex = DD.dimnum(firstcube, mergedim)
    nchunks = length(chunks)
    layout = ntuple(i -> i == axisindex ? nchunks : 1, ndims(firstcube))
    concatenated = DiskArrays.ConcatDiskArray(reshape(chunks, layout))

    mergedaxes = Base.setindex(firstcube.axes, mergedaxis, axisindex)
    return YAXArray(mergedaxes, concatenated, deepcopy(firstcube.properties))
end
358
+
359
"""
    open_mfdataset(files::DD.DimVector{<:AbstractString}; kwargs...)

Open every dataset path in `files` and concatenate the datasets along the
dimension of `files`. Keyword arguments are forwarded to `open_dataset`.

Use this method when the generic glob-based version of `open_mfdataset` fails
or is too slow.

For example, to concatenate a list of annual NetCDF files along the `Ti`
dimension:

````julia
files = ["1990.nc","1991.nc","1992.nc"]
open_mfdataset(DD.DimArray(files,DD.Ti()))
````

If a variable does not have the dimension to concatenate along yet, the
dimension provided in the input argument is used:

````julia
files = ["a.nc","b.nc","c.nc"]
open_mfdataset(DD.DimArray(files,DD.Dim{:NewDim}(["a","b","c"])))
````
"""
function open_mfdataset(vec::DD.DimVector{<:AbstractString}; kwargs...)
    alldatasets = open_dataset.(vec; kwargs...)
    # The first dataset decides which variables end up in the result.
    template = first(alldatasets)
    mergedim = only(DD.dims(alldatasets))
    merged = map(collect(keys(template.cubes))) do var
        cube = template.cubes[var]
        # Concatenate along the dimension if the variable already has it,
        # otherwise add it as a new dimension.
        if DD.dims(cube, mergedim) === nothing
            var => merge_new_axis(alldatasets, cube, var, mergedim)
        else
            var => merge_existing_axis(alldatasets, cube, var, mergedim)
        end
    end
    return Dataset(; merged...)
end
396
+
340
397
341
398
"""
342
399
open_dataset(g; driver=:all)
@@ -345,44 +402,46 @@ Open the dataset at `g` with the given `driver`.
345
402
The default driver will search for available drivers and tries to detect the useable driver from the filename extension.
346
403
"""
347
404
function open_dataset(g; driver = :all)
    dsopen = YAXArrayBase.to_dataset(g, driver = driver)
    # All metadata is read while the dataset handle is open; the variable
    # handles are requested with `persist=true`.
    return YAXArrayBase.open_dataset_handle(dsopen) do handle
        if isempty(get_varnames(handle))
            throw(ArgumentError("Group does not contain datasets."))
        end
        dimlist = collectdims(handle)
        dimnames = string.(keys(dimlist))
        # Keep only data variables: drop bounds variables and variables whose
        # name matches a dimension name (case-insensitive).
        datavars = filter(get_varnames(handle)) do vn
            upper = uppercase(vn)
            isbounds = occursin("BNDS", upper) || occursin("BOUNDS", upper)
            !isbounds && !any(d -> isequal(upper, uppercase(d)), dimnames)
        end

        cubes = OrderedDict{Symbol,YAXArray}()
        for vname in datavars
            vardims = get_var_dims(handle, vname)
            axs = Tuple(dimlist[vd].ax for vd in vardims)
            offsets = [dimlist[vd].offs for vd in vardims]
            # Non-zero offsets mean only a window of the stored variable
            # corresponds to the collected axes.
            window = if all(iszero, offsets)
                nothing
            else
                ntuple(length(offsets)) do i
                    (offsets[i] + 1):(offsets[i] + length(axs[i]))
                end
            end
            data = get_var_handle(handle, vname, persist = true)
            rawatts = get_var_attrs(handle, vname)
            if window !== nothing
                data = view(data, window...)
            end
            haskey(rawatts, "name") || (rawatts["name"] = vname)
            props = propfromattr(rawatts)
            # Wrap the data when any of these attributes is present.
            cfkeys = ["missing_value", "scale_factor", "add_offset"]
            if any(in(keys(props)), cfkeys)
                data = CFDiskArray(data, props)
            end
            cubes[Symbol(vname)] = YAXArray(axs, data, props, cleaner = CleanMe[])
        end

        rawglobal = YAXArrayBase.get_global_attrs(handle)
        globalatts = Dict{String,Any}(string(k) => v for (k, v) in rawglobal)
        axesbyname = Dict(DD.name(v.ax) => v.ax for (k, v) in dimlist)
        Dataset(cubes, axesbyname, globalatts)
    end
end
387
446
# Base.getindex(x::Dataset; kwargs...) = subsetcube(x; kwargs...)
388
447
# Build a `Dataset` from the directory currently stored in
# `YAXArrays.YAXDefaults.cubedir`, forwarding all keyword arguments.
function YAXDataset(; kwargs...)
    return Dataset(YAXArrays.YAXDefaults.cubedir[]; kwargs...)
end
0 commit comments