|
| 1 | +module HDF5Ext |
| 2 | +import YAXArrayBase: YAXArrayBase as YAB |
| 3 | +using HDF5 |
| 4 | + |
"""
    HDF5Dataset(filename; mode="r")

Dataset backend to read HDF5 files using HDF5.jl.

# Fields
- `filename`: path handed to `HDF5.h5open` whenever the file has to be opened.
- `mode`: mode string handed to `HDF5.h5open` (defaults to `"r"`).
- `handle`: reference holding the currently open `HDF5.File`, or `nothing` when no
  file handle is stored.
"""
struct HDF5Dataset
    filename::String
    mode::String
    handle::Base.RefValue{Union{Nothing,HDF5.File}}
end

# Convenience constructor: a fresh dataset starts without an open file handle.
HDF5Dataset(filename; mode="r") =
    HDF5Dataset(filename, mode, Ref{Union{Nothing,HDF5.File}}(nothing))
"""
    dsopen(f, ds::HDF5Dataset)

Apply `f` to an HDF5 file belonging to `ds` and return the result.

If `ds` already stores an open file handle, that handle is passed to `f`. Otherwise the
work is delegated to `HDF5.h5open(f, ds.filename, ds.mode)`.
"""
function dsopen(f, ds::HDF5Dataset)
    current = ds.handle[]
    if current !== nothing && Base.isopen(current)
        return f(current)
    end
    return HDF5.h5open(f, ds.filename, ds.mode)
end
"""
    YAB.open_dataset_handle(f, ds::HDF5Dataset)

Call `f(ds)` while `ds` is guaranteed to store an open file handle.

If `ds` already stores an open handle, `f(ds)` is called directly and the handle is left
untouched. Otherwise the file is opened with `HDF5.h5open(ds.filename, ds.mode)`, stored
in `ds.handle` for the duration of the call, and afterwards the stored handle is reset to
`nothing` and the file is closed, even when `f` throws.
"""
function YAB.open_dataset_handle(f, ds::HDF5Dataset)
    current = ds.handle[]
    if current !== nothing && Base.isopen(current)
        return f(ds)
    end
    opened = nothing
    try
        opened = HDF5.h5open(ds.filename, ds.mode)
        ds.handle[] = opened
        return f(ds)
    finally
        ds.handle[] = nothing
        # Close the file we opened ourselves; resetting the reference alone would
        # leave the file open until it is released some other way.
        opened === nothing || close(opened)
    end
end
| 37 | + |
# Register the HDF5 backend in YAXArrayBase's backend tables when this module is loaded.
function __init__()
    @debug "new driver key :HDF5, updating backendlist."
    YAB.backendlist[:HDF5] = HDF5Dataset
    # The dot is escaped so that only a literal ".h5" suffix selects this backend;
    # an unescaped `.` would also match names such as "data_h5".
    push!(YAB.backendregex, r"\.h5$" => HDF5Dataset)
end
| 43 | + |
"""
    get_all_paths(file, prefix="")

Collect the paths of all `HDF5.Dataset` objects reachable from `file`.

Groups are descended into recursively. Each returned path is built by joining the keys
with `/`, starting from `prefix` (no leading separator when `prefix` is empty).
Objects that are neither datasets nor groups are skipped.
"""
function get_all_paths(file, prefix="")
    collected = String[]
    for name in keys(file)
        child = file[name]
        path = isempty(prefix) ? name : "$prefix/$name"
        if child isa HDF5.Group
            # Recurse, carrying the path of the group as the new prefix.
            append!(collected, get_all_paths(child, path))
        elseif child isa HDF5.Dataset
            push!(collected, path)
        end
    end
    return collected
end
| 60 | + |
"""
    get_dims(f, var)

Return the names of the objects referenced by the `DIMENSION_LIST` attribute of the
variable `var` in `f`. An empty `Vector{String}` is returned when the variable has no
such attribute.
"""
function get_dims(f, var)
    dims = String[]
    ds = f[var]
    # Query the attribute collection explicitly instead of calling `haskey` on the
    # dataset itself.
    if haskey(HDF5.attributes(ds), "DIMENSION_LIST")
        dimension_list = read_attribute(ds, "DIMENSION_LIST")
        # NOTE(review): names are pushed in the order stored in the attribute; confirm
        # this matches the axis order of the array as seen from Julia.
        for dimensions in dimension_list
            for dim_ref in dimensions
                # NOTE(review): `HDF5.name` presumably yields an absolute path (leading
                # "/"), unlike the relative paths built by `get_all_paths` - confirm
                # that callers cope with both forms.
                push!(dims, HDF5.name(f[dim_ref]))
            end
        end
    end
    return dims
end
| 74 | + |
"""
    YAB.get_varnames(ds::HDF5Dataset)

Return a list of variable names, i.e. the paths of all datasets found in the file.
"""
function YAB.get_varnames(ds::HDF5Dataset)
    return dsopen(get_all_paths, ds)
end
| 77 | + |
"""
    YAB.get_var_dims(ds::HDF5Dataset, name)

Return a list of dimension names for the variable `name`.
"""
function YAB.get_var_dims(ds::HDF5Dataset, name)
    return dsopen(ds) do h5
        get_dims(h5, name)
    end
end
| 80 | + |
"""
    get_var_attrs(file, name)

Return a `Dict` with the attributes of the object `name` in `file`, with the
`DIMENSION_LIST` entry removed when it is present.
"""
function get_var_attrs(file, name)
    variable = file[name]
    result = Dict(attrs(variable))
    # `DIMENSION_LIST` is dropped from the returned attributes; deleting a missing key
    # is a no-op.
    delete!(result, "DIMENSION_LIST")
    return result
end
| 86 | + |
"""
    YAB.get_var_attrs(ds::HDF5Dataset, name)

Return a dict with the attributes of the variable `name`.
"""
function YAB.get_var_attrs(ds::HDF5Dataset, name)
    return dsopen(ds) do h5
        get_var_attrs(h5, name)
    end
end
| 89 | + |
"""
    YAB.get_global_attrs(ds::HDF5Dataset)

Return a dict with the global attributes of the dataset, i.e. the attributes attached
to the file itself.
"""
function YAB.get_global_attrs(ds::HDF5Dataset)
    return dsopen(ds) do root
        Dict(attrs(root))
    end
end
| 92 | + |
"""
    YAB.get_var_handle(ds::HDF5Dataset, i; persist=true)

Return a handle to the variable stored under `i` in `ds`.

With `persist=true`, or when `ds` stores no open file handle, the file is opened anew
with `HDF5.h5open(ds.filename, ds.mode)` and the variable is looked up in that new file
handle. Otherwise the variable is looked up in the handle currently stored in `ds`.
"""
function YAB.get_var_handle(ds::HDF5Dataset, i; persist=true)
    handle = ds.handle[]
    if persist || handle === nothing || !Base.isopen(handle)
        # This branch previously referred to `NetCDF.open` and `NetCDFVariable`, which
        # are not available in this module; use HDF5.jl directly instead.
        # NOTE(review): the file opened here is not closed explicitly - presumably it is
        # released by HDF5.jl once the returned dataset is no longer referenced; confirm.
        # NOTE(review): if callers need a DiskArrays-compatible array, a wrapper type
        # around the returned `HDF5.Dataset` may still be required - confirm.
        return HDF5.h5open(ds.filename, ds.mode)[i]
    end
    return handle[i]
end
# Report whether the file behind `ds` contains an object under the key `k`.
function Base.haskey(ds::HDF5Dataset, k)
    return dsopen(ds) do h5
        haskey(h5, k)
    end
end
| 103 | + |
| 104 | + |
| 105 | +end |
0 commit comments