Skip to content

Commit d2d2436

Browse files
committed
Add functions name and label of an array
1 parent 1069fa1 commit d2d2436

File tree

2 files changed

+94
-63
lines changed

2 files changed

+94
-63
lines changed

src/Cubes/Cubes.jl

Lines changed: 81 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ using DiskArrayTools: CFDiskArray
1515
using DocStringExtensions
1616
using Tables: istable, schema, columns
1717
using DimensionalData: DimensionalData as DD, AbstractDimArray, NoName
18-
import DimensionalData: name
18+
import DimensionalData: name, label
1919

2020
export concatenatecubes, caxes, subsetcube, readcubedata, renameaxis!, YAXArray, setchunks, cache
2121

@@ -91,7 +91,7 @@ It can wrap normal arrays or, more typically DiskArrays.
9191
9292
$(FIELDS)
9393
"""
94-
struct YAXArray{T,N,A<:AbstractArray{T,N}, D, Me} <: AbstractDimArray{T,N,D,A}
94+
struct YAXArray{T,N,A<:AbstractArray{T,N},D,Me} <: AbstractDimArray{T,N,D,A}
9595
"`Tuple` of Dimensions containing the Axes of the Cube"
9696
axes::D
9797
"length(axes)-dimensional array which holds the data, this can be a lazy DiskArray"
@@ -120,7 +120,7 @@ struct YAXArray{T,N,A<:AbstractArray{T,N}, D, Me} <: AbstractDimArray{T,N,D,A}
120120
throw(ArgumentError("Can not construct YAXArray, supplied chunk dimension is $(ndims(chunks)) while the number of dims is $(length(axes))"))
121121
else
122122
axes = DD.format(axes, data)
123-
return new{eltype(data),ndims(data),typeof(data),typeof(axes), typeof(properties)}(
123+
return new{eltype(data),ndims(data),typeof(data),typeof(axes),typeof(properties)}(
124124
axes,
125125
data,
126126
properties,
@@ -131,16 +131,34 @@ struct YAXArray{T,N,A<:AbstractArray{T,N}, D, Me} <: AbstractDimArray{T,N,D,A}
131131
end
132132
end
133133

134-
name(::YAXArray) = NoName()
135134

136-
YAXArray(axes, data, properties = Dict{String,Any}(); cleaner = CleanMe[], chunks = eachchunk(data)) =
135+
"Name of a YAXArray using CF conventions. Searches first for metadata `long_name`, followed by `standard_name` and finally `name`."
136+
function name(a::YAXArray)
137+
# as implemented in python xarray
138+
isempty(a.properties) && return NoName()
139+
haskey(a.properties, "long_name") && return a.properties["long_name"]
140+
haskey(a.properties, "standard_name") && return a.properties["standard_name"]
141+
haskey(a.properties, "name") && return a.properties["name"]
142+
return NoName()
143+
end
144+
145+
"Label of a YAXArray using CF conventions. Includes the name and the unit, if present. Searches first for metadata `units`, followed by `unit`."
146+
function label(a::YAXArray)
147+
# as implemented in python xarray
148+
isempty(a.properties) && return ""
149+
haskey(a.properties, "units") && return "$(name(a)) [$(a.properties["units"])]"
150+
haskey(a.properties, "unit") && return "$(name(a)) [$(a.properties["unit"])]"
151+
return string(name(a))
152+
end
153+
154+
YAXArray(axes, data, properties=Dict{String,Any}(); cleaner=CleanMe[], chunks=eachchunk(data)) =
137155
YAXArray(axes, data, properties, chunks, cleaner)
138-
YAXArray(axes,data,properties,cleaner) = YAXArray(axes,data,properties,eachchunk(data),cleaner)
156+
YAXArray(axes, data, properties, cleaner) = YAXArray(axes, data, properties, eachchunk(data), cleaner)
139157
function YAXArray(x::AbstractArray)
140158
ax = caxes(x)
141159
props = getattributes(x)
142160
chunks = eachchunk(x)
143-
YAXArray(ax, x, props,chunks=chunks)
161+
YAXArray(ax, x, props, chunks=chunks)
144162
end
145163

146164

@@ -172,20 +190,20 @@ end
172190
Base.Generator(f, A::YAXArray) = Base.Generator(f, parent(A))
173191
Base.ndims(a::YAXArray{<:Any,N}) where {N} = N
174192
Base.eltype(a::YAXArray{T}) where {T} = T
175-
function Base.permutedims(c::YAXArray, p)
193+
function Base.permutedims(c::YAXArray, p)
176194
newdims = DD.sortdims(DD.dims(c), Tuple(p))
177195
dimnums = map(d -> DD.dimnum(c, d), p)
178196
newdata = permutedims(getdata(c), dimnums)
179197
newchunks = DiskArrays.GridChunks(eachchunk(c).chunks[collect(dimnums)])
180198
YAXArray(newdims, newdata, c.properties, newchunks, c.cleaner)
181199
end
182-
DiskArrays.cache(a::YAXArray;maxsize=1000) = DD.rebuild(a,cache(a.data;maxsize))
200+
DiskArrays.cache(a::YAXArray; maxsize=1000) = DD.rebuild(a, cache(a.data; maxsize))
183201

184202
# DimensionalData overloads
185203

186-
DD.dims(x::YAXArray) = getfield(x,:axes)
204+
DD.dims(x::YAXArray) = getfield(x, :axes)
187205
DD.refdims(::YAXArray) = ()
188-
DD.metadata(x::YAXArray) = getfield(x,:properties)
206+
DD.metadata(x::YAXArray) = getfield(x, :properties)
189207

190208
function DD.rebuild(A::YAXArray, data::AbstractArray, dims::Tuple, refdims::Tuple, name, metadata)
191209
#chunks = map(dims, eachchunk(data).chunks) do d, chunk
@@ -205,14 +223,14 @@ function DD.rebuild(A::YAXArray; data=parent(A), dims=DD.dims(A), metadata=DD.me
205223
end
206224

207225
function caxes(x)
208-
#@show x
209-
#@show typeof(x)
210-
dims = map(enumerate(dimnames(x))) do a
211-
index, symbol = a
212-
values = YAXArrayBase.dimvals(x, index)
213-
DD.Dim{symbol}(values)
214-
end
215-
(dims... ,)
226+
#@show x
227+
#@show typeof(x)
228+
dims = map(enumerate(dimnames(x))) do a
229+
index, symbol = a
230+
values = YAXArrayBase.dimvals(x, index)
231+
DD.Dim{symbol}(values)
232+
end
233+
(dims...,)
216234
end
217235

218236
caxes(x::DD.AbstractDimArray) = collect(DD.dims(x))
@@ -238,18 +256,18 @@ function readcubedata(x)
238256
YAXArray(caxes(x), getindex_all(x), getattributes(x))
239257
end
240258

241-
interpret_cubechunks(cs::NTuple{N,Int},cube) where N = DiskArrays.GridChunks(getdata(cube),cs)
242-
interpret_cubechunks(cs::DiskArrays.GridChunks,_) = cs
243-
interpret_dimchunk(cs::Integer,s) = DiskArrays.RegularChunks(cs,0,s)
259+
interpret_cubechunks(cs::NTuple{N,Int}, cube) where {N} = DiskArrays.GridChunks(getdata(cube), cs)
260+
interpret_cubechunks(cs::DiskArrays.GridChunks, _) = cs
261+
interpret_dimchunk(cs::Integer, s) = DiskArrays.RegularChunks(cs, 0, s)
244262
interpret_dimchunk(cs::DiskArrays.ChunkVector, _) = cs
245263

246-
function interpret_cubechunks(cs,cube)
264+
function interpret_cubechunks(cs, cube)
247265
oldchunks = DiskArrays.eachchunk(cube).chunks
248266
for k in keys(cs)
249-
i = findAxis(k,cube)
267+
i = findAxis(k, cube)
250268
if i !== nothing
251-
dimchunk = interpret_dimchunk(cs[k],size(cube.data,i))
252-
oldchunks = Base.setindex(oldchunks,dimchunk,i)
269+
dimchunk = interpret_dimchunk(cs[k], size(cube.data, i))
270+
oldchunks = Base.setindex(oldchunks, dimchunk, i)
253271
end
254272
end
255273
GridChunks(oldchunks)
@@ -267,7 +285,7 @@ of this chunking, use `savecube` on the resulting array. The `chunks` argument c
267285
- an AbstractDict or NamedTuple mapping one or more axis names to chunk sizes
268286
269287
"""
270-
setchunks(c::YAXArray,chunks) = YAXArray(c.axes,c.data,c.properties,interpret_cubechunks(chunks,c),c.cleaner)
288+
setchunks(c::YAXArray, chunks) = YAXArray(c.axes, c.data, c.properties, interpret_cubechunks(chunks, c), c.cleaner)
271289
cubechunks(c) = approx_chunksize(eachchunk(c))
272290
DiskArrays.eachchunk(c::YAXArray) = c.chunks
273291
getindex_all(a) = getindex(a, ntuple(_ -> Colon(), ndims(a))...).data
@@ -283,45 +301,45 @@ end
283301
=#
284302

285303

286-
function batchextract(x,i)
304+
function batchextract(x, i)
287305
# This function should be documented and moved to DimensionalData
288306
sch = schema(i)
289307
axinds = map(sch.names) do n
290-
findAxis(n,x)
308+
findAxis(n, x)
291309
end
292310
tcols = columns(i)
293311
#Try to find a column denoting new axis name and values
294312
newaxcol = nothing
295-
296-
if any(isnothing,axinds)
297-
allnothings = findall(isnothing,axinds)
313+
314+
if any(isnothing, axinds)
315+
allnothings = findall(isnothing, axinds)
298316
if length(allnothings) == 1
299317
newaxcol = allnothings[1]
300318
end
301-
tcols = (;[p[1:2] for p in zip(keys(tcols), values(tcols), axinds) if !isnothing(last(p))]...)
302-
axinds = filter(!isnothing,axinds)
319+
tcols = (; [p[1:2] for p in zip(keys(tcols), values(tcols), axinds) if !isnothing(last(p))]...)
320+
axinds = filter(!isnothing, axinds)
303321
end
304-
322+
305323
allax = 1:ndims(x)
306-
axrem = setdiff(allax,axinds)
324+
axrem = setdiff(allax, axinds)
307325
ai1, ai2 = extrema(axinds)
308-
309-
if !all(diff(sort(collect(axinds))).==1)
326+
327+
if !all(diff(sort(collect(axinds))) .== 1)
310328
#Axes to be extracted from are not consecutive in cube -> permute
311-
p = [1:(ai1-1);collect(axinds);filter(!in(axinds),ai1:ai2);(ai2+1:ndims(x))]
312-
x_perm = permutedims(x,p)
313-
return batchextract(x_perm,i)
329+
p = [1:(ai1-1); collect(axinds); filter(!in(axinds), ai1:ai2); (ai2+1:ndims(x))]
330+
x_perm = permutedims(x, p)
331+
return batchextract(x_perm, i)
314332
end
315333

316-
cartinds = map(axinds,tcols) do iax,col
334+
cartinds = map(axinds, tcols) do iax, col
317335
axcur = caxes(x)[iax]
318336
map(col) do val
319-
axVal2Index(axcur,val)
337+
axVal2Index(axcur, val)
320338
end
321339
end
322-
323-
before = ntuple(_->Colon(),ai1-1)
324-
after = ntuple(_->Colon(),ndims(x)-ai2)
340+
341+
before = ntuple(_ -> Colon(), ai1 - 1)
342+
after = ntuple(_ -> Colon(), ndims(x) - ai2)
325343
sp = issorted(axinds) ? nothing : sortperm(collect(axinds))
326344
function makeindex(sp, inds...)
327345
if sp === nothing
@@ -330,37 +348,37 @@ function batchextract(x,i)
330348
CartesianIndex(inds[sp]...)
331349
end
332350
end
333-
indlist = makeindex.(Ref(sp),cartinds...)
334-
d = getdata(x)[before...,indlist,after...]
351+
indlist = makeindex.(Ref(sp), cartinds...)
352+
d = getdata(x)[before..., indlist, after...]
335353
cax = caxes(x)
336354
newax = if newaxcol == nothing
337-
outaxis_from_data(cax,axinds,indlist)
355+
outaxis_from_data(cax, axinds, indlist)
338356
else
339-
outaxis_from_column(i,newaxcol)
357+
outaxis_from_column(i, newaxcol)
340358
end
341359
outax = Tuple([axcopy(a) for a in cax][axrem]...)
342-
insert!(outax,minimum(axinds),newax)
343-
YAXArray(outax,d,x.properties)
360+
insert!(outax, minimum(axinds), newax)
361+
YAXArray(outax, d, x.properties)
344362
end
345363

346-
function outaxis_from_column(tab,icol)
364+
function outaxis_from_column(tab, icol)
347365
axdata = columns(tab)[icol]
348366
axname = schema(tab).names[icol]
349367
if eltype(axdata) <: AbstractString ||
350-
(!issorted(axdata) && !issorted(axdata, rev = true))
368+
(!issorted(axdata) && !issorted(axdata, rev=true))
351369
DD.rebuild(DD.name2dim(Symbol(axname)), axdata)
352370
else
353371
DD.rebuild(DD.name2dim(Symbol(axname)), axdata)
354372
end
355373
end
356374

357-
function outaxis_from_data(cax,axinds,indlist)
358-
mergeaxes = getindex.(Ref(cax),axinds)
375+
function outaxis_from_data(cax, axinds, indlist)
376+
mergeaxes = getindex.(Ref(cax), axinds)
359377
mergenames = axname.(mergeaxes)
360-
newname = join(mergenames,'_')
378+
newname = join(mergenames, '_')
361379
minai = minimum(axinds)
362380
mergevals = map(indlist) do i
363-
broadcast(mergeaxes,axinds) do ax,ai
381+
broadcast(mergeaxes, axinds) do ax, ai
364382
ax.values[i[ai-minai+1]]
365383
end
366384
end
@@ -471,14 +489,14 @@ function _subsetcube(z, subs; kwargs...)
471489
end
472490

473491

474-
function Base.getindex(a::YAXArray, args::DD.Dimension...; kwargs...)
492+
function Base.getindex(a::YAXArray, args::DD.Dimension...; kwargs...)
475493
kwargsdict = Dict{Any,Any}(kwargs...)
476494
for ext in YAXDefaults.subsetextensions
477495
ext(kwargsdict)
478496
end
479497
d2 = Dict()
480-
for (k,v) in kwargsdict
481-
d = getAxis(k,a)
498+
for (k, v) in kwargsdict
499+
d = getAxis(k, a)
482500
if d !== nothing
483501
d2[DD.name(d)] = v
484502
else
@@ -491,7 +509,7 @@ end
491509
Base.read(d::YAXArray) = getindex_all(d)
492510

493511
function formatbytes(x)
494-
exts = ["bytes", "KB", "MB", "GB", "TB","PB"]
512+
exts = ["bytes", "KB", "MB", "GB", "TB", "PB"]
495513
i = 1
496514
while x >= 1024
497515
i = i + 1
@@ -515,7 +533,7 @@ function DD.show_after(io::IO, mime, c::YAXArray)
515533

516534
# ? sizeof : Check if the element type is a bitstype or a union of bitstypes
517535
if (isconcretetype(eltype(c)) && isbitstype(eltype(c))) ||
518-
(eltype(c) isa Union && all(isbitstype, Base.uniontypes(eltype(c))))
536+
(eltype(c) isa Union && all(isbitstype, Base.uniontypes(eltype(c))))
519537

520538
println(io, "\n data size: ", formatbytes(cubesize(c)))
521539
else # fallback

test/Cubes/cubes.jl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,19 @@ using DimensionalData
104104
@test endswith(Cubes.formatbytes(1205), "KB")
105105
@test endswith(Cubes.formatbytes(1200000), "MB")
106106
end
107+
108+
@testset "YAXArrays with CF conventions" begin
109+
attrs = Dict(
110+
"standard_name" => "air_temperature",
111+
"long_name" => "global mean air temperature",
112+
"units" => "K",
113+
)
114+
a_cf = YAXArray(a.axes, a.data, attrs)
115+
@test DimensionalData.name(a) == DimensionalData.NoName()
116+
@test DimensionalData.label(a) == ""
117+
@test DimensionalData.name(a_cf) == "global mean air temperature"
118+
@test DimensionalData.label(a_cf) == "global mean air temperature [K]"
119+
end
107120
#=
108121
@testset "Subsets" begin
109122
s = YAXArrays.Cubes.subsetcube(a, X = 1.5..3.5)

0 commit comments

Comments
 (0)