diff --git a/README.md b/README.md index 481311b..7b86108 100644 --- a/README.md +++ b/README.md @@ -140,6 +140,9 @@ while `A.keys isa Tuple` for matrices & higher. But `axiskeys(A)` always returns * Named tuples can be converted to and from keyed vectors, with `collect(keys(nt)) == Symbol.(axiskeys(V),1)` +* The [Tables.jl](https://github.com/JuliaData/Tables.jl) interface is supported, + with `wrapdims(df, :val, :x, :y)` creating a matrix from 3 columns. + * [FFTW](https://github.com/JuliaMath/FFTW.jl)`.fft` transforms the keys; if these are times such as [Unitful](https://github.com/PainterQubits/Unitful.jl)`.s` then the results are fequency labels. ([PR#15](https://github.com/mcabbott/AxisKeys.jl/pull/15).) diff --git a/src/tables.jl b/src/tables.jl index ec03c3e..6fb910f 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -115,3 +115,104 @@ end # end # end + +""" + AxisKeys.populate!(A, table, value; force=false) + +Populate `A` with the contents of the `value` column in a provided `table`, matching the +[Tables.jl](https://github.com/JuliaData/Tables.jl) API. The `table` must contain columns +corresponding to the keys in `A` and implement `Tables.rows`. If the keys in `A` do not +uniquely identify rows in the `table` then an `ArgumentError` is thrown. If `force` is true +then the duplicate (non-unique) entries will be overwritten. +""" +function populate!(A, table, value::Symbol; force=false) + # Use a BitArray mask to detect duplicates and error instead of overwriting. + mask = force ? falses() : falses(size(A)) + + for r in Tables.rows(table) + vals = Tuple(Tables.getcolumn(r, c) for c in dimnames(A)) + inds = map(findindex, vals, axiskeys(A)) + + # Handle duplicate error checking if applicable + if !force + # Error if mask already set. + mask[inds...] && throw(ArgumentError("Key $vals is not unique")) + # Set mask, marking that we've set this index + setindex!(mask, true, inds...) 
+ end + + # Insert our value into the data array + setindex!(A, Tables.getcolumn(r, value), inds...) + end + + return A +end + +""" + wrapdims(table, value, names...; default=undef, sort=false, force=false) + +Construct `KeyedArray(NamedDimsArray(A,names),keys)` from a `table` matching +the [Tables.jl](https://github.com/JuliaData/Tables.jl) API. +(It must support both `Tables.columns` and `Tables.rows`.) + +The contents of the array are taken from the column `value::Symbol` of the table. +Each symbol in `names` specifies a column whose unique entries +become the keys along a dimension of the array. + +If there is no row in the table matching a possible set of keys, +then this element of the array is undefined, unless you provide the `default` keyword. +If several rows share the same set of keys, then by default an `ArgumentError` is thrown. +Keyword `force=true` will instead cause these non-unique entries to be overwritten. + +Setting `AxisKeys.nameouter() = false` will reverse the order of wrappers produced. +""" +function wrapdims(table, value::Symbol, names::Symbol...; kw...) + if nameouter() == false + _wrap_table(KeyedArray, identity, table, value, names...; kw...) + else + _wrap_table(NamedDimsArray, identity, table, value, names...; kw...) + end +end + +""" + wrapdims(df, UniqueVector, :val, :x, :y) + +Converts a Tables.jl table to a `KeyedArray` + `NamedDimsArray` pair, +using column `:val` for values, and columns `:x, :y` for names & keys. +Optional 2nd argument applies this type to all the key-vectors. +""" +function wrapdims(table, KT::Type, value::Symbol, names::Symbol...; kw...) + if nameouter() == false + _wrap_table(KeyedArray, KT, table, value, names...; kw...) + else + _wrap_table(NamedDimsArray, KT, table, value, names...; kw...) + end +end + +function _wrap_table(AT::Type, KT, table, value::Symbol, names::Symbol...; default=undef, sort::Bool=false, kwargs...) 
+ # get columns of the input table source + cols = Tables.columns(table) + + # Extract key columns + pairs = map(names) do k + col = unique(Tables.getcolumn(cols, k)) + sort && Base.sort!(col) + return k => KT(col) + end + + # Extract data/value column + vals = Tables.getcolumn(cols, value) + + # Initialize the KeyedArray + sz = length.(last.(pairs)) + if default === undef + data = similar(vals, sz) + else + data = similar(vals, Union{eltype(vals), typeof(default)}, sz) + fill!(data, default) + end + A = AT(data; pairs...) + + populate!(A, table, value; kwargs...) + return A +end diff --git a/test/_packages.jl b/test/_packages.jl index 5d90449..5826558 100644 --- a/test/_packages.jl +++ b/test/_packages.jl @@ -29,14 +29,70 @@ end @testset "tables" begin using Tables - R = wrapdims(rand(2,3), 11:12, 21:23) - N = wrapdims(rand(2,3), a=[11, 12], b=[21, 22, 23.0]) - - @test keys(first(Tables.rows(R))) == (:dim_1, :dim_2, :value) - @test keys(first(Tables.rows(N))) == (:a, :b, :value) - - @test Tables.columns(N).a == [11, 12, 11, 12, 11, 12] - + @testset "source" begin + R = wrapdims(rand(2,3), 11:12, 21:23) + N = wrapdims(rand(2,3), a=[11, 12], b=[21, 22, 23.0]) + + @test keys(first(Tables.rows(R))) == (:dim_1, :dim_2, :value) + @test keys(first(Tables.rows(N))) == (:a, :b, :value) + + @test Tables.columns(N).a == [11, 12, 11, 12, 11, 12] + end + @testset "sink" begin + A = KeyedArray(rand(24, 11, 3); time = 0:23, loc = -5:5, id = ["a", "b", "c"]) + table = Tables.columntable(A) + + # Test fully constructing from a table + # Common when working with adhoc data + B = wrapdims(table, :value, :time, :loc, :id) + @test B == A + + # Test wrapping of key vectors, and wrong order: + U = wrapdims(table, UniqueVector, :value, :id, :time, :loc) + @test axiskeys(U, :time) isa UniqueVector + @test U(time=3, id="b") == A(time=3, id="b") + + # Test populating an existing array (e.g., expected data based on calculated targets/offsets) + C = KeyedArray( + zeros(Float64, size(A)); + 
time = unique(table.time), + loc = unique(table.loc), + id = unique(table.id), + ) + @test C != A + AxisKeys.populate!(C, table, :value) + @test C == A + + # Constructing a NamedDimsArray with different default value and table type + # Partial populating + r_table = Tables.rowtable(A) + n = length(r_table) + idx = rand(Bool, n) + D = wrapdims(r_table[idx], :value, :time, :loc, :id; default=missing) + # dimnames should still match, but we'll have missing values + @test dimnames(D) == dimnames(A) + @test any(ismissing, D) + + # BTW, this is why it's a method of wrapdims, not KeyedArray: + # @code_warntype wrapdims(table, :value, :time, :loc, :id) # ::Any + # @code_warntype wrapdims(r_table[idx], :value, :time, :loc, :id; default=missing) + + # Construction with invalid columns error as expected, but the specific error is + # dependent on the table type. + # ERROR: ArgumentError: wrong number of names, got (:q, :time, :loc, :id) with ndims(A) == 1 + @test_throws ArgumentError wrapdims(Tables.rowtable(A), :q, :time, :loc, :id) + # ERROR: ArgumentError: wrong number of names, got (:value, :p, :loc, :id) with ndims(A) == 1 + @test_throws ArgumentError wrapdims(Tables.rowtable(A), :value, :p, :loc, :id) + # ERROR: type NamedTuple has no field q + @test_throws ErrorException wrapdims(Tables.columntable(A), :q, :time, :loc, :id) + # ERROR: type NamedTuple has no field p + @test_throws ErrorException wrapdims(Tables.columntable(A), :value, :p, :loc, :id) + + # Construction with duplicates + # ERROR: ArgumentError: Key (0, -5) is not unique + @test_throws ArgumentError wrapdims(table, :value, :time, :loc) + @test wrapdims(r_table, :value, :time, :loc; force=true) == C(:, :, Key("c")) + end end @testset "stack" begin using LazyStack diff --git a/test/runtests.jl b/test/runtests.jl index 8cc664f..d050bf3 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,13 +9,13 @@ using Statistics, OffsetArrays, Tables, UniqueVectors, LazyStack AxisKeys.nameouter() = 
false end - # include("_basic.jl") + include("_basic.jl") include("_functions.jl") - # include("_fast.jl") + include("_fast.jl") - # include("_packages.jl") + include("_packages.jl") end @testset "fast findfirst & findall" begin