Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@ while `A.keys isa Tuple` for matrices & higher. But `axiskeys(A)` always returns
* Named tuples can be converted to and from keyed vectors,
with `collect(keys(nt)) == Symbol.(axiskeys(V,1))`

* The [Tables.jl](https://github.com/JuliaData/Tables.jl) interface is supported,
with `wrapdims(df, :val, :x, :y)` creating a matrix from 3 columns.

* [FFTW](https://github.com/JuliaMath/FFTW.jl)`.fft` transforms the keys;
if these are times such as [Unitful](https://github.com/PainterQubits/Unitful.jl)`.s`
then the results are frequency labels. ([PR#15](https://github.com/mcabbott/AxisKeys.jl/pull/15).)
Expand Down
101 changes: 101 additions & 0 deletions src/tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,104 @@ end

# end
# end

"""
AxisKeys.populate!(A, table, value; force=false)

Populate `A` with the contents of the `value` column in a provided `table`, matching the
[Tables.jl](https://github.com/JuliaData/Tables.jl) API. The `table` must contain columns
corresponding to the keys in `A` and implement `Tables.rows`. If the keys in `A` do not
uniquely identify rows in the `table` then an `ArgumentError` is thrown. If `force` is true
then the duplicate (non-unique) entries will be overwritten.
"""
function populate!(A, table, value::Symbol; force=false)
    # The mask records which cells have already been written, so a repeated key can
    # be reported rather than silently overwritten. When `force` is set the mask is
    # never read, so only a zero-dimensional placeholder is created.
    mask = force ? falses() : falses(size(A))

    for row in Tables.rows(table)
        # Key values of this row, one per dimension name of `A`, in dimension order.
        vals = Tuple(Tables.getcolumn(row, name) for name in dimnames(A))
        # Turn each key value into an index using the key vector of its dimension.
        idx = map(findindex, vals, axiskeys(A))

        if !force
            # A cell that is already marked means this key combination was seen before.
            if mask[idx...]
                throw(ArgumentError("Key $vals is not unique"))
            end
            mask[idx...] = true
        end

        # Store the row's entry from the `value` column at the located position.
        A[idx...] = Tables.getcolumn(row, value)
    end

    return A
end

"""
wrapdims(table, value, names...; default=undef, sort=false, force=false)

Construct `KeyedArray(NamedDimsArray(A,names),keys)` from a `table` matching
the [Tables.jl](https://github.com/JuliaData/Tables.jl) API.
(It must support both `Tables.columns` and `Tables.rows`.)

The contents of the array is taken from the column `value::Symbol` of the table.
Each symbol in `names` specifies a column whose unique entries
become the keys along a dimension of the array.

If there is no row in the table matching a possible set of keys,
then this element of the array is undefined, unless you provide the `default` keyword.
If several rows share the same set of keys, then by default an `ArgumentError` is thrown.
Keyword `force=true` will instead cause these non-unique entries to be overwritten.

Setting `AxisKeys.nameouter() = false` will reverse the order of wrappers produced.
"""
function wrapdims(table, value::Symbol, names::Symbol...; kw...)
    # `nameouter()` decides which wrapper type is handed to `_wrap_table`.
    # `identity` leaves the key vectors as they are extracted from the table.
    outer = nameouter() == false ? KeyedArray : NamedDimsArray
    return _wrap_table(outer, identity, table, value, names...; kw...)
end

"""
wrapdims(df, UniqueVector, :val, :x, :y)

Converts a Tables.jl table to a `KeyedArray` + `NamedDimsArray` pair,
using column `:val` for values, and columns `:x, :y` for names & keys.
Optional 2nd argument applies this type to all the key-vectors.
"""
function wrapdims(table, KT::Type, value::Symbol, names::Symbol...; kw...)
    # Same selection of wrapper type as the method without `KT`; here `KT` is
    # forwarded so that it is applied to every key vector.
    outer = nameouter() == false ? KeyedArray : NamedDimsArray
    return _wrap_table(outer, KT, table, value, names...; kw...)
end

"""
    _wrap_table(AT, KT, table, value, names...; default=undef, sort=false, kwargs...)

Internal helper behind the table methods of `wrapdims`. Build an array of type `AT`
whose keys are the unique entries of the columns `names` (each passed through `KT`),
then fill it from column `value` with `populate!`. Remaining `kwargs` are forwarded
to `populate!`.
"""
function _wrap_table(AT::Type, KT, table, value::Symbol, names::Symbol...; default=undef, sort::Bool=false, kwargs...)
    columns = Tables.columns(table)

    # One `name => keys` pair per requested dimension; keys are the unique column
    # entries, optionally sorted in place before being wrapped by `KT`.
    keypairs = map(names) do name
        uniq = unique(Tables.getcolumn(columns, name))
        if sort
            Base.sort!(uniq)
        end
        name => KT(uniq)
    end

    # The value column supplies the element type (and array type) of the storage.
    vals = Tables.getcolumn(columns, value)

    # One axis per key vector, as long as that vector.
    sz = map(p -> length(last(p)), keypairs)

    # Without a default the storage is left uninitialised; otherwise the element
    # type is widened to admit `default` and every cell starts out as `default`.
    data = if default === undef
        similar(vals, sz)
    else
        fill!(similar(vals, Union{eltype(vals), typeof(default)}, sz), default)
    end

    A = AT(data; keypairs...)

    populate!(A, table, value; kwargs...)
    return A
end
72 changes: 64 additions & 8 deletions test/_packages.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,70 @@ end
# Tests for the Tables.jl integration: keyed arrays used as a table source, and
# tables used as a sink from which arrays are built or filled.
# NOTE(review): this span was taken from a rendered diff, so the checks directly below
# and the "source" testset repeat each other (removed and added lines appear together)
# — confirm against the real file.
@testset "tables" begin
using Tables

R = wrapdims(rand(2,3), 11:12, 21:23)
N = wrapdims(rand(2,3), a=[11, 12], b=[21, 22, 23.0])

@test keys(first(Tables.rows(R))) == (:dim_1, :dim_2, :value)
@test keys(first(Tables.rows(N))) == (:a, :b, :value)

@test Tables.columns(N).a == [11, 12, 11, 12, 11, 12]

# Arrays as a table source: each row carries one column per dimension plus `:value`.
@testset "source" begin
R = wrapdims(rand(2,3), 11:12, 21:23)
N = wrapdims(rand(2,3), a=[11, 12], b=[21, 22, 23.0])

@test keys(first(Tables.rows(R))) == (:dim_1, :dim_2, :value)
@test keys(first(Tables.rows(N))) == (:a, :b, :value)

@test Tables.columns(N).a == [11, 12, 11, 12, 11, 12]
end
# Tables as a sink: building a new array with `wrapdims`, or filling one with `populate!`.
@testset "sink" begin
# Reference array with three named dimensions; `table` is its column-table form.
A = KeyedArray(rand(24, 11, 3); time = 0:23, loc = -5:5, id = ["a", "b", "c"])
table = Tables.columntable(A)

# Test fully constructing from a table
# Common when working with adhoc data
B = wrapdims(table, :value, :time, :loc, :id)
@test B == A

# Test wrapping of key vectors, and wrong order:
U = wrapdims(table, UniqueVector, :value, :id, :time, :loc)
@test axiskeys(U, :time) isa UniqueVector
@test U(time=3, id="b") == A(time=3, id="b")

# Test populating an existing array (e.g., expected data based on calculated targets/offsets)
C = KeyedArray(
zeros(Float64, size(A));
time = unique(table.time),
loc = unique(table.loc),
id = unique(table.id),
)
@test C != A
AxisKeys.populate!(C, table, :value)
@test C == A

# Constructing a NamedDimsArray with different default value and table type
# Partial populating
r_table = Tables.rowtable(A)
n = length(r_table)
# Random row selection (unseeded), so the set of missing cells differs between runs.
idx = rand(Bool, n)
D = wrapdims(r_table[idx], :value, :time, :loc, :id; default=missing)
# dimnames should still match, but we'll have missing values
@test dimnames(D) == dimnames(A)
@test any(ismissing, D)

# BTW, this is why it's a method of wrapdims, not KeyedArray:
# @code_warntype wrapdims(table, :value, :time, :loc, :id) # ::Any
# @code_warntype wrapdims(r_table[idx], :value, :time, :loc, :id; default=missing)

# Construction with invalid columns error as expected, but the specific error is
# dependent on the table type.
# ERROR: ArgumentError: wrong number of names, got (:q, :time, :loc, :id) with ndims(A) == 1
@test_throws ArgumentError wrapdims(Tables.rowtable(A), :q, :time, :loc, :id)
# ERROR: ArgumentError: wrong number of names, got (:value, :p, :loc, :id) with ndims(A) == 1
@test_throws ArgumentError wrapdims(Tables.rowtable(A), :value, :p, :loc, :id)
# ERROR: type NamedTuple has no field q
@test_throws ErrorException wrapdims(Tables.columntable(A), :q, :time, :loc, :id)
# ERROR: type NamedTuple has no field p
@test_throws ErrorException wrapdims(Tables.columntable(A), :value, :p, :loc, :id)

# Construction with duplicates
# ERROR: ArgumentError: Key (Date("2019-01-01"), -5) is not unique
# NOTE(review): the message above shows a Date key, but `time` here is 0:23 — the
# example text looks carried over from other data; confirm.
@test_throws ArgumentError wrapdims(table, :value, :time, :loc)
# With `force=true` the duplicates are overwritten; the expected result is the id "c" slice.
@test wrapdims(r_table, :value, :time, :loc; force=true) == C(:, :, Key("c"))
end
end
@testset "stack" begin
using LazyStack
Expand Down
6 changes: 3 additions & 3 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ using Statistics, OffsetArrays, Tables, UniqueVectors, LazyStack
AxisKeys.nameouter() = false
end

# include("_basic.jl")
include("_basic.jl")

include("_functions.jl")

# include("_fast.jl")
include("_fast.jl")

# include("_packages.jl")
include("_packages.jl")

end
@testset "fast findfirst & findall" begin
Expand Down