Skip to content

Commit ecfa122

Browse files
authored
Merge pull request #21 from invenia/rf/tables
Add support for KeyedArrays as a Tables sink
2 parents 5d1d032 + 877cc80 commit ecfa122

File tree

4 files changed

+171
-11
lines changed

4 files changed

+171
-11
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,9 @@ while `A.keys isa Tuple` for matrices & higher. But `axiskeys(A)` always returns
140140
* Named tuples can be converted to and from keyed vectors,
141141
with `collect(keys(nt)) == Symbol.(axiskeys(V),1)`
142142

143+
* The [Tables.jl](https://github.com/JuliaData/Tables.jl) interface is supported,
144+
with `wrapdims(df, :val, :x, :y)` creating a matrix from 3 columns.
145+
143146
* [FFTW](https://github.com/JuliaMath/FFTW.jl)`.fft` transforms the keys;
144147
if these are times such as [Unitful](https://github.com/PainterQubits/Unitful.jl)`.s`
145148
then the results are frequency labels. ([PR#15](https://github.com/mcabbott/AxisKeys.jl/pull/15).)

src/tables.jl

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,104 @@ end
115115

116116
# end
117117
# end
118+
119+
"""
120+
AxisKeys.populate!(A, table, value; force=false)
121+
122+
Populate `A` with the contents of the `value` column in a provided `table`, matching the
123+
[Tables.jl](https://github.com/JuliaData/Tables.jl) API. The `table` must contain columns
124+
corresponding to the keys in `A` and implement `Tables.rows`. If the keys in `A` do not
125+
uniquely identify rows in the `table` then an `ArgumentError` is thrown. If `force` is true
126+
then the duplicate (non-unique) entries will be overwritten.
127+
"""
128+
function populate!(A, table, value::Symbol; force=false)
    # `seen` flags every cell of `A` that has already been written, so that a second
    # row landing on the same cell can be reported instead of silently overwriting.
    # When `force` is true the mask is never consulted, so an empty placeholder is used.
    seen = force ? falses() : falses(size(A))

    for row in Tables.rows(table)
        # Look up this row's key for each dimension of `A`, then translate the keys
        # into positional indices along the corresponding axes.
        keyvals = Tuple(Tables.getcolumn(row, name) for name in dimnames(A))
        inds = map(findindex, keyvals, axiskeys(A))

        if !force
            # A cell that is already flagged means two rows share the same keys.
            if seen[inds...]
                throw(ArgumentError("Key $keyvals is not unique"))
            end
            seen[inds...] = true
        end

        # Store the row's value in the cell addressed by its keys.
        A[inds...] = Tables.getcolumn(row, value)
    end

    return A
end
150+
151+
"""
152+
wrapdims(table, value, names...; default=undef, sort=false, force=false)
153+
154+
Construct `KeyedArray(NamedDimsArray(A,names),keys)` from a `table` matching
155+
the [Tables.jl](https://github.com/JuliaData/Tables.jl) API.
156+
(It must support both `Tables.columns` and `Tables.rows`.)
157+
158+
The contents of the array are taken from the column `value::Symbol` of the table.
159+
Each symbol in `names` specifies a column whose unique entries
160+
become the keys along a dimension of the array.
161+
162+
If there is no row in the table matching a possible set of keys,
163+
then this element of the array is undefined, unless you provide the `default` keyword.
164+
If several rows share the same set of keys, then by default an `ArgumentError` is thrown.
165+
Keyword `force=true` will instead cause these non-unique entries to be overwritten.
166+
167+
Setting `AxisKeys.nameouter() = false` will reverse the order of wrappers produced.
168+
"""
169+
function wrapdims(table, value::Symbol, names::Symbol...; kw...)
    # `nameouter()` selects which array type is handed to `_wrap_table`;
    # `identity` leaves the key vectors unwrapped.
    AT = nameouter() == false ? KeyedArray : NamedDimsArray
    return _wrap_table(AT, identity, table, value, names...; kw...)
end
176+
177+
"""
178+
wrapdims(df, UniqueVector, :val, :x, :y)
179+
180+
Converts a Tables.jl table to a `KeyedArray` + `NamedDimsArray` pair,
181+
using column `:val` for values, and columns `:x, :y` for names & keys.
182+
Optional 2nd argument applies this type to all the key-vectors.
183+
"""
184+
function wrapdims(table, KT::Type, value::Symbol, names::Symbol...; kw...)
    # `nameouter()` selects which array type is handed to `_wrap_table`;
    # `KT` is applied there to every key vector.
    AT = nameouter() == false ? KeyedArray : NamedDimsArray
    return _wrap_table(AT, KT, table, value, names...; kw...)
end
191+
192+
function _wrap_table(AT::Type, KT, table, value::Symbol, names::Symbol...; default=undef, sort::Bool=false, kwargs...)
    # Column-oriented view of the input table.
    columns = Tables.columns(table)

    # For each requested name, the distinct entries of that column become the keys
    # along one dimension (optionally sorted, then wrapped by `KT`).
    keypairs = map(names) do name
        keyvec = unique(Tables.getcolumn(columns, name))
        if sort
            Base.sort!(keyvec)
        end
        name => KT(keyvec)
    end

    # Column holding the array's contents.
    valcol = Tables.getcolumn(columns, value)

    # One dimension per key vector, sized by the number of distinct keys.
    dims = map(p -> length(last(p)), keypairs)

    # Without a `default` the storage is left uninitialised; otherwise the element
    # type is widened to admit `default` and every cell starts out holding it.
    data = if default === undef
        similar(valcol, dims)
    else
        fill!(similar(valcol, Union{eltype(valcol), typeof(default)}, dims), default)
    end

    # Wrap the storage with names and keys, then fill it from the table's rows.
    A = AT(data; keypairs...)
    populate!(A, table, value; kwargs...)
    return A
end

test/_packages.jl

Lines changed: 64 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,70 @@ end
2929
@testset "tables" begin
3030
using Tables
3131

32-
R = wrapdims(rand(2,3), 11:12, 21:23)
33-
N = wrapdims(rand(2,3), a=[11, 12], b=[21, 22, 23.0])
34-
35-
@test keys(first(Tables.rows(R))) == (:dim_1, :dim_2, :value)
36-
@test keys(first(Tables.rows(N))) == (:a, :b, :value)
37-
38-
@test Tables.columns(N).a == [11, 12, 11, 12, 11, 12]
39-
32+
@testset "source" begin
33+
R = wrapdims(rand(2,3), 11:12, 21:23)
34+
N = wrapdims(rand(2,3), a=[11, 12], b=[21, 22, 23.0])
35+
36+
@test keys(first(Tables.rows(R))) == (:dim_1, :dim_2, :value)
37+
@test keys(first(Tables.rows(N))) == (:a, :b, :value)
38+
39+
@test Tables.columns(N).a == [11, 12, 11, 12, 11, 12]
40+
end
41+
@testset "sink" begin
42+
A = KeyedArray(rand(24, 11, 3); time = 0:23, loc = -5:5, id = ["a", "b", "c"])
43+
table = Tables.columntable(A)
44+
45+
# Test fully constructing from a table
46+
# Common when working with adhoc data
47+
B = wrapdims(table, :value, :time, :loc, :id)
48+
@test B == A
49+
50+
# Test wrapping of key vectors, and wrong order:
51+
U = wrapdims(table, UniqueVector, :value, :id, :time, :loc)
52+
@test axiskeys(U, :time) isa UniqueVector
53+
@test U(time=3, id="b") == A(time=3, id="b")
54+
55+
# Test populating an existing array (e.g., expected data based on calculated targets/offsets)
56+
C = KeyedArray(
57+
zeros(Float64, size(A));
58+
time = unique(table.time),
59+
loc = unique(table.loc),
60+
id = unique(table.id),
61+
)
62+
@test C != A
63+
AxisKeys.populate!(C, table, :value)
64+
@test C == A
65+
66+
# Constructing a NamedDimsArray with different default value and table type
67+
# Partial populating
68+
r_table = Tables.rowtable(A)
69+
n = length(r_table)
70+
idx = rand(Bool, n)
71+
D = wrapdims(r_table[idx], :value, :time, :loc, :id; default=missing)
72+
# dimnames should still match, but we'll have missing values
73+
@test dimnames(D) == dimnames(A)
74+
@test any(ismissing, D)
75+
76+
# BTW, this is why it's a method of wrapdims, not KeyedArray:
77+
# @code_warntype wrapdims(table, :value, :time, :loc, :id) # ::Any
78+
# @code_warntype wrapdims(r_table[idx], :value, :time, :loc, :id; default=missing)
79+
80+
# Construction with invalid columns error as expected, but the specific error is
81+
# dependent on the table type.
82+
# ERROR: ArgumentError: wrong number of names, got (:q, :time, :loc, :id) with ndims(A) == 1
83+
@test_throws ArgumentError wrapdims(Tables.rowtable(A), :q, :time, :loc, :id)
84+
# ERROR: ArgumentError: wrong number of names, got (:value, :p, :loc, :id) with ndims(A) == 1
85+
@test_throws ArgumentError wrapdims(Tables.rowtable(A), :value, :p, :loc, :id)
86+
# ERROR: type NamedTuple has no field q
87+
@test_throws ErrorException wrapdims(Tables.columntable(A), :q, :time, :loc, :id)
88+
# ERROR: type NamedTuple has no field p
89+
@test_throws ErrorException wrapdims(Tables.columntable(A), :value, :p, :loc, :id)
90+
91+
# Construction with duplicates
92+
# ERROR: ArgumentError: Key (0, -5) is not unique
93+
@test_throws ArgumentError wrapdims(table, :value, :time, :loc)
94+
@test wrapdims(r_table, :value, :time, :loc; force=true) == C(:, :, Key("c"))
95+
end
4096
end
4197
@testset "stack" begin
4298
using LazyStack

test/runtests.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@ using Statistics, OffsetArrays, Tables, UniqueVectors, LazyStack
99
AxisKeys.nameouter() = false
1010
end
1111

12-
# include("_basic.jl")
12+
include("_basic.jl")
1313

1414
include("_functions.jl")
1515

16-
# include("_fast.jl")
16+
include("_fast.jl")
1717

18-
# include("_packages.jl")
18+
include("_packages.jl")
1919

2020
end
2121
@testset "fast findfirst & findall" begin

0 commit comments

Comments
 (0)