Skip to content

Commit 4e60780

Browse files
committed
Updates from code review
1 parent cc209e0 commit 4e60780

File tree

2 files changed

+74
-31
lines changed

2 files changed

+74
-31
lines changed

src/tables.jl

Lines changed: 49 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -117,54 +117,75 @@ end
117117
# end
118118

119119
"""
120-
populate!(A, table, value)
120+
AxisKeys.populate!(A, table, value; force=false, quiet=false)
121121
122-
Populate A with the contents of the `value` column in a provided table.
123-
The provided table contain columns corresponding to the keys in A and support row iteration.
122+
Populate `A` with the contents of the `value` column in a provided `table`.
123+
The `table` must contain columns corresponding to the keys in `A` and support row iteration.
124+
If the keys in `A` do not uniquely identify rows in the `table` then an
125+
`ArgumentError` is throw. If `force` is true then the duplicate (non-unique) entries will be
126+
overwritten. Setting `quiet` to true will avoid any non-unique warnings.
124127
"""
125-
function populate!(A, table, value::Symbol)
126-
cols = [value, dimnames(A)...]
128+
function populate!(A, table, value::Symbol; force=false, quiet=false)
129+
# Use a BitArray mask to detect duplicates and error instead of overwriting.
130+
mask = falses(size(A))
131+
overwritten = false
132+
127133
for r in Tables.rows(table)
128-
setkey!(A, (Tables.getcolumn(r, c) for c in cols)...)
134+
vals = Tuple(Tables.getcolumn(r, c) for c in dimnames(A))
135+
inds = map(findindex, vals, axiskeys(A))
136+
137+
# Handle duplicate error if applicable
138+
if mask[inds...]
139+
if force
140+
overwritten = true
141+
else
142+
throw(ArgumentError("Key $vals is not unique"))
143+
end
144+
end
145+
146+
# Set or mask marking that we've set this index
147+
setindex!(mask, true, inds...)
148+
149+
# Insert our value into the data array
150+
setindex!(A, Tables.getcolumn(r, value), inds...)
151+
end
152+
153+
if overwritten && !quiet
154+
@warn "Columns $(dimnames(A)) do not uniquely identify rows in the table"
129155
end
156+
130157
return A
131158
end
132159

133160
"""
134-
KeyedArray(table, value, keys...; default=undef)
135-
NamedDimsArray(table, value, keys...; default=undef)
161+
wrapdims(table, [Type,] value, keys...; default=undef, sort=false, force=false, quiet=false)
136162
137163
Construct a KeyedArray/NamedDimsArray from a `table` supporting column and row.
138-
`keys` columns are extracted as is, without sorting, and are assumed to uniquely identify
139-
each `value`. The `default` value is used in cases where no value is identified for a given
140-
keypair.
164+
The `default` value is used in cases where no value is identified for a given keypair.
165+
If the `keys` columns do not uniquely identify rows in the table then an `ArgumentError` is
166+
throw. If `force` is true then the duplicate (non-unique) entries will be
167+
overwritten. Setting `quiet` to true will avoid any non-unique warnings.
141168
"""
142-
function KeyedArray(table, value::Symbol, keys::Symbol...; kwargs...)
143-
return _construct_from_table(KeyedArray, table, value, keys...; kwargs...)
169+
function wrapdims(table, value::Symbol, keys::Symbol...; kwargs...)
170+
wrapdims(table, KeyedArray, value, keys...; kwargs...)
144171
end
145172

146-
function NamedDimsArray(table, value::Symbol, keys::Symbol...; kwargs...)
147-
return _construct_from_table(NamedDimsArray, table, value, keys...; kwargs...)
148-
end
149-
150-
151-
# Internal function for constructing the KeyedArray or NamedDimsArray.
152-
# This code doesn't care which type we produce so we just pass that along.
153-
function _construct_from_table(
154-
T::Type, table, value::Symbol, keys::Symbol...;
155-
default=undef, issorted=false
156-
)
173+
function wrapdims(table, T::Type, value::Symbol, keys::Symbol...; default=undef, sort::Bool=false, kwargs...)
157174
# get columns of the input table source
158175
cols = Tables.columns(table)
159176

160177
# Extract key columns
161-
kw = Tuple(k => unique(Tables.getcolumn(cols, k)) for k in keys)
178+
pairs = map(keys) do k
179+
col = unique(Tables.getcolumn(cols, k))
180+
sort && Base.sort!(col)
181+
return k => col
182+
end
162183

163184
# Extract data/value column
164185
vals = Tables.getcolumn(cols, value)
165186

166187
# Initialize the KeyedArray
167-
sz = length.(last.(kw))
188+
sz = length.(last.(pairs))
168189

169190
A = if default === undef
170191
data = similar(vals, sz)
@@ -173,7 +194,7 @@ function _construct_from_table(
173194
fill!(data, default)
174195
end
175196

176-
A = T(data; kw...)
177-
populate!(A, table, value)
197+
A = T(data; pairs...)
198+
populate!(A, table, value; kwargs...)
178199
return A
179200
end

test/_packages.jl

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ end
4444

4545
# Test fully constructing from a table
4646
# Common when working with adhoc data
47-
B = KeyedArray(table, :value, :time, :loc, :id)
47+
B = wrapdims(table, :value, :time, :loc, :id)
4848
@test B == A
4949

5050
# Test populating an existing array (e.g., expected data based on calculated targets/offsets)
@@ -59,8 +59,30 @@ end
5959
@test C == A
6060

6161
# Constructing a NamedDimsArray with different default value and table type
62-
D = NamedDimsArray(Tables.rowtable(A), :value, :time, :loc, :id; default=missing)
63-
@test D == A
62+
# Partial populating
63+
table = Tables.rowtable(A)
64+
n = length(table)
65+
idx = rand(Bool, n)
66+
D = wrapdims(table[idx], :value, :time, :loc, :id; default=missing)
67+
# dimnames should still match, but we'll have missing values
68+
@test dimnames(D) == dimnames(A)
69+
@test any(ismissing, D)
70+
71+
# Construction with invalid columns error as expected, but the specific error is
72+
# dependent on the table type.
73+
# ERROR: ArgumentError: wrong number of names, got (:q, :time, :loc, :id) with ndims(A) == 1
74+
@test_throws ArgumentError wrapdims(Tables.rowtable(A), :q, :time, :loc, :id)
75+
# ERROR: ArgumentError: wrong number of names, got (:value, :p, :loc, :id) with ndims(A) == 1
76+
@test_throws ArgumentError wrapdims(Tables.rowtable(A), :value, :p, :loc, :id)
77+
# ERROR: type NamedTuple has no field q
78+
@test_throws ErrorException wrapdims(Tables.columntable(A), :q, :time, :loc, :id)
79+
# ERROR: type NamedTuple has no field p
80+
@test_throws ErrorException wrapdims(Tables.columntable(A), :value, :p, :loc, :id)
81+
82+
# Construction with duplicates
83+
# ERROR: ArgumentError: Key (Date("2019-01-01"), -5) is not unique
84+
@test_throws ArgumentError wrapdims(table, :value, :time, :loc)
85+
@test wrapdims(table, :value, :time, :loc; force=true) == C(:, :, Key("c"))
6486
end
6587
end
6688
@testset "stack" begin

0 commit comments

Comments
 (0)