Skip to content

Commit 2fc501b

Browse files
authored
Refactor TableSelection (#299)
* Refactor TableSelection
* Remove oinds field from TableSelection
* Fix rng in Sample test to avoid StatsBase.jl bug
1 parent 2ed7f99 commit 2fc501b

File tree

5 files changed

+96
-123
lines changed

5 files changed

+96
-123
lines changed

src/tableselection.jl

Lines changed: 28 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -2,56 +2,47 @@
22
# Licensed under the MIT License. See LICENSE in the project root.
33
# ------------------------------------------------------------------
44

5-
struct TableSelection{T,C}
5+
"""
6+
TableSelection(table, names)
7+
8+
Stores a sub-`table` with given column `names`.
9+
"""
10+
struct TableSelection{T,N}
611
table::T
7-
cols::C
8-
ncols::Int
9-
names::Vector{Symbol}
10-
onames::Vector{Symbol}
11-
mapnames::Dict{Symbol,Symbol}
12-
13-
function TableSelection(table::T, names, onames) where {T}
14-
cols = Tables.columns(table)
15-
_assert(onames ⊆ Tables.columnnames(cols), "all selected columns must exist in the table")
16-
ncols = length(names)
17-
mapnames = Dict(zip(names, onames))
18-
new{T,typeof(cols)}(table, cols, ncols, names, onames, mapnames)
19-
end
12+
names::NTuple{N,Symbol}
2013
end
2114

22-
function Base.:(==)(a::TableSelection, b::TableSelection)
23-
a.names != b.names && return false
24-
a.onames != b.onames && return false
25-
all(nm -> Tables.getcolumn(a, nm) == Tables.getcolumn(b, nm), a.names)
15+
function TableSelection(table, names)
16+
cols = Tables.columns(table)
17+
_assert(names ⊆ Tables.columnnames(cols), "invalid columns for table selection")
18+
TableSelection(table, Tuple(names))
2619
end
2720

28-
function Base.show(io::IO, t::TableSelection)
29-
println(io, "TableSelection")
30-
pretty_table(io, t, vcrop_mode=:middle, newline_at_end=false)
31-
end
32-
33-
# Tables.jl interface
3421
Tables.istable(::Type{<:TableSelection}) = true
22+
3523
Tables.columnaccess(::Type{<:TableSelection}) = true
24+
3625
Tables.columns(t::TableSelection) = t
26+
3727
Tables.columnnames(t::TableSelection) = t.names
3828

39-
function Tables.getcolumn(t::TableSelection, i::Int)
40-
1 ≤ i ≤ t.ncols || error("Table has no column with index $i.")
41-
Tables.getcolumn(t.cols, t.mapnames[t.names[i]])
42-
end
29+
Tables.getcolumn(t::TableSelection, i::Int) = Tables.getcolumn(Tables.columns(t.table), t.names[i])
4330

44-
function Tables.getcolumn(t::TableSelection, nm::Symbol)
45-
nm ∉ t.names && error("Table has no column $nm.")
46-
Tables.getcolumn(t.cols, t.mapnames[nm])
47-
end
31+
Tables.getcolumn(t::TableSelection, nm::Symbol) = Tables.getcolumn(Tables.columns(t.table), nm)
4832

4933
Tables.materializer(t::TableSelection) = Tables.materializer(t.table)
5034

5135
function Tables.schema(t::TableSelection)
52-
schema = Tables.schema(t.cols)
53-
names = schema.names
54-
types = schema.types
55-
inds = indexin(t.onames, collect(names))
56-
Tables.Schema(t.names, types[inds])
36+
schema = Tables.schema(t.table)
37+
tnames = collect(t.names)
38+
snames = collect(schema.names)
39+
inds = indexin(tnames, snames)
40+
names = schema.names[inds]
41+
types = schema.types[inds]
42+
Tables.Schema(names, types)
43+
end
44+
45+
function Base.show(io::IO, t::TableSelection)
46+
println(io, "TableSelection")
47+
pretty_table(io, t, vcrop_mode=:bottom, newline_at_end=false)
5748
end

src/transforms/select.jl

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -45,16 +45,19 @@ Select(pairs::Pair{C,S}...) where {C<:Column,S<:AbstractString} =
4545

4646
Select() = throw(ArgumentError("cannot create Select transform without arguments"))
4747

48-
# utils
49-
_newnames(::Nothing, select) = select
50-
_newnames(names::Vector{Symbol}, select) = names
51-
5248
function applyfeat(transform::Select, feat, prep)
5349
cols = Tables.columns(feat)
5450
names = collect(Tables.columnnames(cols))
55-
select = transform.selector(names)
56-
newnames = _newnames(transform.newnames, select)
57-
newfeat = TableSelection(feat, newnames, select)
51+
52+
# lazy selection of columns
53+
snames = transform.selector(names)
54+
stable = TableSelection(feat, snames)
55+
56+
# rename if necessary
57+
nnames = transform.newnames
58+
rename = isnothing(nnames) ? Identity() : Rename(nnames)
59+
newfeat = stable |> rename
60+
5861
newfeat, nothing
5962
end
6063

@@ -92,9 +95,8 @@ Reject(::AllSelector) = throw(ArgumentError("cannot reject all columns"))
9295
function applyfeat(transform::Reject, feat, prep)
9396
cols = Tables.columns(feat)
9497
names = Tables.columnnames(cols)
95-
reject = transform.selector(names)
96-
select = setdiff(names, reject)
97-
strans = Select(select)
98-
newfeat, _ = applyfeat(strans, feat, prep)
98+
snames = transform.selector(names)
99+
select = Select(setdiff(names, snames))
100+
newfeat, _ = applyfeat(select, feat, prep)
99101
newfeat, nothing
100102
end

test/tableselection.jl

Lines changed: 7 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,13 @@
88
t = Table(; a, b, c, d, e, f)
99

1010
# Tables.jl interface
11-
select = [:a, :b, :e]
12-
newnames = select
13-
s = TT.TableSelection(t, newnames, select)
11+
names = [:a, :b, :e]
12+
s = TT.TableSelection(t, names)
1413
@test Tables.istable(s) == true
1514
@test Tables.columnaccess(s) == true
1615
@test Tables.rowaccess(s) == false
1716
@test Tables.columns(s) === s
18-
@test Tables.columnnames(s) == [:a, :b, :e]
17+
@test Tables.columnnames(s) == (:a, :b, :e)
1918
@test Tables.schema(s).names == (:a, :b, :e)
2019
@test Tables.schema(s).types == (Float64, Float64, Float64)
2120
@test Tables.materializer(s) == Tables.materializer(t)
@@ -26,36 +25,16 @@
2625
@test Tables.getcolumn(s, 1) == Tables.getcolumn(cols, 1)
2726
@test Tables.getcolumn(s, 3) == Tables.getcolumn(cols, :e)
2827

29-
# selectin with renaming
30-
select = [:c, :d, :f]
31-
newnames = [:x, :y, :z]
32-
s = TT.TableSelection(t, newnames, select)
33-
@test Tables.columnnames(s) == [:x, :y, :z]
34-
@test Tables.getcolumn(s, :x) == t.c
35-
@test Tables.getcolumn(s, :y) == t.d
36-
@test Tables.getcolumn(s, :z) == t.f
37-
@test Tables.getcolumn(s, 1) == t.c
38-
@test Tables.getcolumn(s, 2) == t.d
39-
@test Tables.getcolumn(s, 3) == t.f
40-
4128
# row table
42-
select = [:a, :b, :e]
43-
newnames = select
29+
names = [:a, :b, :e]
4430
rt = Tables.rowtable(t)
45-
s = TT.TableSelection(rt, newnames, select)
31+
s = TT.TableSelection(rt, names)
4632
cols = Tables.columns(rt)
4733
@test Tables.getcolumn(s, :a) == Tables.getcolumn(cols, :a)
4834
@test Tables.getcolumn(s, 1) == Tables.getcolumn(cols, 1)
4935
@test Tables.getcolumn(s, 3) == Tables.getcolumn(cols, :e)
5036

5137
# throws
52-
@test_throws AssertionError TT.TableSelection(t, [:a, :b, :z], [:a, :b, :z])
53-
@test_throws AssertionError TT.TableSelection(t, [:x, :y, :z], [:c, :d, :k])
54-
s = TT.TableSelection(t, [:a, :b, :e], [:a, :b, :e])
55-
@test_throws ErrorException Tables.getcolumn(s, :f)
56-
@test_throws ErrorException Tables.getcolumn(s, 4)
57-
s = TT.TableSelection(t, [:x, :y, :z], [:c, :d, :f])
58-
@test_throws ErrorException Tables.getcolumn(s, :c)
59-
@test_throws ErrorException Tables.getcolumn(s, 4)
60-
@test_throws ErrorException Tables.getcolumn(s, -2)
38+
@test_throws AssertionError TT.TableSelection(t, [:a, :b, :z])
39+
@test_throws AssertionError TT.TableSelection(t, [:x, :y, :z])
6140
end

test/transforms/sample.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
@test n.b ⊆ t.b
1717
@test n.c ⊆ t.c
1818

19-
T = Sample(30, replace=true, ordered=true)
19+
T = Sample(30, replace=true, ordered=true, rng=StableRNG(1))
2020
n, c = apply(T, t)
2121
trows = Tables.rowtable(t)
2222
@test unique(Tables.rowtable(n)) == trows

0 commit comments

Comments
 (0)