Skip to content

Commit 2d97b73

Browse files
authored
Change getrows -> subset (#292)
Also adds a generic implementation of subset for column-oriented tables and row-oriented tables that are AbstractVector. Adds implementation and tests for DictRowTable/DictColumnTable.
1 parent 9c92441 commit 2d97b73

File tree

4 files changed

+84
-39
lines changed

4 files changed

+84
-39
lines changed

src/Tables.jl

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -569,15 +569,15 @@ struct Partitioner{T}
569569
end
570570

571571
"""
572-
Tables.getrows(x, inds; view=nothing)
572+
Tables.subset(x, inds; view=nothing)
573573
574574
Return one or more rows from table `x` according to the position(s) specified by `inds`:
575575
576576
- If `inds` is a single non-boolean integer return a row object.
577-
- If `inds` is a vector of non-boolean integers, a vector of booleans, or a `:`, return an indexable object of rows.
577+
- If `inds` is a vector of non-boolean integers, a vector of booleans, or a `:`, return a subset of the original table according to the indices.
578578
In this case, the returned type is not necessarily the same as the original table type.
579579
580-
If other type of `inds` is passed than specified above the behavior is undefined.
580+
If `inds` is of any type other than those specified above, the behavior is undefined.
581581
582582
The `view` argument influences whether the returned object is a view of the original table
583583
or an independent copy:
@@ -587,11 +587,37 @@ or an independent copy:
587587
- If `view=true` then a view is returned and if `view=false` a copy is returned.
588588
This applies both to returning a row or a table.
589589
590-
Any specialized implementation of `getrows` must support the `view=nothing` argument.
590+
Any specialized implementation of `subset` must support the `view=nothing` argument.
591591
Support for `view=true` or `view=false` is optional
592592
(i.e. implementations might error on them if they are not supported).
593593
"""
594-
function getrows end
594+
function subset(x::T, inds; view::Union{Bool, Nothing}=nothing) where {T}
    # Generic fallback: because this method is being called, we know `x` didn't define
    # its own `Tables.subset`.
    # First check if it supports column access, and if so, apply `inds` to every column
    # and wrap the resulting columns in a DictColumnTable.
    if columnaccess(x)
        cols = columns(x)
        # a single integer position selects one row, wrapped in a ColumnsRow
        inds isa Integer && return ColumnsRow(cols, inds)
        # `_map` passes each per-column result through `vectorcheck`, so `inds` that do
        # not produce an `AbstractVector` (e.g. a matrix of indices) raise an ArgumentError
        ret = view === true ? _map(c -> Base.view(c, inds), cols) : _map(c -> c[inds], cols)
        return DictColumnTable(schema(cols), ret)
    end
    # otherwise, let's get the rows and see if we can apply `inds` to them
    r = rows(x)
    if r isa AbstractVector
        # integer index: return the single row itself, regardless of `view`
        inds isa Integer && return r[inds]
        # Index `r` (the object just verified to be an `AbstractVector`) rather than `x`:
        # `x` itself is not guaranteed to support `getindex` or `Base.view`, and the
        # integer case above already goes through `r`.
        ret = view === true ? Base.view(r, inds) : r[inds]
        (ret isa AbstractVector) || throw(ArgumentError("`Tables.subset`: invalid `inds` argument, expected `AbstractVector` output, got $(typeof(ret))"))
        return ret
    end
    # neither column access nor an indexable vector of rows: nothing generic we can do
    throw(ArgumentError("no default `Tables.subset` implementation for type: $T"))
end
616+
617+
# Validation passthrough: a result that is already an `AbstractVector` is returned unchanged.
function vectorcheck(x::AbstractVector)
    return x
end
618+
# Fallback for any other result type: reject it with an ArgumentError naming the offending type.
function vectorcheck(x)
    throw(ArgumentError("`Tables.subset`: invalid `inds` argument, expected `AbstractVector` output, got $(typeof(x))"))
end
619+
# Apply `f` to every column of `cols`, keyed by column name in `columnnames` order.
# Each transformed column goes through `vectorcheck`, so a non-vector result throws.
function _map(f, cols)
    entries = (nm => vectorcheck(f(getcolumn(cols, nm))) for nm in columnnames(cols))
    return OrderedDict(entries)
end
620+
595621

596622
"""
597623
Tables.partitioner(f, itr)

src/dicts.jl

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# Dict of Vectors as table
12
struct DictColumnTable <: AbstractColumns
23
schema::Schema
34
values::OrderedDict{Symbol, AbstractVector}
@@ -94,6 +95,7 @@ columnnames(x::DictColumnTable) = getfield(x, :schema).names
9495
getcolumn(x::DictColumnTable, i::Int) = getfield(x, :values)[columnnames(x)[i]]
9596
getcolumn(x::DictColumnTable, nm::Symbol) = getfield(x, :values)[nm]
9697

98+
# Vector of Dicts as table
9799
struct DictRowTable
98100
names::Vector{Symbol}
99101
types::Dict{Symbol, Type}
@@ -122,6 +124,16 @@ function Base.iterate(x::DictRowTable, st=1)
122124
return DictRow(x.names, x.values[st]), st + 1
123125
end
124126

127+
# `Tables.subset` for DictRowTable: select rows by position from the stored vector of rows.
# - integer `inds`: returns a single `DictRow`
# - any other `inds`: returns a new `DictRowTable` sharing `names`/`types` with `x`;
#   with `view=true` the rows are a view into the original storage, otherwise a copy.
# Throws ArgumentError when indexing with `inds` does not yield an `AbstractVector`.
function subset(x::DictRowTable, inds; view::Union{Bool,Nothing} = nothing)
    rowdicts = getfield(x, :values)
    # A single row is always fetched by direct indexing, even when `view=true`:
    # `Base.view(rowdicts, i)` would yield a zero-dimensional view instead of the row
    # element itself, which is what `DictRow` is constructed from (as in `iterate`).
    inds isa Integer && return DictRow(getfield(x, :names), rowdicts[inds])
    vals = view === true ? Base.view(rowdicts, inds) : rowdicts[inds]
    # interpolate the value actually computed; the previous message referenced an
    # undefined variable, turning this ArgumentError into an UndefVarError
    vals isa AbstractVector || throw(ArgumentError("`Tables.subset`: invalid `inds` argument, expected `RowTable` output, got $(typeof(vals))"))
    return DictRowTable(getfield(x, :names), getfield(x, :types), vals)
end
136+
125137
"""
126138
Tables.dictrowtable(x) => Tables.DictRowTable
127139

src/namedtuples.jl

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -106,18 +106,18 @@ function rowtable(itr::T) where {T}
106106
return collect(namedtupleiterator(eltype(r), r))
107107
end
108108

109-
function getrows(x::RowTable, inds; view::Union{Bool,Nothing} = nothing)
110-
if view === true
111-
return Base.view(x, inds)
109+
# NamedTuple of arrays of matching dimensionality
110+
# Type alias matching any NamedTuple whose fields are all `AbstractVector`s
# (the element type `S` may differ from column to column).
const ColumnTable = NamedTuple{names, T} where {names, T <: NTuple{N, AbstractVector{S} where S}} where {N}
111+
# Number of rows in a ColumnTable: 0 when there are no columns, otherwise the length
# of the first column.
function rowcount(c::ColumnTable)
    return isempty(c) ? 0 : length(first(c))
end
112+
113+
# `Tables.subset` for ColumnTable (NamedTuple of vectors).
# - integer `inds`: returns a NamedTuple holding the `inds`-th element of each column
# - any other `inds`: returns a NamedTuple of subsetted columns (views when `view=true`,
#   indexed copies otherwise); each column result must pass `vectorcheck`.
function subset(x::ColumnTable, inds; view::Union{Bool,Nothing}=nothing)
    # single row: plain element access per column, independent of `view`
    inds isa Integer && return map(col -> col[inds], x)
    # choose the per-column selector once, then validate every result
    pick = view === true ? (col -> Base.view(col, inds)) : (col -> col[inds])
    return map(col -> vectorcheck(pick(col)), x)
end
116120

117-
# NamedTuple of arrays of matching dimensionality
118-
const ColumnTable = NamedTuple{names, T} where {names, T <: NTuple{N, AbstractArray{S, D} where S}} where {N, D}
119-
rowcount(c::ColumnTable) = length(c) == 0 ? 0 : length(c[1])
120-
121121
# interface implementation
122122
istable(::Type{<:ColumnTable}) = true
123123
columnaccess(::Type{<:ColumnTable}) = true
@@ -181,11 +181,3 @@ function columntable(itr::T) where {T}
181181
return columntable(schema(cols), cols)
182182
end
183183
columntable(x::ColumnTable) = x
184-
185-
function getrows(x::ColumnTable, inds; view::Union{Bool,Nothing} = nothing)
186-
if view === true
187-
return map(c -> Base.view(c, inds), x)
188-
else
189-
return map(c -> c[inds], x)
190-
end
191-
end

test/runtests.jl

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -145,30 +145,32 @@ end
145145
@test Tables.buildcolumns(nothing, rt) == nt
146146
@test Tables.columntable(nothing, nt) == nt
147147

148-
@testset "columntable getrows" begin
149-
@test Tables.getrows(nt, 1) == (a=1, b=4.0, c="7")
150-
@test Tables.getrows(nt, 1, view=false) == (a=1, b=4.0, c="7")
151-
@test Tables.getrows(nt, 1, view=nothing) == (a=1, b=4.0, c="7")
152-
@test Tables.getrows(nt, 1:2) == (a=[1,2], b=[4.0, 5.0], c=["7","8"])
153-
@test Tables.getrows(nt, 1:2, view=false) == (a=[1,2], b=[4.0, 5.0], c=["7","8"])
154-
@test Tables.getrows(nt, 1:2, view=nothing) == (a=[1,2], b=[4.0, 5.0], c=["7","8"])
148+
@testset "columntable subset" begin
149+
@test Tables.subset(nt, 1) == (a=1, b=4.0, c="7")
150+
@test Tables.subset(nt, 1, view=false) == (a=1, b=4.0, c="7")
151+
@test Tables.subset(nt, 1, view=nothing) == (a=1, b=4.0, c="7")
152+
@test Tables.subset(nt, 1:2) == (a=[1,2], b=[4.0, 5.0], c=["7","8"])
153+
@test Tables.subset(nt, 1:2, view=false) == (a=[1,2], b=[4.0, 5.0], c=["7","8"])
154+
@test Tables.subset(nt, 1:2, view=nothing) == (a=[1,2], b=[4.0, 5.0], c=["7","8"])
155+
@test_throws ArgumentError Tables.subset(nt, [1:2 1:2])
155156

156-
@test Tables.getrows(nt, 1, view=true) == (a = fill(1), b = fill(4.0), c = fill("7"))
157-
rs = Tables.getrows(nt, 1:2, view=true)
157+
@test Tables.subset(nt, 1, view=true) == (a=1, b=4.0, c="7")
158+
rs = Tables.subset(nt, 1:2, view=true)
158159
@test rs == (a=[1,2], b=[4.0, 5.0], c=["7","8"])
159160
@test rs.a.parent === nt.a
160161
end
161162

162-
@testset "rowtable getrows" begin
163-
@test Tables.getrows(rt, 1) == (a=1, b=4.0, c="7")
164-
@test Tables.getrows(rt, 1, view=false) == (a=1, b=4.0, c="7")
165-
@test Tables.getrows(rt, 1, view=nothing) == (a=1, b=4.0, c="7")
166-
@test Tables.getrows(rt, 1:2) == [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8")]
167-
@test Tables.getrows(rt, 1:2, view=false) == [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8")]
168-
@test Tables.getrows(rt, 1:2, view=nothing) == [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8")]
163+
@testset "rowtable subset" begin
164+
@test Tables.subset(rt, 1) == (a=1, b=4.0, c="7")
165+
@test Tables.subset(rt, 1, view=false) == (a=1, b=4.0, c="7")
166+
@test Tables.subset(rt, 1, view=nothing) == (a=1, b=4.0, c="7")
167+
@test Tables.subset(rt, 1:2) == [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8")]
168+
@test Tables.subset(rt, 1:2, view=false) == [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8")]
169+
@test Tables.subset(rt, 1:2, view=nothing) == [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8")]
170+
@test_throws ArgumentError Tables.subset(rt, [1:2 1:2])
169171

170-
@test Tables.getrows(rt, 1, view=true) == fill((a = 1, b = 4.0, c = "7"))
171-
rs = Tables.getrows(rt, 1:2, view=true)
172+
@test Tables.subset(rt, 1, view=true) == (a=1, b=4.0, c="7")
173+
rs = Tables.subset(rt, 1:2, view=true)
172174
@test rs == [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8")]
173175
@test rs.parent === rt
174176
end
@@ -714,6 +716,12 @@ end
714716
@test dct.a == [1, 2, 3]
715717
@test dct.b == [4.0, 5.0, 6.0]
716718
@test dct.c == ["7", "8", "9"]
719+
# Tables.subset
720+
drow = Tables.subset(dct, 1)
721+
@test drow.a == 1 && drow.b == 4.0 && drow.c == "7"
722+
drows = Tables.subset(dct, [1, 2])
723+
@test drows.a == [1, 2] && drows.b == [4.0, 5.0] && drows.c == ["7", "8"]
724+
@test Tables.rowcount(drows) == 2
717725

718726
dct = Tables.dictcolumntable(ct)
719727
@test dct.a == [1, 2, 3]
@@ -760,6 +768,13 @@ end
760768
# https://github.com/JuliaData/Tables.jl/issues/286
761769
dta = Tables.dictcolumntable([(; a="hey"), (; b=2)]).a
762770
@test isequal(dta, ["hey", missing])
771+
# Tables.subset
772+
drow = Tables.subset(drt, 1)
773+
@test drow.a == 1 && drow.b == 2 && drow.c == 3
774+
drows = Tables.subset(drt, [1, 2])
775+
@test length(drows) == 2
776+
drowsv = Tables.subset(drt, [1, 2]; view=true)
777+
@test length(drowsv) == 2
763778
end
764779

765780
# extremely wide tables

0 commit comments

Comments
 (0)