Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 31 additions & 5 deletions src/Tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -569,15 +569,15 @@ struct Partitioner{T}
end

"""
Tables.getrows(x, inds; view=nothing)
Tables.subset(x, inds; view=nothing)

Return one or more rows from table `x` according to the position(s) specified by `inds`:

- If `inds` is a single non-boolean integer return a row object.
- If `inds` is a vector of non-boolean integers, a vector of booleans, or a `:`, return an indexable object of rows.
- If `inds` is a vector of non-boolean integers, a vector of booleans, or a `:`, return a subset of the original table according to the indices.
In this case, the returned type is not necessarily the same as the original table type.

If other type of `inds` is passed than specified above the behavior is undefined.
If `inds` is of any other type than those listed above, the behavior is undefined.

The `view` argument influences whether the returned object is a view of the original table
or an independent copy:
Expand All @@ -587,11 +587,37 @@ or an independent copy:
- If `view=true` then a view is returned and if `view=false` a copy is returned.
This applies both to returning a row or a table.

Any specialized implementation of `getrows` must support the `view=nothing` argument.
Any specialized implementation of `subset` must support the `view=nothing` argument.
Support for `view=true` or `view=false` is optional
(i.e. implementations might error on them if they are not supported).
"""
function getrows end
function subset(x::T, inds; view::Union{Bool, Nothing}=nothing) where {T}
    # Generic fallback: this method is only reached when the type of `x` did not define
    # its own `Tables.subset`.
    #
    # Only `view === true` produces views; `view === nothing` and `view === false` both
    # go through `getindex`.
    if columnaccess(x)
        # Column path: apply `inds` to every column.
        cols = columns(x)
        # A single integer selects one row, returned as a `ColumnsRow` built from the
        # columns and the row index.
        inds isa Integer && return ColumnsRow(cols, inds)
        # `_map` passes every per-column result through `vectorcheck`, so `inds` that do
        # not yield an `AbstractVector` per column (e.g. a `CartesianIndex` or a matrix
        # of indices) are rejected with an `ArgumentError`.
        ret = view === true ? _map(c -> Base.view(c, inds), cols) : _map(c -> c[inds], cols)
        return DictColumnTable(schema(cols), ret)
    end
    # Row path: fetch the rows and, if they form a vector, apply `inds` to them.
    r = rows(x)
    if r isa AbstractVector
        inds isa Integer && return r[inds]
        # Index the row vector `r` (the object that was just checked to be an
        # `AbstractVector`), not `x` itself: `x` is not required to support `getindex`
        # or `view`, and the integer branch above already indexes `r`.
        ret = view === true ? Base.view(r, inds) : r[inds]
        ret isa AbstractVector || throw(ArgumentError("`Tables.subset`: invalid `inds` argument, expected `AbstractVector` output, got $(typeof(ret))"))
        return ret
    end
    throw(ArgumentError("no default `Tables.subset` implementation for type: $T"))
end

# Guard used when subsetting: an `AbstractVector` is passed through untouched, while any
# other value means the supplied `inds` did not select a vector and is reported as an
# `ArgumentError` naming the offending type.
function vectorcheck(x::AbstractVector)
    return x
end

function vectorcheck(x)
    throw(ArgumentError("`Tables.subset`: invalid `inds` argument, expected `AbstractVector` output, got $(typeof(x))"))
end
# Build an `OrderedDict` mapping each column name of `cols` to `f` applied to that column;
# every result is passed through `vectorcheck` before being stored.
function _map(f, cols)
    return OrderedDict(
        name => vectorcheck(f(getcolumn(cols, name))) for name in columnnames(cols)
    )
end


"""
Tables.partitioner(f, itr)
Expand Down
12 changes: 12 additions & 0 deletions src/dicts.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Dict of Vectors as table
struct DictColumnTable <: AbstractColumns
schema::Schema
values::OrderedDict{Symbol, AbstractVector}
Expand Down Expand Up @@ -94,6 +95,7 @@ columnnames(x::DictColumnTable) = getfield(x, :schema).names
# Column lookup for `DictColumnTable`. Both methods read the `values` field via
# `getfield`; the positional method first resolves the index to a column name through
# `columnnames`.
function getcolumn(x::DictColumnTable, i::Int)
    name = columnnames(x)[i]
    return getfield(x, :values)[name]
end

function getcolumn(x::DictColumnTable, nm::Symbol)
    columndata = getfield(x, :values)
    return columndata[nm]
end

# Vector of Dicts as table
struct DictRowTable
names::Vector{Symbol}
types::Dict{Symbol, Type}
Expand Down Expand Up @@ -122,6 +124,16 @@ function Base.iterate(x::DictRowTable, st=1)
return DictRow(x.names, x.values[st]), st + 1
end

# `Tables.subset` for `DictRowTable`.
#
# - Integer `inds`: return a `DictRow` for that single row.
# - Any other `inds`: return a new `DictRowTable` sharing the names and types of `x`,
#   holding the selected rows as a view when `view === true` and as a `getindex` result
#   otherwise.
#
# Throws `ArgumentError` when a non-integer `inds` does not select an `AbstractVector`.
function subset(x::DictRowTable, inds; view::Union{Bool,Nothing} = nothing)
    rowdicts = getfield(x, :values)
    colnames = getfield(x, :names)
    # A single integer always indexes directly, whatever `view` is: `Base.view` with a
    # scalar index yields a zero-dimensional view rather than the row element that
    # `DictRow` is constructed from elsewhere in this file.
    inds isa Integer && return DictRow(colnames, rowdicts[inds])
    vals = view === true ? Base.view(rowdicts, inds) : rowdicts[inds]
    # Rejects `inds` that select something other than a vector of rows (e.g. a
    # `CartesianIndex` or a matrix of indices). The message interpolates `vals`, the
    # value actually being checked.
    vals isa AbstractVector || throw(ArgumentError("`Tables.subset`: invalid `inds` argument, expected `RowTable` output, got $(typeof(vals))"))
    return DictRowTable(colnames, getfield(x, :types), vals)
end

"""
Tables.dictrowtable(x) => Tables.DictRowTable

Expand Down
24 changes: 8 additions & 16 deletions src/namedtuples.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,18 +106,18 @@ function rowtable(itr::T) where {T}
return collect(namedtupleiterator(eltype(r), r))
end

function getrows(x::RowTable, inds; view::Union{Bool,Nothing} = nothing)
if view === true
return Base.view(x, inds)
# A column table is a NamedTuple whose fields are all `AbstractVector`s, one per column.
const ColumnTable = NamedTuple{names, T} where {names, T <: NTuple{N, AbstractVector{S} where S}} where {N}

# Number of rows in a `ColumnTable`: zero when it has no columns, otherwise the length of
# its first column.
function rowcount(c::ColumnTable)
    isempty(c) && return 0
    return length(first(c))
end

# `Tables.subset` for `ColumnTable` (a NamedTuple of column vectors).
#
# - Integer `inds`: return a NamedTuple holding the value of each column at that position.
# - Any other `inds`: return a NamedTuple of subsetted columns, as views when
#   `view === true` and as `getindex` results otherwise.
#
# Throws `ArgumentError` (via `vectorcheck`) when a non-integer `inds` does not select an
# `AbstractVector` from a column.
function subset(x::ColumnTable, inds; view::Union{Bool,Nothing}=nothing)
    # A single row is always materialized by plain indexing, whatever `view` is.
    inds isa Integer && return map(c -> c[inds], x)
    # Apply `inds` column by column. Indexing the NamedTuple itself with `inds` would
    # select fields, not rows, so every column is subsetted individually.
    if view === true
        return map(c -> vectorcheck(Base.view(c, inds)), x)
    end
    return map(c -> vectorcheck(c[inds]), x)
end

# NamedTuple of arrays of matching dimensionality
const ColumnTable = NamedTuple{names, T} where {names, T <: NTuple{N, AbstractArray{S, D} where S}} where {N, D}
rowcount(c::ColumnTable) = length(c) == 0 ? 0 : length(c[1])

# Tables.jl interface traits for `ColumnTable`: it is a table, and it reports column
# access.
function istable(::Type{<:ColumnTable})
    return true
end

function columnaccess(::Type{<:ColumnTable})
    return true
end
Expand Down Expand Up @@ -181,11 +181,3 @@ function columntable(itr::T) where {T}
return columntable(schema(cols), cols)
end
columntable(x::ColumnTable) = x

function getrows(x::ColumnTable, inds; view::Union{Bool,Nothing} = nothing)
if view === true
return map(c -> Base.view(c, inds), x)
else
return map(c -> c[inds], x)
end
end
51 changes: 33 additions & 18 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -145,30 +145,32 @@ end
@test Tables.buildcolumns(nothing, rt) == nt
@test Tables.columntable(nothing, nt) == nt

@testset "columntable getrows" begin
@test Tables.getrows(nt, 1) == (a=1, b=4.0, c="7")
@test Tables.getrows(nt, 1, view=false) == (a=1, b=4.0, c="7")
@test Tables.getrows(nt, 1, view=nothing) == (a=1, b=4.0, c="7")
@test Tables.getrows(nt, 1:2) == (a=[1,2], b=[4.0, 5.0], c=["7","8"])
@test Tables.getrows(nt, 1:2, view=false) == (a=[1,2], b=[4.0, 5.0], c=["7","8"])
@test Tables.getrows(nt, 1:2, view=nothing) == (a=[1,2], b=[4.0, 5.0], c=["7","8"])
@testset "columntable subset" begin
@test Tables.subset(nt, 1) == (a=1, b=4.0, c="7")
@test Tables.subset(nt, 1, view=false) == (a=1, b=4.0, c="7")
@test Tables.subset(nt, 1, view=nothing) == (a=1, b=4.0, c="7")
@test Tables.subset(nt, 1:2) == (a=[1,2], b=[4.0, 5.0], c=["7","8"])
@test Tables.subset(nt, 1:2, view=false) == (a=[1,2], b=[4.0, 5.0], c=["7","8"])
@test Tables.subset(nt, 1:2, view=nothing) == (a=[1,2], b=[4.0, 5.0], c=["7","8"])
@test_throws ArgumentError Tables.subset(nt, [1:2 1:2])

@test Tables.getrows(nt, 1, view=true) == (a = fill(1), b = fill(4.0), c = fill("7"))
rs = Tables.getrows(nt, 1:2, view=true)
@test Tables.subset(nt, 1, view=true) == (a=1, b=4.0, c="7")
rs = Tables.subset(nt, 1:2, view=true)
@test rs == (a=[1,2], b=[4.0, 5.0], c=["7","8"])
@test rs.a.parent === nt.a
end

@testset "rowtable getrows" begin
@test Tables.getrows(rt, 1) == (a=1, b=4.0, c="7")
@test Tables.getrows(rt, 1, view=false) == (a=1, b=4.0, c="7")
@test Tables.getrows(rt, 1, view=nothing) == (a=1, b=4.0, c="7")
@test Tables.getrows(rt, 1:2) == [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8")]
@test Tables.getrows(rt, 1:2, view=false) == [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8")]
@test Tables.getrows(rt, 1:2, view=nothing) == [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8")]
@testset "rowtable subset" begin
@test Tables.subset(rt, 1) == (a=1, b=4.0, c="7")
@test Tables.subset(rt, 1, view=false) == (a=1, b=4.0, c="7")
@test Tables.subset(rt, 1, view=nothing) == (a=1, b=4.0, c="7")
@test Tables.subset(rt, 1:2) == [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8")]
@test Tables.subset(rt, 1:2, view=false) == [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8")]
@test Tables.subset(rt, 1:2, view=nothing) == [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8")]
@test_throws ArgumentError Tables.subset(rt, [1:2 1:2])

@test Tables.getrows(rt, 1, view=true) == fill((a = 1, b = 4.0, c = "7"))
rs = Tables.getrows(rt, 1:2, view=true)
@test Tables.subset(rt, 1, view=true) == (a=1, b=4.0, c="7")
rs = Tables.subset(rt, 1:2, view=true)
@test rs == [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8")]
@test rs.parent === rt
end
Expand Down Expand Up @@ -714,6 +716,12 @@ end
@test dct.a == [1, 2, 3]
@test dct.b == [4.0, 5.0, 6.0]
@test dct.c == ["7", "8", "9"]
# Tables.subset
drow = Tables.subset(dct, 1)
@test drow.a == 1 && drow.b == 4.0 && drow.c == "7"
drows = Tables.subset(dct, [1, 2])
@test drows.a == [1, 2] && drows.b == [4.0, 5.0] && drows.c == ["7", "8"]
@test Tables.rowcount(drows) == 2

dct = Tables.dictcolumntable(ct)
@test dct.a == [1, 2, 3]
Expand Down Expand Up @@ -760,6 +768,13 @@ end
# https://github.com/JuliaData/Tables.jl/issues/286
dta = Tables.dictcolumntable([(; a="hey"), (; b=2)]).a
@test isequal(dta, ["hey", missing])
# Tables.subset
drow = Tables.subset(drt, 1)
@test drow.a == 1 && drow.b == 2 && drow.c == 3
drows = Tables.subset(drt, [1, 2])
@test length(drows) == 2
drowsv = Tables.subset(drt, [1, 2]; view=true)
@test length(drowsv) == 2
end

# extremely wide tables
Expand Down