From 6cd1f50d650ba605dd7ac73a8a2afe62087303f1 Mon Sep 17 00:00:00 2001 From: Jacob Quinn Date: Sat, 12 Mar 2022 00:19:21 -0700 Subject: [PATCH] Start an idea of what an "in memory" requirement would look like This has been requested/discussed a number of times; here's one idea of what this could look like. It is basically the same as `Tables.rows`, but `Tables.indexablerows` would require an "indexable" object of rows to be returned instead of just an iterator. Indexable is a little vague; to be most useful, we should probably require the returned object to be an `AbstractVector` since we get lots of fancy indexing/useful behavior that way. The bare minimum indexing interface is just `getindex`, `firstindex`, and `lastindex`, but it seems like people would then just want to do `x[[i, j, k]]`-like operations and would have to implement them on their own. So I'm inclined to make the requirement that you have to return an `AbstractVector` of rows. --- src/Tables.jl | 9 +++++++++ src/fallbacks.jl | 18 +++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/Tables.jl b/src/Tables.jl index d3e0328..b899088 100644 --- a/src/Tables.jl +++ b/src/Tables.jl @@ -413,6 +413,15 @@ See also [`rowtable`](@ref) and [`namedtupleiterator`](@ref). """ function rows end +""" + Tables.indexablerows(x) => Row indexable + +Similar to `Tables.rows`, but instead of only returning an iterator, returns an indexable object. +This allows random-access to the input table rows, which can be useful in contexts when +a known subset of the input is needed. 
+""" +function indexablerows end + # Schema implementation """ Tables.Schema(names, types) diff --git a/src/fallbacks.jl b/src/fallbacks.jl index 485b1ca..d9ec824 100644 --- a/src/fallbacks.jl +++ b/src/fallbacks.jl @@ -54,13 +54,14 @@ end end # RowIterator wraps an AbstractColumns object and provides row iteration via lazy row views -struct RowIterator{T} +struct RowIterator{T} <: AbstractVector{ColumnsRow{T}} columns::T len::Int end Base.eltype(x::RowIterator{T}) where {T} = ColumnsRow{T} Base.length(x::RowIterator) = getfield(x, :len) +Base.size(x::RowIterator) = (length(x),) Base.getproperty(x::RowIterator, nm::Symbol) = getcolumn(x, nm) Base.getproperty(x::RowIterator, i::Int) = getcolumn(x, i) Base.propertynames(x::RowIterator) = columnnames(x) @@ -74,6 +75,11 @@ getcolumn(x::RowIterator, i::Int) = getcolumn(columns(x), i) materializer(x::RowIterator) = materializer(columns(x)) schema(x::RowIterator) = schema(columns(x)) +Base.@propagate_inbounds function Base.getindex(x::RowIterator, i::Int) + @boundscheck checkbounds(x, i) + return ColumnsRow(columns(x), i) +end + @inline function Base.iterate(rows::RowIterator, st=1) st > length(rows) && return nothing return ColumnsRow(columns(rows), st), st + 1 @@ -96,6 +102,16 @@ function rows(x::T) where {T} throw(ArgumentError("no default `Tables.rows` implementation for type: $T")) end +# fallback for indexablerows if not overloaded explicitly: returns `rows(x)` unchanged when it is an AbstractArray, otherwise throws an ArgumentError (NOTE(review): accepts any AbstractArray, not only AbstractVector; confirm this is intended) +function indexablerows(x::T) where {T} + y = rows(x) + if y isa AbstractArray + return y + else + throw(ArgumentError("no default `Tables.indexablerows` implementation for type: $T")) + end +end + # for AbstractRow iterators, we define a "collect"-like routine to build up columns from iterated rows """