From dbfd05c2ac841d0849f2614fe7aeb96791fe66ca Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Sun, 9 Aug 2020 16:25:28 -0700 Subject: [PATCH 1/3] Define halve on RowIterator and NamedTupleIterator --- Project.toml | 4 +++- src/Tables.jl | 2 +- src/fallbacks.jl | 22 ++++++++++++++++++++++ src/namedtuples.jl | 8 ++++++++ test/runtests.jl | 27 ++++++++++++++++++++++++++- 5 files changed, 60 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index 0bb5cf5..c0d6ace 100644 --- a/Project.toml +++ b/Project.toml @@ -10,12 +10,14 @@ TableTraits = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" IteratorInterfaceExtensions = "82899510-4779-5014-852e-03e436cf321d" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" DataValueInterfaces = "e2d170a0-9d28-54be-80f0-106bbe20a464" +SplittablesBase = "171d559e-b47b-412a-8079-5efa626c420e" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" DataValues = "e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5" QueryOperators = "2aef5ad7-51ca-5a8f-8e88-e75cf067b44b" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +SplittablesTesting = "3bda5eb5-c32a-4f64-8618-df3be8968470" [compat] julia = "1" @@ -25,4 +27,4 @@ IteratorInterfaceExtensions = "0.1.1, 1" TableTraits = "0.4.1, 1" [targets] -test = ["Test", "DataValues", "QueryOperators", "SparseArrays"] +test = ["Test", "DataValues", "QueryOperators", "SparseArrays", "SplittablesTesting"] diff --git a/src/Tables.jl b/src/Tables.jl index 7c11f56..5e9b1f8 100644 --- a/src/Tables.jl +++ b/src/Tables.jl @@ -1,6 +1,6 @@ module Tables -using LinearAlgebra, DataValueInterfaces, DataAPI, TableTraits, IteratorInterfaceExtensions +using LinearAlgebra, DataValueInterfaces, DataAPI, TableTraits, IteratorInterfaceExtensions, SplittablesBase export rowtable, columntable diff --git a/src/fallbacks.jl b/src/fallbacks.jl index 0fe01a7..bed270e 100644 --- a/src/fallbacks.jl +++ b/src/fallbacks.jl @@ -79,6 +79,28 @@ schema(x::RowIterator) = schema(columns(x)) return ColumnsRow(columns(rows), st), st + 1 end +function consistent_rowcount(cols) + len = length(cols[1]) + if !all(c -> length(c) == len, cols) + throw(ArgumentError("`halve` on columns return inconsistent number or rows")) + end + return len +end + +function SplittablesBase.halve(x::RowIterator) + if isempty(columns(x)) + len = cld(length(x), 2) + return (RowIterator(columns(x), len), RowIterator(columns(x), length(x) - len)) + end + cs = map(SplittablesBase.halve, columns(x)) + lefts = map(first, cs) + rights = map(last, cs) + return ( + RowIterator(lefts, consistent_rowcount(lefts)), + RowIterator(rights, consistent_rowcount(rights)), + ) +end + # this is our generic Tables.rows fallback definition function rows(x::T) where {T} isrowtable(x) && return x diff --git a/src/namedtuples.jl b/src/namedtuples.jl index a149e90..f102493 100644 --- a/src/namedtuples.jl +++ b/src/namedtuples.jl @@ -65,6 +65,14 @@ function Base.iterate(rows::NamedTupleIterator{Nothing}, state::Tuple{Val{names} return NamedTuple{names}(Tuple(getcolumn(row, nm) for nm in names)), (Val(names), (st,)) end +function SplittablesBase.halve(rows::NamedTupleIterator{schema}) where schema + left, right = SplittablesBase.halve(rows.x) + return ( + NamedTupleIterator{schema,typeof(left)}(left), + NamedTupleIterator{schema,typeof(right)}(right), + ) +end + # sink function """ Tables.rowtable(x) => Vector{NamedTuple} diff --git a/test/runtests.jl b/test/runtests.jl index 749caf6..8e1a58b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,4 @@ -using Test, Tables, TableTraits, DataValues, QueryOperators, IteratorInterfaceExtensions, SparseArrays +using Test, Tables, TableTraits, DataValues, QueryOperators, IteratorInterfaceExtensions, SparseArrays, SplittablesBase, SplittablesTesting @testset "utils.jl" begin @@ -551,3 +551,28 @@ Tables.isrowtable(::Type{IsRowTable}) = true @test Tables.columntable(rt) == Tables.columntable([nt, nt, nt]) end + +@testset "SplittablesBase" begin + nt4 = (a = [0, 1, 2, 3], b = [5, 6, 7, 8]) + nt5 = (a = [0, 1, 2, 3, 4], b = [5, 6, 7, 8, 9]) + SplittablesTesting.test_ordered([ + (label = "RowIterator (length = 4)", data = Tables.rows(nt4)), + (label = "RowIterator (length = 5)", data = Tables.rows(nt5)), + ( + label = "NamedTupleIterator (length = 4)", + data = Tables.namedtupleiterator(Tables.rows(nt4)), + ), + ( + label = "NamedTupleIterator (length = 5)", + data = Tables.namedtupleiterator(Tables.rows(nt5)), + ), + ]) + + @testset "Inconsistent `halve` of columns should throw" begin + rt = Tables.rows((a = [0, 1, 2, 3, 4], b = [5, 6, 7, 8])) + @test_throws( + ArgumentError("`halve` on columns return inconsistent number or rows"), + SplittablesBase.halve(rt) + ) + end +end From 92d57d000cc9bd2542c9fee2bed161a19c67033a Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Sun, 9 Aug 2020 16:55:47 -0700 Subject: [PATCH 2/3] Relax isless and isequal to discard column storage types --- src/fallbacks.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fallbacks.jl b/src/fallbacks.jl index bed270e..acf4a5d 100644 --- a/src/fallbacks.jl +++ b/src/fallbacks.jl @@ -26,7 +26,7 @@ Base.@propagate_inbounds getcolumn(c::ColumnsRow, i::Int) = getcolumn(getcolumns Base.@propagate_inbounds getcolumn(c::ColumnsRow, nm::Symbol) = getcolumn(getcolumns(c), nm)[getrow(c)] columnnames(c::ColumnsRow) = columnnames(getcolumns(c)) -@generated function Base.isless(c::ColumnsRow{T}, d::ColumnsRow{T}) where {T <: NamedTuple{names}} where names +@generated function Base.isless(c::ColumnsRow{<:NamedTuple{names}}, d::ColumnsRow{<:NamedTuple{names}}) where names exprs = Expr[] for n in names var1 = Expr(:., :c, QuoteNode(n)) @@ -42,7 +42,7 @@ columnnames(c::ColumnsRow) = columnnames(getcolumns(c)) Expr(:block, exprs...) end -@generated function Base.isequal(c::ColumnsRow{T}, d::ColumnsRow{T}) where {T <: NamedTuple{names}} where names +@generated function Base.isequal(c::ColumnsRow{<:NamedTuple{names}}, d::ColumnsRow{<:NamedTuple{names}}) where names exprs = Expr[] for n in names var1 = Expr(:., :c, QuoteNode(n)) From ecab1ac2e1347454da7390001dd408850d5500b9 Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Sun, 9 Aug 2020 17:51:35 -0700 Subject: [PATCH 3/3] Test empty rows --- test/runtests.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index 8e1a58b..21ec7c9 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -555,9 +555,11 @@ end @testset "SplittablesBase" begin nt4 = (a = [0, 1, 2, 3], b = [5, 6, 7, 8]) nt5 = (a = [0, 1, 2, 3, 4], b = [5, 6, 7, 8, 9]) + nt0 = NamedTuple() SplittablesTesting.test_ordered([ (label = "RowIterator (length = 4)", data = Tables.rows(nt4)), (label = "RowIterator (length = 5)", data = Tables.rows(nt5)), + (label = "RowIterator (no columns)", data = Tables.RowIterator(nt0, 5)), ( label = "NamedTupleIterator (length = 4)", data = Tables.namedtupleiterator(Tables.rows(nt4)), @@ -566,6 +568,10 @@ end label = "NamedTupleIterator (length = 5)", data = Tables.namedtupleiterator(Tables.rows(nt5)), ), + ( + label = "NamedTupleIterator (no columns)", + data = Tables.namedtupleiterator(Tables.RowIterator(nt0, 5)), + ), ]) @testset "Inconsistent `halve` of columns should throw" begin