From 0a6d78d6f5da148c366d642cf6b0c01bd942ef8a Mon Sep 17 00:00:00 2001 From: mrrobot-2000 Date: Mon, 9 Mar 2020 11:06:59 +0530 Subject: [PATCH 1/7] Overload == and isequal to check schema equality --- src/schema.jl | 27 +++++++++++++++++++++++++++ src/terms.jl | 40 ++++++++++++++++++++++++++++++++++++++++ test/schema.jl | 13 ++++++++++--- 3 files changed, 77 insertions(+), 3 deletions(-) diff --git a/src/schema.jl b/src/schema.jl index 7ef8e88f..3c0d2852 100644 --- a/src/schema.jl +++ b/src/schema.jl @@ -53,6 +53,33 @@ Base.merge!(a::Schema, b::Schema) = (merge!(a.schema, b.schema); a) Base.keys(schema::Schema) = keys(schema.schema) Base.haskey(schema::Schema, key) = haskey(schema.schema, key) +function ==(first::Schema, second::Schema) + first === second && return true + first.schema === second.schema && return true + if length(first.schema) != length(second.schema) + return false + end + for key in keys(first) + !haskey(second, key) && + get(second, key, nothing) != get(first, key, nothing) && return false + end + true +end + +function Base.isequal(first::Schema, second::Schema) + first === second && return true + first.schema === second.schema && return true + if length(first.schema) != length(second.schema) + return false + end + for key in keys(first) + !haskey(second, key) && + isequal(get(second, key, nothing) != get(first, key, nothing)) && + return false + end + true +end + """ schema([terms::AbstractVector{<:AbstractTerm}, ]data, hints::Dict{Symbol}) schema(term::AbstractTerm, data, hints::Dict{Symbol}) diff --git a/src/terms.jl b/src/terms.jl index 2ee5f785..4200ded0 100644 --- a/src/terms.jl +++ b/src/terms.jl @@ -1,3 +1,4 @@ +import Base.== , Base.isequal abstract type AbstractTerm end const TermOrTerms = Union{AbstractTerm, NTuple{N, AbstractTerm} where N} const TupleTerm = NTuple{N, TermOrTerms} where N @@ -38,6 +39,8 @@ struct ConstantTerm{T<:Number} <: AbstractTerm end width(::ConstantTerm) = 1 +==(first::ConstantTerm, second::ConstantTerm) = 
first.n == second.n +isequal(first::ConstantTerm, second::ConstantTerm) = isequal(first.n, second.n) """ FormulaTerm{L,R} <: AbstractTerm @@ -54,6 +57,11 @@ struct FormulaTerm{L,R} <: AbstractTerm rhs::R end +==(first::FormulaTerm, second::FormulaTerm) = first.lhs == second.lhs && + first.rhs == second.rhs +isequal(first::FormulaTerm, second::FormulaTerm) = isequal(first.lhs, second.lhs) && + isequal(first.rhs, second.rhs) + """ FunctionTerm{Forig,Fanon,Names} <: AbstractTerm @@ -127,6 +135,12 @@ FunctionTerm(forig::Fo, fanon::Fa, names::NTuple{N,Symbol}, FunctionTerm{Fo, Fa, names}(forig, fanon, exorig, args_parsed) width(::FunctionTerm) = 1 +==(first::FunctionTerm, second::FunctionTerm) = first.forig == second.forig && + first.args_parsed == second.args_parsed +isequal(first::FunctionTerm, second::FunctionTerm) = + isequal(first.forig, second.forig) && + isequal(first.args_parsed, second.args_parsed) + """ InteractionTerm{Ts} <: AbstractTerm @@ -174,6 +188,10 @@ struct InteractionTerm{Ts} <: AbstractTerm end width(ts::InteractionTerm) = prod(width(t) for t in ts.terms) +==(first::InteractionTerm, second::InteractionTerm) = + first.terms == second.terms +isequal(first::InteractionTerm, second::InteractionTerm) = + isequal(first.terms, second.terms) """ InterceptTerm{HasIntercept} <: AbstractTerm @@ -187,6 +205,10 @@ via the [`implicit_intercept`](@ref) trait). struct InterceptTerm{HasIntercept} <: AbstractTerm end width(::InterceptTerm{H}) where {H} = H ? 
1 : 0 +==(first::InterceptTerm, second::InterceptTerm) = width(first) == width(second) +isequal(first::InterceptTerm, second::InterceptTerm) = + isequal(width(first), width(second)) + # Typed terms """ @@ -211,6 +233,14 @@ struct ContinuousTerm{T} <: AbstractTerm end width(::ContinuousTerm) = 1 +==(first::ContinuousTerm, second::ContinuousTerm) = first.sym == second.sym && + first.mean == second.mean && first.var == second.var && + first.min == second.min && first.max == second.max + +isequal(first::ContinuousTerm, second::ContinuousTerm) = + isequal(first.sym, second.sym) && isequal(first.mean, second.mean) && + isequal(first.var, second.var) && isequal(first.min, second.min) && + isequal(first.max, second.max) """ CategoricalTerm{C,T,N} <: AbstractTerm @@ -233,6 +263,12 @@ width(::CategoricalTerm{C,T,N}) where {C,T,N} = N CategoricalTerm(sym::Symbol, contrasts::ContrastsMatrix{C,T}) where {C,T} = CategoricalTerm{C,T,length(contrasts.termnames)}(sym, contrasts) +==(first::CategoricalTerm, second::CategoricalTerm) = + first.sym == second.sym && width(first) == width(second) && + first.contrasts == second.contrasts +isequal(first::CategoricalTerm, second::CategoricalTerm) = + isequal(first.sym, second.sym) && isequal(width(first), width(second)) && + isequal(first.contrasts, second.contrasts) """ MatrixTerm{Ts} <: AbstractTerm @@ -250,6 +286,10 @@ end MatrixTerm(t::AbstractTerm) = MatrixTerm((t, )) width(t::MatrixTerm) = sum(width(tt) for tt in t.terms) +# ==(first::MatrixTerm, second::MatrixTerm) = +# collect_matrix_terms(first.terms) == collect_matrix_terms(second.terms) +# isequal(first::MatrixTerm, second::MatrixTerm) = +# isequal(collect_matrix_terms(first.terms), collect_matrix_terms(second.terms)) """ collect_matrix_terms(ts::TupleTerm) collect_matrix_terms(t::AbstractTerm) = collect_matrix_term((t, )) diff --git a/test/schema.jl b/test/schema.jl index 7786f44b..5b25d032 100644 --- a/test/schema.jl +++ b/test/schema.jl @@ -1,10 +1,17 @@ @testset "schemas" 
begin - + import Base using StatsModels: schema, apply_schema, FullRank - f = @formula(y ~ 1 + a + b + c + b&c) - df = (y = rand(9), a = 1:9, b = rand(9), c = repeat(["d","e","f"], 3)) + f = @formula(y ~ 1 + a + b + c + b & c) + y = rand(9) + b = rand(9) + + df = (y = y, a = 1:9, b = b, c = repeat(["d", "e", "f"], 3)) f = apply_schema(f, schema(f, df)) @test f == apply_schema(f, schema(f, df)) + df2 = (y = y, a = 1:9, b = b, c = repeat(["d", "e", "f"], 3)) + + @test schema(df) == schema(df2) + @test isequal(schema(df), schema(df2)) end From 1ca55c0a79b3b4db9f7558e67a08f815089f2a93 Mon Sep 17 00:00:00 2001 From: mrrobot-2000 Date: Wed, 11 Mar 2020 23:32:55 +0530 Subject: [PATCH 2/7] Remove unnecessary changes. --- src/schema.jl | 14 +++++--------- src/terms.jl | 7 +++++-- test/schema.jl | 24 +++++++++++++++++++++++- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/schema.jl b/src/schema.jl index 3c0d2852..29da2f2a 100644 --- a/src/schema.jl +++ b/src/schema.jl @@ -56,11 +56,9 @@ Base.haskey(schema::Schema, key) = haskey(schema.schema, key) function ==(first::Schema, second::Schema) first === second && return true first.schema === second.schema && return true - if length(first.schema) != length(second.schema) - return false - end + length(first.schema) != length(second.schema) && return false for key in keys(first) - !haskey(second, key) && + !haskey(second, key) && return false get(second, key, nothing) != get(first, key, nothing) && return false end true @@ -69,12 +67,10 @@ end function Base.isequal(first::Schema, second::Schema) first === second && return true first.schema === second.schema && return true - if length(first.schema) != length(second.schema) - return false - end + length(first.schema) != length(second.schema) && return false for key in keys(first) - !haskey(second, key) && - isequal(get(second, key, nothing) != get(first, key, nothing)) && + !haskey(second, key) && return false + !isequal(get(second, key, nothing), get(first, key,
nothing)) && return false end true diff --git a/src/terms.jl b/src/terms.jl index 4200ded0..4f202801 100644 --- a/src/terms.jl +++ b/src/terms.jl @@ -57,8 +57,8 @@ struct FormulaTerm{L,R} <: AbstractTerm rhs::R end -==(first::FormulaTerm, second::FormulaTerm) = first.lhs == second.lhs && - first.rhs == second.rhs +==(first::FormulaTerm, second::FormulaTerm) = + first.lhs == second.lhs && first.rhs == second.rhs isequal(first::FormulaTerm, second::FormulaTerm) = isequal(first.lhs, second.lhs) && isequal(first.rhs, second.rhs) @@ -290,6 +290,9 @@ width(t::MatrixTerm) = sum(width(tt) for tt in t.terms) # collect_matrix_terms(first.terms) == collect_matrix_terms(second.terms) # isequal(first::MatrixTerm, second::MatrixTerm) = # isequal(collect_matrix_terms(first.terms), collect_matrix_terms(second.terms)) +==(first::MatrixTerm, second::MatrixTerm) = first.terms == second.terms +isequal(first::MatrixTerm, second::MatrixTerm) = isequal(first.terms, second.terms) + """ collect_matrix_terms(ts::TupleTerm) collect_matrix_terms(t::AbstractTerm) = collect_matrix_term((t, )) diff --git a/test/schema.jl b/test/schema.jl index 5b25d032..0c564d79 100644 --- a/test/schema.jl +++ b/test/schema.jl @@ -10,8 +10,30 @@ f = apply_schema(f, schema(f, df)) @test f == apply_schema(f, schema(f, df)) - df2 = (y = y, a = 1:9, b = b, c = repeat(["d", "e", "f"], 3)) + df2 = (y = y, a = 1:9, b = b, c = [df.c; df.c]) + df3 = (y = y, a = 1:9, b = b, c = repeat(["a", "b", "c"], 3)) + df4 = (y = [df.y; df.y], a = [1:9; 1:9], b = [b; b], c = [df.c; df.c]) + df5 = (z = y, a = 1:9, b = b, c = repeat(["d", "e", "f"], 3)) + + sch = schema(df, Dict(:c => DummyCoding(base="e"))) + sch2 = schema(df, Dict(:c => EffectsCoding(base="e"))) @test schema(df) == schema(df2) + @test schema(df) != schema(df3) + @test schema(df) != schema(df4) + @test schema(df) != schema(df5) + @test sch != sch2 + @test isequal(schema(df), schema(df2)) + @test !isequal(schema(df), schema(df3)) + @test !isequal(schema(df), 
schema(df4)) + @test !isequal(schema(df), schema(df5)) + @test !isequal(sch, sch2) + + # @test schema(df) == schema(df3) + # @test isequal(schema(df), schema(df3)) + #@test schema(df) != schema(df4) + #@test isequal(schema(df), schema(df4)) + + end From e6f8b87a8e8155bc4260de5ca144b2413e546622 Mon Sep 17 00:00:00 2001 From: mrrobot-2000 Date: Mon, 16 Mar 2020 13:37:54 +0530 Subject: [PATCH 3/7] Consistent formatting style and add more tests. --- src/schema.jl | 5 ++--- src/terms.jl | 40 ++++++++++++++++++++++++---------------- test/schema.jl | 20 ++++++++++++-------- 3 files changed, 38 insertions(+), 27 deletions(-) diff --git a/src/schema.jl b/src/schema.jl index 29da2f2a..22dcab70 100644 --- a/src/schema.jl +++ b/src/schema.jl @@ -59,7 +59,7 @@ function ==(first::Schema, second::Schema) length(first.schema) != length(second.schema) && return false for key in keys(first) !haskey(second, key) && return false - get(second, key, nothing) != get(first, key, nothing) && return false + second[key] != first[key] && return false end true end @@ -70,8 +70,7 @@ function Base.isequal(first::Schema, second::Schema) length(first.schema) != length(second.schema) && return false for key in keys(first) !haskey(second, key) && return false - !isequal(get(second, key, nothing), get(first, key, nothing)) && - return false + !isequal(second[key], first[key]) && return false end true end diff --git a/src/terms.jl b/src/terms.jl index 4f202801..b3c37e7d 100644 --- a/src/terms.jl +++ b/src/terms.jl @@ -58,8 +58,10 @@ struct FormulaTerm{L,R} <: AbstractTerm end ==(first::FormulaTerm, second::FormulaTerm) = - first.lhs == second.lhs && first.rhs == second.rhs -isequal(first::FormulaTerm, second::FormulaTerm) = isequal(first.lhs, second.lhs) && + first.lhs == second.lhs && + first.rhs == second.rhs +isequal(first::FormulaTerm, second::FormulaTerm) = + isequal(first.lhs, second.lhs) && isequal(first.rhs, second.rhs) """ @@ -135,7 +137,8 @@ FunctionTerm(forig::Fo, fanon::Fa, 
names::NTuple{N,Symbol}, FunctionTerm{Fo, Fa, names}(forig, fanon, exorig, args_parsed) width(::FunctionTerm) = 1 -==(first::FunctionTerm, second::FunctionTerm) = first.forig == second.forig && +==(first::FunctionTerm, second::FunctionTerm) = + first.forig == second.forig && first.args_parsed == second.args_parsed isequal(first::FunctionTerm, second::FunctionTerm) = isequal(first.forig, second.forig) && @@ -205,7 +208,8 @@ via the [`implicit_intercept`](@ref) trait). struct InterceptTerm{HasIntercept} <: AbstractTerm end width(::InterceptTerm{H}) where {H} = H ? 1 : 0 -==(first::InterceptTerm, second::InterceptTerm) = width(first) == width(second) +==(first::InterceptTerm, second::InterceptTerm) = + width(first) == width(second) isequal(first::InterceptTerm, second::InterceptTerm) = isequal(width(first), width(second)) @@ -234,12 +238,16 @@ end width(::ContinuousTerm) = 1 ==(first::ContinuousTerm, second::ContinuousTerm) = first.sym == second.sym && - first.mean == second.mean && first.var == second.var && - first.min == second.min && first.max == second.max + first.mean == second.mean && + first.var == second.var && + first.min == second.min && + first.max == second.max isequal(first::ContinuousTerm, second::ContinuousTerm) = - isequal(first.sym, second.sym) && isequal(first.mean, second.mean) && - isequal(first.var, second.var) && isequal(first.min, second.min) && + isequal(first.sym, second.sym) && + isequal(first.mean, second.mean) && + isequal(first.var, second.var) && + isequal(first.min, second.min) && isequal(first.max, second.max) """ CategoricalTerm{C,T,N} <: AbstractTerm @@ -264,10 +272,12 @@ CategoricalTerm(sym::Symbol, contrasts::ContrastsMatrix{C,T}) where {C,T} = CategoricalTerm{C,T,length(contrasts.termnames)}(sym, contrasts) ==(first::CategoricalTerm, second::CategoricalTerm) = - first.sym == second.sym && width(first) == width(second) && + first.sym == second.sym && + width(first) == width(second) && first.contrasts == second.contrasts 
isequal(first::CategoricalTerm, second::CategoricalTerm) = - isequal(first.sym, second.sym) && isequal(width(first), width(second)) && + isequal(first.sym, second.sym) && + isequal(width(first), width(second)) && isequal(first.contrasts, second.contrasts) """ MatrixTerm{Ts} <: AbstractTerm @@ -286,12 +296,10 @@ end MatrixTerm(t::AbstractTerm) = MatrixTerm((t, )) width(t::MatrixTerm) = sum(width(tt) for tt in t.terms) -# ==(first::MatrixTerm, second::MatrixTerm) = -# collect_matrix_terms(first.terms) == collect_matrix_terms(second.terms) -# isequal(first::MatrixTerm, second::MatrixTerm) = -# isequal(collect_matrix_terms(first.terms), collect_matrix_terms(second.terms)) -==(first::MatrixTerm, second::MatrixTerm) = first.terms == second.terms -isequal(first::MatrixTerm, second::MatrixTerm) = isequal(first.terms, second.terms) +==(first::MatrixTerm, second::MatrixTerm) = + first.terms == second.terms +isequal(first::MatrixTerm, second::MatrixTerm) = + isequal(first.terms, second.terms) """ collect_matrix_terms(ts::TupleTerm) diff --git a/test/schema.jl b/test/schema.jl index 0c564d79..89d93d03 100644 --- a/test/schema.jl +++ b/test/schema.jl @@ -1,8 +1,7 @@ @testset "schemas" begin - import Base using StatsModels: schema, apply_schema, FullRank - f = @formula(y ~ 1 + a + b + c + b & c) + f = @formula(y ~ 1 + a + log(b) + c + b & c) y = rand(9) b = rand(9) @@ -14,6 +13,9 @@ df3 = (y = y, a = 1:9, b = b, c = repeat(["a", "b", "c"], 3)) df4 = (y = [df.y; df.y], a = [1:9; 1:9], b = [b; b], c = [df.c; df.c]) df5 = (z = y, a = 1:9, b = b, c = repeat(["d", "e", "f"], 3)) + df6 = (y = y, a = 2:10, b = b, c = repeat(["a", "b", "c"], 3)) + df7 = (w = y, d = 1:9, x = b, z = repeat(["d", "e", "f"], 3)) + df8 = (y = y, a = 1:9, c = repeat(["d", "e", "f"], 3)) sch = schema(df, Dict(:c => DummyCoding(base="e"))) sch2 = schema(df, Dict(:c => EffectsCoding(base="e"))) @@ -22,18 +24,20 @@ @test schema(df) != schema(df3) @test schema(df) != schema(df4) @test schema(df) != schema(df5) + 
@test schema(df) != schema(df6) + @test schema(df) != schema(df7) + @test schema(df) != schema(df8) + @test schema(df8) != schema(df) @test sch != sch2 @test isequal(schema(df), schema(df2)) @test !isequal(schema(df), schema(df3)) @test !isequal(schema(df), schema(df4)) @test !isequal(schema(df), schema(df5)) + @test !isequal(schema(df), schema(df6)) + @test !isequal(schema(df), schema(df7)) + @test !isequal(schema(df), schema(df8)) + @test !isequal(schema(df8), schema(df)) @test !isequal(sch, sch2) - # @test schema(df) == schema(df3) - # @test isequal(schema(df), schema(df3)) - #@test schema(df) != schema(df4) - #@test isequal(schema(df), schema(df4)) - - end From 2f03a7c43eaa10a81d6e98d65a9b90c9d02dbdff Mon Sep 17 00:00:00 2001 From: mrrobot-2000 <60689620+mrrobot-2000@users.noreply.github.com> Date: Mon, 16 Mar 2020 13:46:00 +0530 Subject: [PATCH 4/7] Update src/terms.jl Co-Authored-By: Dave Kleinschmidt --- src/terms.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/terms.jl b/src/terms.jl index b3c37e7d..2fe788a6 100644 --- a/src/terms.jl +++ b/src/terms.jl @@ -237,7 +237,8 @@ struct ContinuousTerm{T} <: AbstractTerm end width(::ContinuousTerm) = 1 -==(first::ContinuousTerm, second::ContinuousTerm) = first.sym == second.sym && +==(first::ContinuousTerm, second::ContinuousTerm) = + first.sym == second.sym && first.mean == second.mean && first.var == second.var && first.min == second.min && From ef367d78397ee42f48b67a61ca1acc990ea3a319 Mon Sep 17 00:00:00 2001 From: mrrobot-2000 Date: Mon, 16 Mar 2020 23:14:25 +0530 Subject: [PATCH 5/7] Add more tests. 
--- test/schema.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/schema.jl b/test/schema.jl index 89d93d03..9b6252a5 100644 --- a/test/schema.jl +++ b/test/schema.jl @@ -21,6 +21,7 @@ sch2 = schema(df, Dict(:c => EffectsCoding(base="e"))) @test schema(df) == schema(df2) + @test apply_schema(schema(df)) == apply_schema(schema(df2)) @test schema(df) != schema(df3) @test schema(df) != schema(df4) @test schema(df) != schema(df5) @@ -28,9 +29,11 @@ @test schema(df) != schema(df7) @test schema(df) != schema(df8) @test schema(df8) != schema(df) + @test apply_schema(schema(df)) != apply_schema(schema(df5)) @test sch != sch2 @test isequal(schema(df), schema(df2)) + @test isequal(apply_schema(schema(df)), apply_schema(schema(df2))) @test !isequal(schema(df), schema(df3)) @test !isequal(schema(df), schema(df4)) @test !isequal(schema(df), schema(df5)) @@ -38,6 +41,7 @@ @test !isequal(schema(df), schema(df7)) @test !isequal(schema(df), schema(df8)) @test !isequal(schema(df8), schema(df)) + @test !isequal(apply_schema(schema(df)), apply_schema(schema(df5))) @test !isequal(sch, sch2) end From 993829b8e0b0b7cc8891c778e34c9629360dcaa6 Mon Sep 17 00:00:00 2001 From: mrrobot-2000 Date: Mon, 16 Mar 2020 23:50:14 +0530 Subject: [PATCH 6/7] Fix broken tests. 
--- test/schema.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/schema.jl b/test/schema.jl index 9b6252a5..5f3b1056 100644 --- a/test/schema.jl +++ b/test/schema.jl @@ -21,7 +21,7 @@ sch2 = schema(df, Dict(:c => EffectsCoding(base="e"))) @test schema(df) == schema(df2) - @test apply_schema(schema(df)) == apply_schema(schema(df2)) + @test apply_schema(f, schema(df)) == apply_schema(f, schema(df2)) @test schema(df) != schema(df3) @test schema(df) != schema(df4) @test schema(df) != schema(df5) @@ -29,11 +29,11 @@ @test schema(df) != schema(df7) @test schema(df) != schema(df8) @test schema(df8) != schema(df) - @test apply_schema(schema(df)) != apply_schema(schema(df5)) + @test apply_schema(f, schema(df)) != apply_schema(f, schema(df5)) @test sch != sch2 @test isequal(schema(df), schema(df2)) - @test isequal(apply_schema(schema(df)), apply_schema(schema(df2))) + @test isequal(apply_schema(f, schema(df)), apply_schema(f, schema(df2))) @test !isequal(schema(df), schema(df3)) @test !isequal(schema(df), schema(df4)) @test !isequal(schema(df), schema(df5)) @@ -41,7 +41,7 @@ @test !isequal(schema(df), schema(df7)) @test !isequal(schema(df), schema(df8)) @test !isequal(schema(df8), schema(df)) - @test !isequal(apply_schema(schema(df)), apply_schema(schema(df5))) + @test !isequal(apply_schema(f, schema(df)), apply_schema(f, schema(df5))) @test !isequal(sch, sch2) end From ee16796745d61c71632b684a4c00d57c3e344dfc Mon Sep 17 00:00:00 2001 From: mrrobot-2000 Date: Tue, 31 Mar 2020 18:58:43 +0530 Subject: [PATCH 7/7] Fix broken tests. 
--- test/schema.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/schema.jl b/test/schema.jl index 5f3b1056..b4521648 100644 --- a/test/schema.jl +++ b/test/schema.jl @@ -29,7 +29,7 @@ @test schema(df) != schema(df7) @test schema(df) != schema(df8) @test schema(df8) != schema(df) - @test apply_schema(f, schema(df)) != apply_schema(f, schema(df5)) + @test apply_schema(f, schema(df)) == apply_schema(f, schema(df5)) @test sch != sch2 @test isequal(schema(df), schema(df2)) @@ -41,7 +41,7 @@ @test !isequal(schema(df), schema(df7)) @test !isequal(schema(df), schema(df8)) @test !isequal(schema(df8), schema(df)) - @test !isequal(apply_schema(f, schema(df)), apply_schema(f, schema(df5))) + @test isequal(apply_schema(f, schema(df)), apply_schema(f, schema(df5))) @test !isequal(sch, sch2) end