Add tests to improve coverage (#191)

kleinschmidt · nalimilan · web-flow · commit 0d7599bde2ed · 2020-09-14T14:55:43.000-04:00
* some tests for has_schema etc.

* missing method for has_schema

* test model trait functions drop_intercept and implicit_intercept

* fix typo in test

* test cases for implicit_intercept(Any)

* Apply suggestions from code review

Co-authored-by: Milan Bouchet-Valat <nalimilan@club.fr>

Co-authored-by: Milan Bouchet-Valat <nalimilan@club.fr>
diff --git a/src/schema.jl b/src/schema.jl
@@ -246,6 +246,7 @@ has_schema(t::Term) = false
 has_schema(t::Union{ContinuousTerm,CategoricalTerm}) = true
 has_schema(t::InteractionTerm) = all(has_schema(tt) for tt in t.terms)
 has_schema(t::TupleTerm) = all(has_schema(tt) for tt in t)
+has_schema(t::MatrixTerm) = has_schema(t.terms)
 has_schema(t::FormulaTerm) = has_schema(t.lhs) && has_schema(t.rhs)
 
 struct FullRank
@@ -263,7 +264,7 @@ function apply_schema(t::FormulaTerm, schema::Schema, Mod::Type{<:StatisticalMod
     schema = FullRank(schema)
 
     # Models with the drop_intercept trait do not support intercept terms,
-    # usually because they include one implicitly.
+    # usually because one is necessarily included during fitting.
     if drop_intercept(Mod)
         if hasintercept(t)
             throw(ArgumentError("Model type $Mod doesn't support intercept " *
diff --git a/test/modelmatrix.jl b/test/modelmatrix.jl
@@ -350,6 +350,24 @@
         @test reduce(vcat, last.(modelcols.(Ref(f), Tables.rowtable(d)))') == modelmatrix(f,d)
     end
 
+    @testset "modelmatrix and response set schema if needed" begin
+        d = DataFrame(r = rand(8),
+                      w = rand(8),
+                      x = repeat([:a, :b], outer = 4),
+                      y = repeat([:c, :d], inner = 2, outer = 2),
+                      z = repeat([:e, :f], inner = 4))
+    
+        f = @formula(r ~ 1 + w*x*y*z)
+
+        mm1 = modelmatrix(f, d)
+        mm2 = modelmatrix(apply_schema(f, schema(d)), d)
+        @test mm1 == mm2
+
+        r1 = response(f, d)
+        r2 = response(apply_schema(f, schema(d)), d)
+        @test r1 == r2
+    end
+
     @testset "setcontrasts!" begin
         @testset "#95" begin
             tbl = (Y = randn(8),
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -17,7 +17,8 @@ my_tests = ["ambiguity.jl",
             "modelframe.jl",
             "statsmodel.jl",
             "contrasts.jl",
-            "extension.jl"]
+            "extension.jl",
+            "traits.jl"]
 
 @testset "StatsModels" begin
     for tf in my_tests
diff --git a/test/schema.jl b/test/schema.jl
@@ -34,5 +34,34 @@
         @test f3.rhs.terms[end] === hint
 
     end
+
+    @testset "has_schema" begin
+        using StatsModels: has_schema
+
+        d = (y = rand(10), a = rand(10), b = repeat([:a, :b], 5))
+        
+        f = @formula(y ~ a*b)
+        @test !has_schema(f)
+        @test !has_schema(f.rhs)
+        @test !has_schema(StatsModels.collect_matrix_terms(f.rhs))
+
+        ff = apply_schema(f, schema(d))
+        @test has_schema(ff)
+        @test has_schema(ff.rhs)
+        @test has_schema(StatsModels.collect_matrix_terms(ff.rhs))
+
+        sch = schema(d)
+        a, b = term.((:a, :b))
+        @test !has_schema(a)
+        @test has_schema(sch[a])
+        @test !has_schema(b)
+        @test has_schema(sch[b])
+
+        @test !has_schema(a & b)
+        @test !has_schema(a & sch[b])
+        @test !has_schema(sch[a] & a)
+        @test has_schema(sch[a] & sch[b])
+
+    end
     
 end
diff --git a/test/traits.jl b/test/traits.jl
@@ -0,0 +1,103 @@
+using StatsModels: hasintercept, omitsintercept
+import StatsModels: drop_intercept, implicit_intercept
+
+struct DroppyMod <: StatisticalModel end
+drop_intercept(::Type{DroppyMod}) = true
+
+# define structs for testing implicit intercept trait:
+
+# default for StatisticalModel is true
+struct DefaultImplicit <: StatisticalModel end
+
+# override default = true for StatisticalModels
+struct NoImplicit <: StatisticalModel end
+implicit_intercept(::Type{NoImplicit}) = false
+
+# override default = false for types that are not StatisticalModels
+struct YesImplicit end
+implicit_intercept(::Type{YesImplicit}) = true
+
+
+@testset "Model traits" begin
+    d = (y = rand(10), x = rand(10), z = [:a, :b, :c])
+    sch = schema(d)
+    f = @formula(y ~ x)
+    f1 = @formula(y ~ 1 + x)
+    f0 = @formula(y ~ 0 + x)
+
+    @testset "drop_intercept" begin
+        @test_throws ArgumentError apply_schema(f1, sch, DroppyMod)
+        ff = apply_schema(f, sch, DroppyMod)
+        @test !hasintercept(ff)
+        @test !omitsintercept(ff)
+        ff0 = apply_schema(f0, sch, DroppyMod)
+        @test !hasintercept(ff0)
+        @test omitsintercept(ff0)
+
+        @test drop_intercept(DroppyMod()) == drop_intercept(DroppyMod)
+        # drop_intercept wins even though implicit_intercept(DroppyMod) is still true
+        @test implicit_intercept(DroppyMod)
+
+        @testset "categorical promotion" begin 
+            # drop_intercept == true means that the model should always ACT as if an
+            # intercept is present, even if it's not specified or is explicitly omitted
+            # (pushes the intercept term to the FullRank list of already-seen terms).
+
+            # full dummy coding
+            @test width(apply_schema(@formula(y ~ 0 + z), sch, StatisticalModel).rhs) == 3
+            # droppy regular coding
+            @test width(apply_schema(@formula(y ~ 0 + z), sch, DroppyMod).rhs) == 2
+        end
+    end
+
+    @testset "implicit_intercept" begin
+        @testset "default" begin
+            ff, ff0, ff1 = apply_schema.((f, f0, f1), Ref(sch), Any)
+            @test !hasintercept(ff)
+            @test !hasintercept(ff0)
+            @test hasintercept(ff1)
+            @test !omitsintercept(ff)
+            @test omitsintercept(ff0)
+            @test !omitsintercept(ff1)
+        end
+
+        @testset "StatisticalModel default" begin
+            ff, ff0, ff1 = apply_schema.((f, f0, f1), Ref(sch), DefaultImplicit)
+            @test hasintercept(ff)
+            @test !hasintercept(ff0)
+            @test hasintercept(ff1)
+            @test !omitsintercept(ff)
+            @test omitsintercept(ff0)
+            @test !omitsintercept(ff1)
+
+            @test implicit_intercept(DefaultImplicit()) == implicit_intercept(DefaultImplicit)
+        end
+
+        @testset "Override StatisticalModel default" begin
+            ff, ff0, ff1 = apply_schema.((f, f0, f1), Ref(sch), NoImplicit)
+            @test !hasintercept(ff)
+            @test !hasintercept(ff0)
+            @test hasintercept(ff1)
+            @test !omitsintercept(ff)
+            @test omitsintercept(ff0)
+            @test !omitsintercept(ff1)
+
+            @test implicit_intercept(NoImplicit()) == implicit_intercept(NoImplicit)
+        end
+
+        @testset "Override Any default" begin
+            ff, ff0, ff1 = apply_schema.((f, f0, f1), Ref(sch), YesImplicit)
+            # broken because traits are not checked during apply_schema for
+            # a context that is not <:StatisticalModel
+            @test_broken hasintercept(ff)
+            @test !hasintercept(ff0)
+            @test hasintercept(ff1)
+            @test !omitsintercept(ff)
+            @test omitsintercept(ff0)
+            @test !omitsintercept(ff1)
+
+            @test implicit_intercept(YesImplicit()) == implicit_intercept(YesImplicit)
+        end
+        
+    end
+end