LRT for GLMMs (#279)

palday · dmbates · web-flow · commit 2e431e4679f6 · 2020-02-23T16:03:18.000Z
* LRT for GLMMs and fix bug in safety check for LMM

* pretty printing of LR test

* doc update for named arg use_threads in replicate()

Co-authored-by: Douglas Bates <dmbates@gmail.com>
diff --git a/src/generalizedlinearmixedmodel.jl b/src/generalizedlinearmixedmodel.jl
@@ -95,6 +95,8 @@ end
 
 StatsBase.deviance(m::GeneralizedLinearMixedModel) = deviance(m, m.optsum.nAGQ)
 
+objective(m::GeneralizedLinearMixedModel) = deviance(m)  # a GLMM's objective is its deviance (deviance(m) takes nAGQ from m.optsum)
+
 """
 deviance!(m::GeneralizedLinearMixedModel, nAGQ=1)
 
diff --git a/src/likelihoodratiotest.jl b/src/likelihoodratiotest.jl
@@ -52,10 +52,27 @@ Likeihood ratio test applied to a set of nested models.
 Note that nesting of the models is not checked.  It is incumbent on the user to check this.
 """
 function likelihoodratiotest(m::LinearMixedModel...)
+    allequal(getproperty.(getproperty.(m,:optsum),:REML)) ||  # every model must share one objective
+        throw(ArgumentError("Models must all be fit with the same objective (i.e. all ML or all REML)"))
     if any(getproperty.(getproperty.(m,:optsum),:REML))
-        reduce(==,coefnames.(m))  ||
+        allequal(coefnames.(m))  ||
                 throw(ArgumentError("Likelihood-ratio tests for REML-fitted models are only valid when the fixed-effects specifications are identical"))
     end
+    _likelihoodratiotest(m...)  # checks passed: delegate to the implementation shared with the GLMM method
+end
+
+function likelihoodratiotest(m::GeneralizedLinearMixedModel...)
+    # TODO: test that all models are fit with same fast/nAGQ option?
+    resps = map(x -> x.resp, m)  # response objects, queried below for family and link
+    if !allequal(map(Distribution, resps))
+        throw(ArgumentError("Models must be fit to the same distribution"))
+    elseif !allequal(map(r -> string(Link(r)), resps))
+        throw(ArgumentError("Models must have the same link function"))
+    end
+    _likelihoodratiotest(m...)
+end
+
+function _likelihoodratiotest(m::Vararg{T}) where T <: MixedModel
     m = collect(m)   # change the tuple to an array
     dofs = dof.(m)
     formulas = String.(Symbol.(getproperty.(m,:formula)))
@@ -86,21 +103,62 @@ function Base.show(io::IO, lrt::LikelihoodRatioTest; digits=2)
     println(io, "Model Formulae")
 
     for (i, f) in enumerate(lrt.formulas)
-        println("$i: $f")
+        println(io, "$i: $f")
     end
-    cols = hcat(lrt.models.dof, lrt.models.deviance,
-                _prepend_0(lrt.tests.deviancediff),
-                _prepend_0(lrt.tests.dofdiff),
-                _prepend_0(lrt.tests.pvalues))
-
-    ct = CoefTable(
-        cols, # cols
-        ["model-dof", "deviance", "χ²", "χ²-dof", "P(>χ²)"], # colnms
-        string.(1:length(lrt.formulas)), # rownms
-        5, # pvalcol
-        3 # teststatcol
-    )
-    show(io, ct)
+
+    # the following was adapted from StatsModels#162
+    # from nalimilan
+    Δdf = lrt.tests.dofdiff
+    Δdev = lrt.tests.deviancediff
+
+    nc = 6
+    nr = length(lrt.formulas)
+    outrows = Matrix{String}(undef, nr+1, nc)
+
+    outrows[1, :] = ["",
+                    "model-dof",
+                    "deviance",
+                    "χ²",
+                    "χ²-dof",
+                    "P(>χ²)"] # colnms
+
+
+    outrows[2, :] = ["[1]",
+                    @sprintf("%.0d", lrt.dof[1]),
+                    @sprintf("%.4f", lrt.deviance[1]),
+                    " "," ", " "]
+
+    for i in 2:nr
+        outrows[i+1, :] = ["[$i]",
+                           @sprintf("%.0d", lrt.dof[i]),
+                           @sprintf("%.4f", lrt.deviance[i]),
+                           @sprintf("%.4f", Δdev[i-1]),
+                           @sprintf("%.0d", Δdf[i-1]),
+                           string(StatsBase.PValue(lrt.pvalues[i-1]))]
+    end
+    colwidths = length.(outrows)
+    max_colwidths = [maximum(view(colwidths, :, i)) for i in 1:nc]
+    totwidth = sum(max_colwidths) + 2*5
+
+    println(io, '─'^totwidth)
+
+    for r in 1:nr+1
+        for c in 1:nc
+            cur_cell = outrows[r, c]
+            cur_cell_len = length(cur_cell)
+
+            padding = " "^(max_colwidths[c]-cur_cell_len)
+            if c > 1
+                padding = "  "*padding
+            end
+
+            print(io, padding)
+            print(io, cur_cell)
+        end
+        print(io, "\n")
+        r == 1 && println(io, '─'^totwidth)
+    end
+    print(io, '─'^totwidth)
 
     nothing
 end
diff --git a/src/utilities.jl b/src/utilities.jl
@@ -1,3 +1,20 @@
+"""
+    allequal(x::Array; comparison=isequal)
+    allequal(x::Tuple; comparison=isequal)
+    allequal(x...; comparison=isequal)
+
+Return `true` if every element of `x` equals the first one under `comparison`.
+
+Each element is compared as a whole value, so elements that are themselves
+arrays are supported. An empty collection is vacuously all-equal (`true`).
+"""
+function allequal(x::Union{Array,Tuple}; comparison=isequal)::Bool
+    # no broadcasting over `first(x)`: an array-valued element would be iterated
+    return isempty(x) || all(y -> comparison(first(x), y), x)
+end
+
+allequal(x...; comparison=isequal) = allequal(x; comparison=comparison)
+
 """
     average(a::T, b::T) where {T<:AbstractFloat}
 
@@ -88,7 +105,7 @@ function checkindprsk(k::Integer)
 end
 
 """
-    replicate(f::Function, n::Integer, use_threads=false)
+    replicate(f::Function, n::Integer; use_threads=false)
 
 Return a vector of the values of `n` calls to `f()` - used in simulations where the value of `f` is stochastic.
 
diff --git a/test/Project.toml b/test/Project.toml
@@ -2,6 +2,7 @@
 BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Feather = "becb17da-46f6-5d3c-ad1b-1c5fe96bc73c"
+GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 NamedArrays = "86f7a689-2022-50b4-a561-43c23ac3c673"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
diff --git a/test/likelihoodratiotest.jl b/test/likelihoodratiotest.jl
@@ -1,4 +1,5 @@
 using MixedModels, Test
+import GLM: ProbitLink
 
 @testset "likelihoodratio test" begin
     slp = MixedModels.dataset(:sleepstudy);
@@ -13,11 +14,35 @@ using MixedModels, Test
     @test sum(map(length,lrt.pvalues)) == 1
     @test sum(map(length,lrt.models)) == 4
     @test length(lrt.formulae) == 2
+    show(IOBuffer(),lrt);
 
+
+    # mix of REML and ML
     fm0 = fit(MixedModel,@formula(reaction ~ 1 + (1+days|subj)),slp, REML=true);
+    @test_throws ArgumentError MixedModels.likelihoodratiotest(fm0,fm1)
+
+    # differing FE with REML
     fm1 = fit(MixedModel,@formula(reaction ~ 1 + days + (1+days|subj)),slp, REML=true);
     fm10 = fit(MixedModel,@formula(reaction ~ 1 + days + (1|subj)),slp, REML=true);
-
     @test_throws ArgumentError MixedModels.likelihoodratiotest(fm0,fm1);
-    lrt =  MixedModels.likelihoodratiotest(fm1,fm10);
+
+    contra = MixedModels.dataset(:contra);
+    gm0 = fit(MixedModel, @formula(use ~ 1+age+urban+livch+(1|urbdist)), contra, Bernoulli(), fast=true);
+    gm1 = fit(MixedModel, @formula(use ~ 1+age+abs2(age)+urban+livch+(1|urbdist)), contra, Bernoulli(), fast=true);
+    lrt = MixedModels.likelihoodratiotest(gm0,gm1);
+    @test [deviance(gm0), deviance(gm1)] == lrt.deviance
+    @test deviance(gm0) - deviance(gm1) == first(lrt.tests.deviancediff)
+    @test first(lrt.tests.dofdiff) == 1
+    @test sum(map(length,lrt.tests)) == 3
+    @test sum(map(length,lrt.pvalues)) == 1
+    @test sum(map(length,lrt.models)) == 4
+    @test length(lrt.formulae) == 2
+
+    # mismatched links
+    gm_probit = fit(MixedModel, @formula(use ~ 1+age+urban+livch+(1|urbdist)), contra, Bernoulli(), ProbitLink(), fast=true);
+    @test_throws ArgumentError MixedModels.likelihoodratiotest(gm0,gm_probit)
+
+    # mismatched families
+    gm_poisson = fit(MixedModel, @formula(use ~ 1+age+urban+livch+(1|urbdist)), contra, Poisson(), fast=true);
+    @test_throws ArgumentError MixedModels.likelihoodratiotest(gm0,gm_poisson)
 end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -11,3 +11,4 @@ include("pirls.jl")
 include("gausshermite.jl")
 include("fit.jl")
 include("missing.jl")
+include("likelihoodratiotest.jl")