diff --git a/src/MixedModels.jl b/src/MixedModels.jl index 7fcad200f..cfaf66bda 100644 --- a/src/MixedModels.jl +++ b/src/MixedModels.jl @@ -144,6 +144,7 @@ include("randomeffectsterm.jl") include("linearmixedmodel.jl") include("gausshermite.jl") include("generalizedlinearmixedmodel.jl") +include("mixedmodel.jl") include("likelihoodratiotest.jl") include("linalg/statschol.jl") include("linalg/cholUnblocked.jl") diff --git a/src/generalizedlinearmixedmodel.jl b/src/generalizedlinearmixedmodel.jl index 71edb1e44..4ebb181e9 100644 --- a/src/generalizedlinearmixedmodel.jl +++ b/src/generalizedlinearmixedmodel.jl @@ -53,10 +53,19 @@ struct GeneralizedLinearMixedModel{T<:AbstractFloat} <: MixedModel{T} mult::Vector{T} end -StatsBase.coefnames(m::GeneralizedLinearMixedModel) = coefnames(m.LMM) - -StatsBase.coeftable(m::GeneralizedLinearMixedModel) = coeftable(m.LMM) - +function StatsBase.coeftable(m::GeneralizedLinearMixedModel) + co = fixef(m) + se = stderror(m) + z = co ./ se + pvalue = ccdf.(Chisq(1), abs2.(z)) + CoefTable( + hcat(co, se, z, pvalue), + ["Estimate", "Std.Error", "z value", "P(>|z|)"], + coefnames(m), + 4, # pvalcol + 3, # teststatcol + ) +end """ deviance(m::GeneralizedLinearMixedModel{T}, nAGQ=1)::T where {T} @@ -73,7 +82,7 @@ function StatsBase.deviance(m::GeneralizedLinearMixedModel{T}, nAGQ = 1) where { u = vec(first(m.u)) u₀ = vec(first(m.u₀)) copyto!(u₀, u) - ra = RaggedArray(m.resp.devresid, first(m.LMM.reterms).refs) + ra = RaggedArray(m.resp.devresid, first(m.LMM.allterms).refs) devc0 = sum!(map!(abs2, m.devc0, u), ra) # the deviance components at z = 0 sd = map!(inv, m.sd, m.LMM.L[Block(1, 1)].diag) mult = fill!(m.mult, 0) @@ -112,16 +121,15 @@ function deviance!(m::GeneralizedLinearMixedModel, nAGQ = 1) deviance(m, nAGQ) end -function GLM.dispersion(m::GeneralizedLinearMixedModel, sqr::Bool = false) +function GLM.dispersion(m::GeneralizedLinearMixedModel{T}, sqr::Bool = false) where {T} # adapted from GLM.dispersion(::AbstractGLM, ::Bool) # TODO: PR for a GLM.dispersion(resp::GLM.GlmResp, dof_residual::Int, sqr::Bool) r = m.resp if dispersion_parameter(r.d) - wrkwt, wrkresid = r.wrkwt, r.wrkresid - s = sum(i -> wrkwt[i] * abs2(wrkresid[i]), eachindex(wrkwt, wrkresid)) / dof_residual(m) + s = sum(wt * abs2(re) for (wt, re) in zip(r.wrkwt, r.wrkresid)) / dof_residual(m) sqr ? s : sqrt(s) else - one(eltype(r.mu)) + one(T) end end @@ -391,7 +399,11 @@ function Base.getproperty(m::GeneralizedLinearMixedModel, s::Symbol) m.β elseif s ∈ (:σ, :sigma) sdest(m) - elseif s ∈ (:A, :L, :λ, :lowerbd, :corr, :vcov, :PCA, :rePCA, :optsum, :X, :reterms, :feterms, :formula, :σs, :σρs) + elseif s == :σs + σs(m) + elseif s == :σρs + σρs(m) + elseif s ∈ (:A, :L, :λ, :lowerbd, :corr, :PCA, :rePCA, :optsum, :X, :reterms, :feterms, :formula) getproperty(m.LMM, s) elseif s == :y m.resp.y @@ -401,18 +413,17 @@ function Base.getproperty(m::GeneralizedLinearMixedModel, s::Symbol) end function StatsBase.loglikelihood(m::GeneralizedLinearMixedModel{T}) where {T} - accum = zero(T) + r = m.resp D = Distribution(m.resp) - if D <: Binomial - for (μ, y, n) in zip(m.resp.mu, m.resp.y, m.wt) - accum += logpdf(D(round(Int, n), μ), round(Int, y * n)) + accum = ( + if D <: Binomial + sum(logpdf(D(round(Int, n), μ), round(Int, y * n)) + for (μ, y, n) in zip(r.mu, r.y, m.wt)) + else + sum(logpdf(D(μ), y) for (μ, y) in zip(r.mu, r.y)) end - else - for (μ, y) in zip(m.resp.mu, m.resp.y) - accum += logpdf(D(μ), y) - end - end - accum - (mapreduce(u -> sum(abs2, u), +, m.u) + logdet(m)) / 2 + ) + accum - (sum(sum(abs2, u) for u in m.u) + logdet(m)) / 2 end StatsBase.nobs(m::GeneralizedLinearMixedModel) = length(m.η) @@ -589,16 +600,14 @@ varest(m::GeneralizedLinearMixedModel{T}) where {T} = one(T) # delegate GLMM method to LMM field for f in ( - :describeblocks, :feL, + :fetrm, :(LinearAlgebra.logdet), :lowerbd, :PCA, :rePCA, + :(StatsBase.coefnames), :(StatsModels.modelmatrix), - :(StatsBase.vcov), - :σs, - :σρs, ) @eval begin $f(m::GeneralizedLinearMixedModel) = $f(m.LMM) diff --git a/src/linearmixedmodel.jl b/src/linearmixedmodel.jl index efb2d5164..6242c98cd 100644 --- a/src/linearmixedmodel.jl +++ b/src/linearmixedmodel.jl @@ -187,13 +187,6 @@ function StatsBase.coeftable(m::LinearMixedModel) ) end -""" - cond(m::MixedModel) - -Return a vector of condition numbers of the λ matrices for the random-effects terms -""" -LinearAlgebra.cond(m::MixedModel) = cond.(m.λ) - """ condVar(m::LinearMixedModel) @@ -251,8 +244,7 @@ function createAL(allterms::Vector{Union{ReMat{T},FeMat{T}}}) where {T} A, L end - -StatsBase.deviance(m::MixedModel) = objective(m) +StatsBase.deviance(m::LinearMixedModel) = objective(m) GLM.dispersion(m::LinearMixedModel, sqr::Bool = false) = sqr ? varest(m) : sdest(m) @@ -270,14 +262,14 @@ function StatsBase.dof_residual(m::LinearMixedModel)::Int end """ - feind(m::MixedModel) + feind(m::LinearMixedModel) An internal utility to return the index in `m.allterms` of the fixed-effects term. """ -feind(m::MixedModel) = findfirst(Base.Fix2(isa, FeMat), m.allterms) +feind(m::LinearMixedModel) = findfirst(Base.Fix2(isa, FeMat), m.allterms) """ - feL(m::MixedModel) + feL(m::LinearMixedModel) Return the lower Cholesky factor for the fixed-effects parameters, as an `LowerTriangular` `p × p` matrix. @@ -292,7 +284,7 @@ end Return the fixed-effects term from `m.allterms` """ -fetrm(m) = m.allterms[feind(m)] +fetrm(m::LinearMixedModel) = m.allterms[feind(m)] """ fit!(m::LinearMixedModel[; verbose::Bool=false, REML::Bool=false]) @@ -765,16 +757,6 @@ function Base.show(io::IO, m::LinearMixedModel) show(io, coeftable(m)) end -function σs(m::LinearMixedModel) - σ = sdest(m) - NamedTuple{fnames(m)}(((σs(t, σ) for t in m.reterms)...,)) -end - -function σρs(m::LinearMixedModel) - σ = sdest(m) - NamedTuple{fnames(m)}(((σρs(t, σ) for t in m.reterms)...,)) -end - """ size(m::LinearMixedModel) @@ -815,6 +797,8 @@ end std(m::MixedModel) Return the estimated standard deviations of the random effects as a `Vector{Vector{T}}`. + +FIXME: This uses an old convention of isfinite(sdest(m)). Probably drop in favor of m.σs """ function Statistics.std(m::LinearMixedModel) rl = rowlengths.(m.reterms) @@ -919,32 +903,6 @@ Returns the estimate of σ², the variance of the conditional distribution of Y """ varest(m::LinearMixedModel) = pwrss(m) / ssqdenom(m) -""" - vcov(m::LinearMixedModel) - -Returns the variance-covariance matrix of the fixed effects. -If `corr=true`, then correlation of fixed effects is returned instead. -""" -function StatsBase.vcov(m::LinearMixedModel{T}; corr=false) where {T} - Xtrm = fetrm(m) - iperm = invperm(Xtrm.piv) - p = length(iperm) - r = Xtrm.rank - Linv = inv(feL(m)) - permvcov = varest(m) * (Linv'Linv) - if p == Xtrm.rank - vv = permvcov[iperm, iperm] - else - covmat = fill(zero(T) / zero(T), (p, p)) - for j = 1:r, i = 1:r - covmat[i, j] = permvcov[i, j] - end - vv = covmat[iperm, iperm] - end - - corr ? StatsBase.cov2cor!(vv, stderror(m)) : vv -end - """ zerocorr!(m::LinearMixedModel[, trmnms::Vector{Symbol}]) diff --git a/src/mixedmodel.jl b/src/mixedmodel.jl new file mode 100644 index 000000000..a86c7a45e --- /dev/null +++ b/src/mixedmodel.jl @@ -0,0 +1,44 @@ + +""" +cond(m::MixedModel) + +Return a vector of condition numbers of the λ matrices for the random-effects terms +""" +LinearAlgebra.cond(m::MixedModel) = cond.(m.λ) + +function σs(m::MixedModel) + σ = dispersion(m) + NamedTuple{fnames(m)}(((σs(t, σ) for t in m.reterms)...,)) +end + +function σρs(m::MixedModel) + σ = dispersion(m) + NamedTuple{fnames(m)}(((σρs(t, σ) for t in m.reterms)...,)) +end + +""" + vcov(m::LinearMixedModel) + +Returns the variance-covariance matrix of the fixed effects. +If `corr=true`, then correlation of fixed effects is returned instead. +""" +function StatsBase.vcov(m::MixedModel; corr=false) + Xtrm = fetrm(m) + iperm = invperm(Xtrm.piv) + p = length(iperm) + r = Xtrm.rank + Linv = inv(feL(m)) + T = eltype(Linv) + permvcov = dispersion(m, true) * (Linv'Linv) + if p == Xtrm.rank + vv = permvcov[iperm, iperm] + else + covmat = fill(zero(T) / zero(T), (p, p)) + for j = 1:r, i = 1:r + covmat[i, j] = permvcov[i, j] + end + vv = covmat[iperm, iperm] + end + + corr ? StatsBase.cov2cor!(vv, stderror(m)) : vv +end diff --git a/test/pirls.jl b/test/pirls.jl index 5f0fbec90..d90e11408 100644 --- a/test/pirls.jl +++ b/test/pirls.jl @@ -77,3 +77,34 @@ end #@test isapprox(sum(x -> sum(abs2, x), gm4.u), 196.8695297987013, atol=0.1) #@test isapprox(sum(gm4.resp.devresid), 220.92685781326136, atol=0.1) end + +@testset "goldstein" begin # from a 2020-04-22 msg by Ben Goldstein to R-SIG-Mixed-Models + goldstein = + categorical!( + DataFrame( + group = repeat(1:10, outer=10), + y = [ + 83, 3, 8, 78, 901, 21, 4, 1, 1, 39, + 82, 3, 2, 82, 874, 18, 5, 1, 3, 50, + 87, 7, 3, 67, 914, 18, 0, 1, 1, 38, + 86, 13, 5, 65, 913, 13, 2, 0, 0, 48, + 90, 5, 5, 71, 886, 19, 3, 0, 2, 32, + 96, 1, 1, 87, 860, 21, 3, 0, 1, 54, + 83, 2, 4, 70, 874, 19, 5, 0, 4, 36, + 100, 11, 3, 71, 950, 21, 6, 0, 1, 40, + 89, 5, 5, 73, 859, 29, 3, 0, 2, 38, + 78, 13, 6, 100, 852, 24, 5, 0, 1, 39 + ], + ), + :group, + ) + gform = @formula(y ~ 1 + (1|group)) + m1 = fit(MixedModel, gform, goldstein, Poisson()) + @test deviance(m1) ≈ 193.5587302384811 rtol=1.e-5 + @test only(m1.β) ≈ 4.192196439077657 atol=1.e-5 + @test only(m1.θ) ≈ 1.838245201739852 atol=1.e-5 + m11 = fit(MixedModel, gform, goldstein, Poisson(), nAGQ=11) + @test deviance(m11) ≈ 193.51028088736842 rtol=1.e-5 + @test only(m11.β) ≈ 4.192196439077657 atol=1.e-5 + @test only(m11.θ) ≈ 1.838245201739852 atol=1.e-5 +end \ No newline at end of file