Skip to content

Commit 72b45b9

Browse files
paldaydmbates
andauthored
Bugfixes and last set of backports for 2.x (#399)
* warning for GLMM with dispersion parameter (#373)* warning for GLMM with dispersion parameter(cherry picked from commit 66b8863) * correct coeftable for GLMMs, add tests (#308) * Move methods for abstract MixedModel struct to separate file. (cherry picked from commit 5fb6f65) * chang CoefTable column names to match GLM.jl * ranef method for GLMM (#336) (cherry picked from commit 753aa6f) * make (1|x) + (y|x) equivalent to (1|x) + (0+y|x) (#338) * fix error with (1|x) + (y|x) * fix typo (cherry picked from commit 20c83ef) * update ranef docstring Co-authored-by: Douglas Bates <[email protected]>
1 parent e7bef62 commit 72b45b9

File tree

6 files changed

+164
-71
lines changed

6 files changed

+164
-71
lines changed

src/MixedModels.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ include("linearmixedmodel.jl")
117117
include("gausshermite.jl")
118118
include("generalizedlinearmixedmodel.jl")
119119
include("mixed.jl")
120+
include("mixedmodel.jl")
120121
include("linalg/statschol.jl")
121122
include("linalg/cholUnblocked.jl")
122123
include("linalg/rankUpdate.jl")

src/generalizedlinearmixedmodel.jl

Lines changed: 42 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ function StatsBase.deviance(m::GeneralizedLinearMixedModel{T}, nAGQ=1) where {T}
6868
u = vec(first(m.u))
6969
u₀ = vec(first(m.u₀))
7070
copyto!(u₀, u)
71-
ra = RaggedArray(m.resp.devresid, first(m.LMM.reterms).refs)
71+
ra = RaggedArray(m.resp.devresid, first(m.LMM.allterms).refs)
7272
devc0 = sum!(map!(abs2, m.devc0, u), ra) # the deviance components at z = 0
7373
sd = map!(inv, m.sd, m.LMM.L[Block(1,1)].diag)
7474
mult = fill!(m.mult, 0)
@@ -105,8 +105,17 @@ function deviance!(m::GeneralizedLinearMixedModel, nAGQ=1)
105105
deviance(m, nAGQ)
106106
end
107107

108-
GLM.dispersion(m::GeneralizedLinearMixedModel, sqr::Bool=false) =
109-
dispersion(m.resp, dof_residual(m), sqr)
108+
function GLM.dispersion(m::GeneralizedLinearMixedModel{T}, sqr::Bool = false) where {T}
109+
# adapted from GLM.dispersion(::AbstractGLM, ::Bool)
110+
# TODO: PR for a GLM.dispersion(resp::GLM.GlmResp, dof_residual::Int, sqr::Bool)
111+
r = m.resp
112+
if dispersion_parameter(r.d)
113+
s = sum(wt * abs2(re) for (wt, re) in zip(r.wrkwt, r.wrkresid)) / dof_residual(m)
114+
sqr ? s : sqrt(s)
115+
else
116+
one(T)
117+
end
118+
end
110119

111120
GLM.dispersion_parameter(m::GeneralizedLinearMixedModel) = dispersion_parameter(m.resp.d)
112121

@@ -233,9 +242,9 @@ GeneralizedLinearMixedModel(f::FormulaTerm, tbl,
233242
GeneralizedLinearMixedModel(f::FormulaTerm, tbl::Tables.ColumnTable,
234243
d::Normal,
235244
l::IdentityLink;
236-
wts = [],
237-
offset = [],
238-
contrasts = Dict{Symbol,Any}()) =
245+
wts = wts,
246+
offset = offset,
247+
contrasts = contrasts) =
239248
throw(ArgumentError("use LinearMixedModel for Normal distribution with IdentityLink"))
240249
function GeneralizedLinearMixedModel(f::FormulaTerm, tbl::Tables.ColumnTable,
241250
d::Distribution,
@@ -247,7 +256,14 @@ function GeneralizedLinearMixedModel(f::FormulaTerm, tbl::Tables.ColumnTable,
247256
d = Bernoulli()
248257
end
249258
(isa(d, Normal) && isa(l, IdentityLink)) &&
250-
throw(ArgumentError("use LinearMixedModel for Normal distribution with IdentityLink"))
259+
throw(ArgumentError("use LinearMixedModel for Normal distribution with IdentityLink"))
260+
261+
if !any(isa(d, dist) for dist in (Bernoulli, Binomial, Poisson))
262+
@warn """Results for families with a dispersion parameter are not reliable.
263+
It is best to avoid trying to fit such models in MixedModels until
264+
the authors get a better understanding of those cases."""
265+
end
266+
251267
LMM = LinearMixedModel(f, tbl, contrasts = contrasts; wts = wts)
252268
y = copy(LMM.y)
253269
# the sqrtwts field must be the correct length and type but we don't know those
@@ -288,7 +304,11 @@ function Base.getproperty(m::GeneralizedLinearMixedModel, s::Symbol)
288304
m.β
289305
elseif s (, :sigma)
290306
sdest(m)
291-
elseif s (:A, :L, , :lowerbd, :optsum, :X, :reterms, :feterms, :formula, :σs, :σρs)
307+
elseif s == :σs
308+
σs(m)
309+
elseif s == :σρs
310+
σρs(m)
311+
elseif s (:A, :L, , :lowerbd, :corr, :PCA, :rePCA, :optsum, :X, :reterms, :feterms, :formula)
292312
getproperty(m.LMM, s)
293313
elseif s == :y
294314
m.resp.y
@@ -298,18 +318,17 @@ function Base.getproperty(m::GeneralizedLinearMixedModel, s::Symbol)
298318
end
299319

300320
function StatsBase.loglikelihood(m::GeneralizedLinearMixedModel{T}) where {T}
301-
accum = zero(T)
321+
r = m.resp
302322
D = Distribution(m.resp)
303-
if D <: Binomial
304-
for (μ, y, n) in zip(m.resp.mu, m.resp.y, m.wt)
305-
accum += logpdf(D(round(Int, n), μ), round(Int, y * n))
306-
end
307-
else
308-
for (μ, y) in zip(m.resp.mu, m.resp.y)
309-
accum += logpdf(D(μ), y)
323+
accum = (
324+
if D <: Binomial
325+
sum(logpdf(D(round(Int, n), μ), round(Int, y * n))
326+
for (μ, y, n) in zip(r.mu, r.y, m.wt))
327+
else
328+
sum(logpdf(D(μ), y) for (μ, y) in zip(r.mu, r.y))
310329
end
311-
end
312-
accum - (mapreduce(u -> sum(abs2, u), + , m.u) + logdet(m)) / 2
330+
)
331+
accum - (sum(sum(abs2, u) for u in m.u) + logdet(m)) / 2
313332
end
314333

315334
StatsBase.nobs(m::GeneralizedLinearMixedModel) = length(m.η)
@@ -460,13 +479,14 @@ varest(m::GeneralizedLinearMixedModel{T}) where {T} = one(T)
460479
for f in (
461480
:describeblocks,
462481
:feL,
482+
:fetrm,
463483
:(LinearAlgebra.logdet),
464484
:lowerbd,
485+
:ranef,
486+
:rePCA,
487+
:(StatsBase.coefnames),
465488
:(StatsModels.modelmatrix),
466-
:(StatsBase.vcov),
467-
:σs,
468-
:σρs,
469-
)
489+
)
470490
@eval begin
471491
$f(m::GeneralizedLinearMixedModel) = $f(m.LMM)
472492
end

src/linearmixedmodel.jl

Lines changed: 50 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -122,17 +122,15 @@ function StatsBase.coeftable(m::MixedModel)
122122
se = stderror(m)
123123
z = co ./ se
124124
pvalue = ccdf.(Chisq(1), abs2.(z))
125-
CoefTable(hcat(co, se, z, pvalue), ["Estimate", "Std.Error", "z value", "P(>|z|)"],
126-
first(m.feterms).cnames, 4)
125+
CoefTable(
126+
hcat(co, se, z, pvalue),
127+
["Coef.", "Std. Error", "z", "Pr(>|z|)"],
128+
coefnames(m),
129+
4, # pvalcol
130+
3, # teststatcol
131+
)
127132
end
128133

129-
"""
130-
cond(m::MixedModel)
131-
132-
Return a vector of condition numbers of the λ matrices for the random-effects terms
133-
"""
134-
LinearAlgebra.cond(m::MixedModel) = cond.(m.λ)
135-
136134
"""
137135
condVar(m::LinearMixedModel)
138136
@@ -231,12 +229,29 @@ function StatsBase.dof_residual(m::LinearMixedModel)::Int
231229
end
232230

233231
"""
234-
feL(m::MixedModel)
232+
feind(m::LinearMixedModel)
233+
234+
An internal utility to return the index in `m.allterms` of the fixed-effects term.
235+
"""
236+
feind(m::LinearMixedModel) = findfirst(Base.Fix2(isa, FeMat), m.allterms)
237+
238+
"""
239+
feL(m::LinearMixedModel)
235240
236241
Return the lower Cholesky factor for the fixed-effects parameters, as an `LowerTriangular`
237242
`p × p` matrix.
238243
"""
239-
feL(m::LinearMixedModel) = LowerTriangular(m.L.blocks[end - 1, end - 1])
244+
function feL(m::LinearMixedModel)
245+
k = feind(m)
246+
LowerTriangular(m.L.blocks[k, k])
247+
end
248+
249+
"""
250+
fetrm(m::LinearMixedModel)
251+
252+
Return the fixed-effects term from `m.allterms`
253+
"""
254+
fetrm(m::LinearMixedModel) = m.allterms[feind(m)]
240255

241256
"""
242257
fit!(m::LinearMixedModel[; verbose::Bool=false, REML::Bool=false])
@@ -506,7 +521,7 @@ end
506521
ranef!(v::Vector, m::LinearMixedModel, uscale::Bool) = ranef!(v, m, fixef(m), uscale)
507522

508523
"""
509-
ranef(m::LinearMixedModel; uscale=false) #, named=true)
524+
ranef(m::MixedModel; uscale=false, named=false)
510525
511526
Return, as a `Vector{Vector{T}}` (`Vector{NamedVector{T}}` if `named=true`),
512527
the conditional modes of the random effects in model `m`.
@@ -636,16 +651,13 @@ function Base.show(io::IO, m::LinearMixedModel)
636651
show(io,coeftable(m))
637652
end
638653

639-
function σs(m::LinearMixedModel)
640-
σ = sdest(m)
641-
NamedTuple{fnames(m)}(((σs(t, σ) for t in m.reterms)...,))
642-
end
643-
644-
function σρs(m::LinearMixedModel)
645-
σ = sdest(m)
646-
NamedTuple{fnames(m)}(((σρs(t, σ) for t in m.reterms)...,))
647-
end
654+
"""
655+
size(m::LinearMixedModel)
648656
657+
Returns the size of a mixed model as a tuple of length four:
658+
the number of observations, the number of (non-singular) fixed-effects parameters,
659+
the number of conditional modes (random effects), the number of grouping variables
660+
"""
649661
function Base.size(m::LinearMixedModel)
650662
n, p = size(first(m.feterms))
651663
n, p, sum(size.(m.reterms, 2)), length(m.reterms)
@@ -660,10 +672,26 @@ This value is the contents of the `1 × 1` bottom right block of `m.L`
660672
"""
661673
sqrtpwrss(m::LinearMixedModel) = first(m.L.blocks[end, end])
662674

675+
676+
"""
677+
ssqdenom(m::LinearMixedModel)
678+
Return the denominator for penalized sums-of-squares.
679+
For MLE, this value is the number of observations. For REML, this
680+
value is the number of observations minus the rank of the fixed-effects matrix.
681+
The difference is analagous to the use of n or n-1 in the denominator when
682+
calculating the variance.
683+
"""
684+
function ssqdenom(m::LinearMixedModel)::Int
685+
(n, p, q, k) = size(m)
686+
m.optsum.REML ? n - p : n
687+
end
688+
663689
"""
664690
std(m::MixedModel)
665691
666692
Return the estimated standard deviations of the random effects as a `Vector{Vector{T}}`.
693+
694+
FIXME: This uses an old convention of isfinite(sdest(m)). Probably drop in favor of m.σs
667695
"""
668696
function Statistics.std(m::LinearMixedModel)
669697
rl = rowlengths.(m.reterms)
@@ -739,25 +767,7 @@ end
739767
740768
Returns the estimate of σ², the variance of the conditional distribution of Y given B.
741769
"""
742-
varest(m::LinearMixedModel) = pwrss(m) / dof_residual(m)
743-
744-
function StatsBase.vcov(m::LinearMixedModel{T}) where {T}
745-
Xtrm = first(m.feterms)
746-
iperm = invperm(Xtrm.piv)
747-
p = length(iperm)
748-
r = Xtrm.rank
749-
Linv = inv(feL(m))
750-
permvcov = varest(m) * (Linv'Linv)
751-
if p == Xtrm.rank
752-
permvcov[iperm, iperm]
753-
else
754-
covmat = fill(zero(T)/zero(T), (p, p))
755-
for j in 1:r, i in 1:r
756-
covmat[i,j] = permvcov[i, j]
757-
end
758-
covmat[iperm, iperm]
759-
end
760-
end
770+
varest(m::LinearMixedModel) = pwrss(m) / ssqdenom(m)
761771

762772
"""
763773
zerocorr!(m::LinearMixedModel[, trmnms::Vector{Symbol}])

src/mixedmodel.jl

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
2+
"""
3+
cond(m::MixedModel)
4+
5+
Return a vector of condition numbers of the λ matrices for the random-effects terms
6+
"""
7+
LinearAlgebra.cond(m::MixedModel) = cond.(m.λ)
8+
9+
function σs(m::MixedModel)
10+
σ = dispersion(m)
11+
NamedTuple{fnames(m)}(((σs(t, σ) for t in m.reterms)...,))
12+
end
13+
14+
function σρs(m::MixedModel)
15+
σ = dispersion(m)
16+
NamedTuple{fnames(m)}(((σρs(t, σ) for t in m.reterms)...,))
17+
end
18+
19+
"""
20+
vcov(m::LinearMixedModel)
21+
22+
Returns the variance-covariance matrix of the fixed effects.
23+
If `corr=true`, then correlation of fixed effects is returned instead.
24+
"""
25+
function StatsBase.vcov(m::MixedModel; corr=false)
26+
Xtrm = fetrm(m)
27+
iperm = invperm(Xtrm.piv)
28+
p = length(iperm)
29+
r = Xtrm.rank
30+
Linv = inv(feL(m))
31+
T = eltype(Linv)
32+
permvcov = dispersion(m, true) * (Linv'Linv)
33+
if p == Xtrm.rank
34+
vv = permvcov[iperm, iperm]
35+
else
36+
covmat = fill(zero(T) / zero(T), (p, p))
37+
for j = 1:r, i = 1:r
38+
covmat[i, j] = permvcov[i, j]
39+
end
40+
vv = covmat[iperm, iperm]
41+
end
42+
43+
corr ? StatsBase.cov2cor!(vv, stderror(m)) : vv
44+
end

test/Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
[deps]
22
BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e"
33
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
4+
GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
45
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
56
NamedArrays = "86f7a689-2022-50b4-a561-43c23ac3c673"
67
RData = "df47a6cb-8c03-5eed-afd8-b6050d6c41da"

test/pirls.jl

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
using DataFrames, LinearAlgebra, MixedModels, RData, Test
22

3+
using GLM: SqrtLink
4+
35
if !@isdefined(dat) || !isa(dat, Dict{Symbol, DataFrame})
46
const dat = Dict(Symbol(k) => v for (k, v) in
57
load(joinpath(dirname(pathof(MixedModels)), "..", "test", "dat.rda")))
@@ -12,12 +14,19 @@ end
1214
gm0 = fit(MixedModel, contraform, contra, Bernoulli(), fast=true);
1315
@test gm0.lowerbd == zeros(1)
1416
@test isapprox(gm0.θ, [0.5720734451352923], atol=0.001)
15-
@test isapprox(deviance(gm0,true), 2361.657188518064, atol=0.001)
17+
@test isapprox(deviance(gm0), 2361.657188518064, atol=0.001)
18+
# the first 9 BLUPs -- I don't think there's much point in testing all 102
19+
blups = [-0.9546542382159384, -0.9243280740976597, -0.5994415212823175,
20+
-0.5853637765024964, -0.5582547143159903, -0.5230879394455542,
21+
-0.5180165911218682, -0.5175868781594128, -0.4955043890201569]
22+
@test sort(vec(first(ranef(gm0))))[1:9] blups atol=1e-4
23+
1624
gm1 = fit(MixedModel, contraform, contra, Bernoulli());
1725
@test isapprox(gm1.θ, [0.573054], atol=0.005)
1826
@test lowerbd(gm1) == vcat(fill(-Inf, 7), 0.)
1927
@test isapprox(deviance(gm1,true), 2361.54575, rtol=0.00001)
2028
@test isapprox(loglikelihood(gm1), -1180.77288, rtol=0.00001)
29+
2130
@test dof(gm0) == length(gm0.β) + length(gm0.θ)
2231
@test nobs(gm0) == 1934
2332
fit!(gm0, fast=true, nAGQ=7)
@@ -29,13 +38,11 @@ end
2938
@test isnan(gm0.σ)
3039
@test length(gm0.y) == size(gm0.X, 1)
3140
@test in propertynames(gm0)
32-
gm0.β = gm0.beta
33-
@test gm0.β == gm0.beta
34-
gm0.θ = gm0.theta
35-
@test gm0.θ == gm0.theta
36-
gm0.βθ = vcat(gm0.β, gm0.theta)
37-
@test gm0.β == gm0.beta
38-
@test gm0.θ == gm0.theta
41+
42+
@testset "GLMM rePCA" begin
43+
@test length(MixedModels.rePCA(gm0)) == 1
44+
@test length(gm0.rePCA) == 1
45+
end
3946
# the next three values are not well defined in the optimization
4047
#@test isapprox(logdet(gm1), 75.7217, atol=0.1)
4148
#@test isapprox(sum(abs2, gm1.u[1]), 48.4747, atol=0.1)
@@ -49,7 +56,7 @@ end
4956
gm2 = fit(MixedModel, @formula(prop ~ 1 + p + (1|h)), cbpp, Binomial(), wts = cbpp[!,:s])
5057
@test isapprox(deviance(gm2,true), 100.09585619324639, atol=0.0001)
5158
@test isapprox(sum(abs2, gm2.u[1]), 9.723175126731014, atol=0.0001)
52-
@test isapprox(logdet(gm2), 16.90099, atol=0.0001)
59+
@test isapprox(logdet(gm2), 16.90099, atol=0.001)
5360
@test isapprox(sum(gm2.resp.devresid), 73.47179193718736, atol=0.001)
5461
@test isapprox(loglikelihood(gm2), -92.02628186555876, atol=0.001)
5562
@test isnan(sdest(gm2))
@@ -76,3 +83,13 @@ end
7683
#@test isapprox(sum(x -> sum(abs2, x), gm4.u), 196.8695297987013, atol=0.1)
7784
#@test isapprox(sum(gm4.resp.devresid), 220.92685781326136, atol=0.1)
7885
end
86+
87+
@testset "dispersion" begin
88+
89+
form = @formula(Y ~ 1 + U + (1 + U | G))
90+
91+
@test_logs (:warn, r"dispersion parameter") GeneralizedLinearMixedModel(form, dat[:sleepstudy], Gamma())
92+
@test_logs (:warn, r"dispersion parameter") GeneralizedLinearMixedModel(form, dat[:sleepstudy], InverseGaussian())
93+
@test_logs (:warn, r"dispersion parameter") GeneralizedLinearMixedModel(form, dat[:sleepstudy], Normal(), SqrtLink())
94+
95+
end

0 commit comments

Comments
 (0)