From 24b74f3a1e3ee0090c4862eee3db49ac563208db Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Mon, 30 Sep 2024 19:35:23 -0500 Subject: [PATCH 1/4] use dispatch instead of type testing in _rand --- src/simulate.jl | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/simulate.jl b/src/simulate.jl index 035a8c02c..ae7cbcbac 100644 --- a/src/simulate.jl +++ b/src/simulate.jl @@ -75,18 +75,24 @@ Note that `n` is the `n` parameter for the Binomial distribution, *not* the number of draws from the RNG. It is then used to change the random draw (an integer in [0, n]) into a probability (a float in [0,1]). """ -function _rand(rng::AbstractRNG, d::Distribution, location, scale=NaN, n=1) - if !ismissing(scale) - throw(ArgumentError("Families with a dispersion parameter not yet supported")) - end +function _rand(rng::AbstractRNG, ::Binomial, location, scale=missing, n=1) + dist = Binomial(Int(n), location) + return rand(rng, dist) / n +end - if d isa Binomial - dist = Binomial(Int(n), location) - else - dist = typeof(d)(location) - end +function _rand(rng::AbstractRNG, ::T, location, scale=missing, n=1) where T <: Union{Bernoulli,Poisson} + dist = T(location) + return rand(rng, dist) +end - return rand(rng, dist) / n +function _rand(rng::AbstractRNG, ::Normal, location, scale=missing, n=1) + # skip constructing a distribution + # return scale * randn(rng) + location + throw(ArgumentError("Families with a dispersion parameter not yet supported")) +end + +function _rand(::AbstractRNG, ::T, ::Any, scale=missing, n=1) where T <: Distribution + throw(ArgumentError("Families with a dispersion parameter not yet supported")) end function simulate!(m::MixedModel{T}; β=fixef(m), σ=m.σ, θ=T[]) where {T} From cf3cf0077552f49d0f0a52545ddc02d167f6b4a2 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Mon, 30 Sep 2024 20:04:59 -0500 Subject: [PATCH 2/4] allow simulating from a constructed normal with nonidentity link model --- src/simulate.jl | 3 +-- test/bootstrap.jl | 11 ++++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/simulate.jl b/src/simulate.jl index ae7cbcbac..3ec4474ee 100644 --- a/src/simulate.jl +++ b/src/simulate.jl @@ -87,8 +87,7 @@ end function _rand(rng::AbstractRNG, ::Normal, location, scale=missing, n=1) # skip constructing a distribution - # return scale * randn(rng) + location - throw(ArgumentError("Families with a dispersion parameter not yet supported")) + return scale * randn(rng) + location end function _rand(::AbstractRNG, ::T, ::Any, scale=missing, n=1) where T <: Distribution diff --git a/test/bootstrap.jl b/test/bootstrap.jl index 49c28441f..f6f65b5b0 100644 --- a/test/bootstrap.jl +++ b/test/bootstrap.jl @@ -65,9 +65,14 @@ end @test isapprox(gm2.β, gm2sim.β; atol=norm(stderror(gm2))) end @testset "_rand with dispersion" begin - @test_throws ArgumentError MixedModels._rand(StableRNG(42), Normal(), 1, 1, 1) - @test_throws ArgumentError MixedModels._rand(StableRNG(42), Gamma(), 1, 1, 1) - @test_throws ArgumentError MixedModels._rand(StableRNG(42), InverseGaussian(), 1, 1, 1) + @test_throws ArgumentError MixedModels._rand(StableRNG(42), Gamma(), 1) + @test_throws ArgumentError MixedModels._rand(StableRNG(42), InverseGaussian(), 1) + + data = (; y=fill(1, 1000), g=rand('A':'G', 1000)) + m = @suppress MixedModel(@formula(y ~ 1 + (1|g)), data, Normal(), LogLink()) + simulate!(m; β=[1], θ=[0], σ=1) + v = response(m) + @test mean(v) ≈ exp(1) atol=0.05 end end From 6971b8ac8dbd68b0bb21cec2a98574a785f1e413 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Mon, 30 Sep 2024 20:38:43 -0500 Subject: [PATCH 3/4] allow simulating from more distributions --- src/simulate.jl | 2 +- test/bootstrap.jl | 27 +++++++++++++++++++++++---- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/simulate.jl b/src/simulate.jl index 3ec4474ee..35e77396b 100644 --- a/src/simulate.jl +++ b/src/simulate.jl @@ -91,7 +91,7 @@ function _rand(rng::AbstractRNG, ::Normal, location, scale=missing, n=1) end function _rand(::AbstractRNG, ::T, ::Any, scale=missing, n=1) where T <: Distribution - throw(ArgumentError("Families with a dispersion parameter not yet supported")) + throw(ArgumentError("$(nameof(T)) distribution is not currently supported")) end function simulate!(m::MixedModel{T}; β=fixef(m), σ=m.σ, θ=T[]) where {T} diff --git a/test/bootstrap.jl b/test/bootstrap.jl index f6f65b5b0..0102b530d 100644 --- a/test/bootstrap.jl +++ b/test/bootstrap.jl @@ -65,14 +65,33 @@ end @test isapprox(gm2.β, gm2sim.β; atol=norm(stderror(gm2))) end @testset "_rand with dispersion" begin - @test_throws ArgumentError MixedModels._rand(StableRNG(42), Gamma(), 1) - @test_throws ArgumentError MixedModels._rand(StableRNG(42), InverseGaussian(), 1) + @test_throws ArgumentError MixedModels._rand(StableRNG(42), NegativeBinomial(), 1, 1) - data = (; y=fill(1, 1000), g=rand('A':'G', 1000)) + n = 1000 + data = (; y=fill(1, n), g=rand('A':'G', n)) m = @suppress MixedModel(@formula(y ~ 1 + (1|g)), data, Normal(), LogLink()) - simulate!(m; β=[1], θ=[0], σ=1) + simulate!(StableRNG(42), m; β=[1], θ=[0], σ=1) v = response(m) + # inverse link is exp @test mean(v) ≈ exp(1) atol=0.05 + + m = @suppress MixedModel(@formula(y ~ 1 + (1|g)), data, Gamma()) + # with StableRNG(42) we actually get a few negative values?!? + simulate!(StableRNG(43), m; β=[1], θ=[0], σ=1) + v = response(m) + # inverse link of the reciprocal link is the reciprocal, which is just 1 here + @test mean(v) ≈ 1 atol=0.05 + # m0 = glm(fill(1, n, 1), v, Gamma()) + # @test only(coef(m0)) ≈ 1 atol=0.05 + + m = @suppress MixedModel(@formula(y ~ 1 + (1|g)), data, InverseGaussian()) + # with StableRNG(42) we actually get a few negative values?!? + simulate!(StableRNG(43), m; β=[1], θ=[0], σ=1) + v = response(m) + # canonical link is InverseSquareLink(), but 1 is a fixed point under that link + @test mean(v) ≈ 1 atol=0.05 + # m0 = glm(fill(1, n, 1), v, InverseGaussian()) + # @test only(coef(m0)) ≈ 1 atol=0.05 end end From 55f5e2d7eb3908f14572a445b78fcfdc32f6a7dc Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Mon, 30 Sep 2024 21:22:48 -0500 Subject: [PATCH 4/4] support for geometric distribution --- src/MixedModels.jl | 3 ++- src/generalizedlinearmixedmodel.jl | 20 +++++++++++++++----- src/simulate.jl | 18 +++++++++++++++--- test/bootstrap.jl | 17 +++++++++++++++++ 4 files changed, 49 insertions(+), 9 deletions(-) diff --git a/src/MixedModels.jl b/src/MixedModels.jl index db3911bfe..377bb7b13 100644 --- a/src/MixedModels.jl +++ b/src/MixedModels.jl @@ -7,7 +7,7 @@ using BSplineKit: interpolate using Compat: @compat using DataAPI: DataAPI, levels, refpool, refarray, refvalue using Distributions: Distributions, Bernoulli, Binomial, Chisq, Distribution, Gamma -using Distributions: InverseGaussian, Normal, Poisson, ccdf +using Distributions: Geometric, InverseGaussian, Normal, Poisson, ccdf using GLM: GLM, GeneralizedLinearModel, IdentityLink, InverseLink, LinearModel using GLM: Link, LogLink, LogitLink, ProbitLink, SqrtLink using GLM: canonicallink, glm, linkinv, dispersion, dispersion_parameter @@ -55,6 +55,7 @@ export @formula, Grouping, Gamma, GeneralizedLinearMixedModel, + Geometric, HelmertCoding, HypothesisCoding, IdentityLink, diff --git a/src/generalizedlinearmixedmodel.jl b/src/generalizedlinearmixedmodel.jl index cfc06a778..f15acbf73 100644 --- a/src/generalizedlinearmixedmodel.jl +++ b/src/generalizedlinearmixedmodel.jl @@ -383,6 +383,20 @@ function GeneralizedLinearMixedModel( ) end +_distwarn(::T) where T <: Union{Bernoulli,Binomial,Poisson} = nothing +function _distwarn(::T) where T <: Union{Gamma,InverseGaussian,Normal} + @warn """Results for families with a dispersion parameter are not reliable. + It is best to avoid trying to fit such models in MixedModels until + the authors gain a better understanding of those cases.""" + return nothing +end + +function _distwarn(::Geometric) + @warn "The results for geometric family models have not been confirmed to be reliable." +end + +_distwarn(::T) where {T <: Distribution} = error("$(nameof(T)) distribution is not supported") + function GeneralizedLinearMixedModel( f::FormulaTerm, tbl::Tables.ColumnTable, @@ -400,11 +414,7 @@ function GeneralizedLinearMixedModel( ArgumentError("use LinearMixedModel for Normal distribution with IdentityLink") ) - if !any(isa(d, dist) for dist in (Bernoulli, Binomial, Poisson)) - @warn """Results for families with a dispersion parameter are not reliable. - It is best to avoid trying to fit such models in MixedModels until - the authors gain a better understanding of those cases.""" - end + _distwarn(d) LMM = LinearMixedModel(f, tbl; contrasts, wts, amalgamate) y = copy(LMM.y) diff --git a/src/simulate.jl b/src/simulate.jl index 35e77396b..84ea43ac2 100644 --- a/src/simulate.jl +++ b/src/simulate.jl @@ -76,14 +76,25 @@ Note that `n` is the `n` parameter for the Binomial distribution, random draw (an integer in [0, n]) into a probability (a float in [0,1]). """ function _rand(rng::AbstractRNG, ::Binomial, location, scale=missing, n=1) - dist = Binomial(Int(n), location) + dist = Binomial(Int(n), location) return rand(rng, dist) / n end function _rand(rng::AbstractRNG, ::T, location, scale=missing, n=1) where T <: Union{Bernoulli,Poisson} dist = T(location) return rand(rng, dist) -end +end + +function _rand(rng::AbstractRNG, ::Geometric, location, scale=missing, n=1) + dist = Geometric(location) + return rand(rng, dist) +end + +function _rand(rng::AbstractRNG, ::T, location, scale, n=1) where T <: Union{Gamma,InverseGaussian} + @warn "Gamma and Inverse Gaussian are not supported in fitting and poorly tested in simulation" maxwarn=1 + dist = T(location, scale) + return rand(rng, dist) +end function _rand(rng::AbstractRNG, ::Normal, location, scale=missing, n=1) # skip constructing a distribution @@ -280,7 +291,8 @@ function _simulate!( # families with a dispersion parameter mul!(η, fullrankx(lm), β, one(T), one(T)) - μ = resp === nothing ? linkinv.(Link(m), η) : GLM.updateμ!(resp, η).mu + @info "" resp + μ = isnothing(resp) ? GLM.linkfun.(Link(m), η) : GLM.updateμ!(resp, η).mu # convert to the distribution / add in noise @inbounds for (idx, val) in enumerate(μ) diff --git a/test/bootstrap.jl b/test/bootstrap.jl index 0102b530d..df569560d 100644 --- a/test/bootstrap.jl +++ b/test/bootstrap.jl @@ -64,6 +64,23 @@ end gm2sim = refit!(simulate!(StableRNG(42), deepcopy(gm2)); fast=true, progress=false) @test isapprox(gm2.β, gm2sim.β; atol=norm(stderror(gm2))) end + + @testset "Geometric" begin + n = 1000 + data = (; y=fill(1, n), g=rand('A':'G', n)) + data = (; y=rand(StableRNG(42), Geometric(0.5), n), + g=rand('A':'G', n)) + @test_logs (:warn, r"geometric") MixedModel(@formula(y ~ 1 + (1|g)), data, Geometric()) + m = @suppress MixedModel(@formula(y ~ 1 + (1|g)), data, Geometric()) + v = simulate(StableRNG(42), m; β=[exp(0.5)], θ=[0]) + gmean = mean(rand(StableRNG(42), Geometric(), n)) + @test mean(v) ≈ gmean atol=0.005 + + simulate!(StableRNG(42), m; β=[exp(0.5)], θ=[0]) + v = response(m) + @test mean(v) ≈ gmean atol=0.005 + end + @testset "_rand with dispersion" begin @test_throws ArgumentError MixedModels._rand(StableRNG(42), NegativeBinomial(), 1, 1)