11 changes: 6 additions & 5 deletions src/glmfit.jl
@@ -25,7 +25,7 @@ struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link,W<:AbstractWeights}
wrkresid::V
end

-link(rr::GlmResp) = rr.d
+distr(rr::GlmResp) = rr.d

function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVector,D,L,W}
n = length(y)
@@ -79,7 +79,7 @@ end
function deviance(r::GlmResp)
wts = weights(r)
d = sum(r.devresid)
-    return wts isa ProbabilityWeights ? d * nobs(r) / sum(wts) : d
+    return wts isa ProbabilityWeights ? d * sum(!iszero, wts) / sum(wts) : d
Member commented:

Isn't this change unnecessary since nobs is fixed below?

Suggested change:
-    return wts isa ProbabilityWeights ? d * sum(!iszero, wts) / sum(wts) : d
+    return wts isa ProbabilityWeights ? d * nobs(r) / sum(wts) : d

Member (author) replied:

That was because I defined the method only on LinPredModel and not on the response object. But I've discovered that we have ModResp, which covers both LMs and GLMs, so I've changed the method.
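
For readers following along, a minimal sketch of the dispatch idea described here, assuming ModResp is the shared abstract supertype of the LM and GLM response types (as in GLM.jl). The concrete type and field names below are illustrative, not the package's, and the weight handling is simplified to the non-zero-count case discussed in this thread:

# Sketch only: one method on the abstract response type covers both
# LmResp (linear models) and GlmResp (GLMs), instead of separate
# definitions on LinPredModel and on each response struct.
abstract type ModResp end                 # stand-in for GLM.ModResp

struct ToyResp{W} <: ModResp              # hypothetical response type
    y::Vector{Float64}
    wts::W
end
weights(r::ToyResp) = r.wts

# Count only rows whose weight is non-zero.
nobs(r::ModResp) = count(!iszero, weights(r))

nobs(ToyResp([1.0, 2.0, 3.0], [1.0, 0.0, 2.0]))   # returns 2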

end

weights(r::GlmResp) = r.wts
@@ -342,7 +342,7 @@ function loglikelihood(m::AbstractGLM)
y = r.y
mu = r.mu
wts = weights(r)
-    d = link(r)
+    d = distr(r)
ll = zero(eltype(mu))
N = length(y)
δ = deviance(r)
@@ -860,7 +860,8 @@ function checky(y, d::Binomial)
end

function nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:AbstractWeights}
-    return oftype(sum(one(eltype(weights(r)))), length(r.y))
+    n = W<:ProbabilityWeights ? sum(!iszero, weights(r)) : length(r.y)
+    return oftype(sum(one(eltype(weights(r)))), n)
end
nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = sum(r.wts)

@@ -884,7 +885,7 @@ end
function momentmatrix(m::GeneralizedLinearModel)
X = modelmatrix(m; weighted=false)
r = varstruct(m)
-    if link(m) isa Union{Gamma,InverseGaussian}
+    if distr(m) isa Union{Gamma,InverseGaussian}
r .*= sum(working_weights(m)) / sum(abs2, r)
end
return Diagonal(r) * X
17 changes: 14 additions & 3 deletions src/linpred.jl
@@ -316,7 +316,7 @@ function vcov(x::LinPredModel)
s = nobs(x) / (nobs(x) - 1)
mm = momentmatrix(x)
A = invloglikhessian(x)
-    if distr(x) isa Union{Gamma,InverseGaussian}
+    if distr(x) isa Union{Gamma,InverseGaussian}
r = varstruct(x)
A ./= sum(working_weights(x)) / sum(abs2, r)
end
@@ -326,7 +326,7 @@
end
end
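
As background for the small-sample correction applied in vcov, here is a generic sketch of the sandwich form that probability-weight covariance estimators take. This is not this package's _vcov implementation; the matrices below are made-up placeholders:

using LinearAlgebra

# Generic sandwich covariance: bread⁻¹ * meat * bread⁻¹, scaled by the
# same n / (n - 1) factor that vcov computes above as s.
M = [0.3 0.1; -0.2 0.4; 0.1 -0.5]   # hypothetical moment matrix, 3 obs × 2 params
H = [2.0 0.3; 0.3 1.5]              # hypothetical bread (Hessian-type) matrix
n = size(M, 1)
V = n / (n - 1) * (H \ (M' * M) / H)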

-link(x::LinPredModel) = link(x.rr)
+distr(x::LinPredModel) = distr(x.rr)

function _vcov(pp::LinPred, Z::AbstractMatrix, A::AbstractMatrix)
if linpred_rank(pp) < size(Z, 2)
@@ -434,8 +434,19 @@ end
For linear and generalized linear models, return the number of rows when
the model is unweighted or uses analytical or probability weights.
If the model uses frequency weights, return the sum of weights.
+Rows with zero weights are not counted.
"""
-nobs(obj::LinPredModel) = nobs(obj.rr)
+function nobs(m::LinPredModel)
+    wts = weights(m)
+    n = if wts isa UnitWeights
+        length(wts)
+    elseif wts isa FrequencyWeights
+        sum(wts)
+    elseif wts isa Union{ProbabilityWeights,AnalyticWeights}
+        sum(!iszero, wts)
+    end
+    return oftype(sum(one(eltype(wts))), n)
+end
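
A small usage sketch of the behaviour documented above, assuming this branch's wts keyword and the StatsBase weight constructors, as exercised in the tests added below; the data is made up:

using GLM, StatsBase

X = [ones(4) 1:4]
y = [1.0, 3.0, 2.0, 5.0]
w = [1, 0, 2, 3]

nobs(lm(X, y))                     # unit weights: number of rows, 4
nobs(lm(X, y, wts=fweights(w)))    # frequency weights: sum(w) == 6
nobs(lm(X, y, wts=aweights(w)))    # analytic weights: non-zero rows, 3
nobs(lm(X, y, wts=pweights(w)))    # probability weights: non-zero rows, 3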

weights(m::LinPredModel) = weights(m.rr)
weights(pp::LinPred) = pp.wts
11 changes: 3 additions & 8 deletions src/lm.jl
@@ -67,23 +67,18 @@ function deviance(r::LmResp)
v += abs2(y[i] - mu[i]) * wts[i]
end
end
-    return wts isa ProbabilityWeights ? v ./ (sum(wts) / length(y)) : v
+    return wts isa ProbabilityWeights ? v ./ (sum(wts) / sum(!iszero, wts)) : v
end

weights(r::LmResp) = r.wts
function isweighted(r::LmResp)
return weights(r) isa Union{AnalyticWeights,FrequencyWeights,ProbabilityWeights}
end

-nobs(r::LmResp{<:Any,<:FrequencyWeights}) = sum(r.wts)
-function nobs(r::LmResp{<:Any,<:AbstractWeights})
-    return oftype(sum(one(eltype(r.wts))), length(r.y))
-end

working_weights(r::LmResp) = r.wts

function loglikelihood(r::LmResp{<:Any,<:Union{UnitWeights,FrequencyWeights}})
-    n = nobs(r)
+    n = sum(weights(r))
return -n / 2 * (log(2π * deviance(r) / n) + 1)
end
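
A quick standalone check of the closed form used in this loglikelihood method (not part of the PR; Distributions.jl is used only for the comparison): with the MLE variance deviance/n, the expression -n / 2 * (log(2π * deviance / n) + 1) equals the sum of normal log-densities.

using Distributions

y   = [1.0, 4.0, 6.0, 2.0]
μ   = [2.0, 3.0, 5.0, 3.0]
n   = length(y)
dev = sum(abs2, y .- μ)                        # residual sum of squares
s2  = dev / n                                  # MLE variance
ll_formula = -n / 2 * (log(2π * dev / n) + 1)
ll_direct  = sum(logpdf.(Normal.(μ, sqrt(s2)), y))
ll_formula ≈ ll_direct                         # true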

@@ -106,7 +101,7 @@ function residuals(r::LmResp; weighted::Bool=false)
end
end

-link(rr::LmResp) = IdentityLink()
+distr(rr::LmResp) = Normal()

"""
LinearModel
46 changes: 46 additions & 0 deletions test/runtests.jl
@@ -2659,3 +2659,49 @@ end
x -0.502451 0.675377 -0.74 0.4982 -2.3776 1.3727
─────────────────────────────────────────────────────────────────────────"""
end

@testset "nobs, deviance & stderror with zero weights" begin
X = [ones(10) 1:10]
y = [1, 4, 6, 2, 3, 5, 6, 7, 1, 6]
wts = [1, 0, 4, 0, 5, 2, 6, 4, 2, 6]

lmod = lm(X, y)
glmod = glm(X, y, Normal())
@test nobs(lmod) == nobs(glmod) == 10
@test dof_residual(lmod) == dof_residual(glmod) == 8
@test deviance(lmod) ≈ deviance(glmod) ≈ 39.29696969696969
@test coef(lmod) ≈ coef(glmod) ≈ [2.6666666666666665, 0.2606060606060606]
@test stderror(lmod) ≈ stderror(glmod) ≈ [1.51404201801774, 0.24400996532360475]

lmod = lm(X, y, wts=uweights(10))
glmod = glm(X, y, Normal(), wts=uweights(10))
@test nobs(lmod) == nobs(glmod) == 10
@test dof_residual(lmod) == dof_residual(glmod) == 8
@test deviance(lmod) ≈ deviance(glmod) ≈ 39.29696969696969
@test coef(lmod) ≈ coef(glmod) ≈ [2.6666666666666665, 0.2606060606060606]
@test stderror(lmod) ≈ stderror(glmod) ≈ [1.51404201801774, 0.24400996532360475]

lmod = lm(X, y, wts=fweights(wts))
glmod = glm(X, y, Normal(), wts=fweights(wts))
@test nobs(lmod) == nobs(glmod) == sum(wts)
@test dof_residual(lmod) == dof_residual(glmod) == sum(wts) - 2
@test deviance(lmod) ≈ deviance(glmod) ≈ 91.87804878048782
@test coef(lmod) ≈ coef(glmod) ≈ [3.6707317073170724, 0.20731707317073195]
@test stderror(lmod) ≈ stderror(glmod) ≈ [0.9538283539174621, 0.13286974786560674]

lmod = lm(X, y, wts=aweights(wts))
glmod = glm(X, y, Normal(), wts=aweights(wts))
@test nobs(lmod) == nobs(glmod) == sum(!iszero, wts)
@test dof_residual(lmod) == dof_residual(glmod) == sum(!iszero, wts) - 2
@test deviance(lmod) ≈ deviance(glmod) ≈ 91.87804878048782
@test coef(lmod) ≈ coef(glmod) ≈ [3.6707317073170724, 0.20731707317073195]
@test stderror(lmod) ≈ stderror(glmod) ≈ [2.060504744176091, 0.2870314608599428]

lmod = lm(X, y, wts=pweights(wts))
glmod = glm(X, y, Normal(), wts=pweights(wts))
@test nobs(lmod) == nobs(glmod) == sum(!iszero, wts)
@test dof_residual(lmod) == dof_residual(glmod) == sum(!iszero, wts) - 2
@test deviance(lmod) ≈ deviance(glmod) ≈ 24.500813008130084
@test coef(lmod) ≈ coef(glmod) ≈ [3.6707317073170724, 0.20731707317073195]
@test stderror(lmod) ≈ stderror(glmod) ≈ [1.7617126628715203, 0.23455696986842048]
Member (author) commented:

@gragusa I get slightly different standard errors with svyglm. Is this expected? Generally in my tests I got exactly the same values to at least four decimals.

> X = 1:10
> y = c(1, 4, 6, 2, 3, 5, 6, 7, 1, 6)
> wts = c(1, 0, 4, 0, 5, 2, 6, 4, 2, 6)
> svyd <- svydesign(~1, weights=wts, data=data.frame(X=X, y=y, wts=wts))

> summary(svyglm(y ~ X, svyd))

Call:
svyglm(formula = y ~ X, design = svyd)

Survey design:
svydesign(~1, weights = wts, data = data.frame(X = X, y = y, 
    wts = wts))

Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept)   3.6707     1.7371   2.113    0.079 .
X             0.2073     0.2313   0.896    0.405  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for gaussian family taken to be 3.500116)

end