Skip to content
This repository was archived by the owner on Mar 11, 2022. It is now read-only.

Commit 720f3a5

Browse files
committed
Add F-statistic and improve how intercepts are handled
1 parent 3dde6e7 commit 720f3a5

File tree

5 files changed

+176
-82
lines changed

5 files changed

+176
-82
lines changed

src/InteractionWeightedDIDs.jl

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,25 @@ using Base: Callable
44
using Combinatorics: combinations
55
using Dictionaries: Dictionary
66
using FixedEffectModels: FixedEffectTerm, Combination,
7-
fe, has_fe, parse_fixedeffect, omitsintercept, hasintercept,
8-
basecol, isnested, tss, Fstat, nunique
7+
fe, has_fe, parse_fixedeffect, basecol, isnested, nunique
98
using FixedEffects
109
using LinearAlgebra: Factorization, Symmetric, cholesky!
1110
using Printf
1211
using Reexport
13-
using SplitApplyCombine: group, groupfind, groupreduce
14-
using StatsBase: AbstractWeights, UnitWeights, NoQuote, vcov
15-
using StatsModels: termvars, FullRank
16-
using Tables: columntable, getcolumn, rowtable
12+
using SplitApplyCombine: group, mapview
13+
using StatsBase: AbstractWeights, CovarianceEstimator, UnitWeights, NoQuote, vcov
14+
using StatsFuns: fdistccdf
15+
using StatsModels: coefnames
16+
using Tables: getcolumn
1717
using TypedTables: Table
1818
using Vcov
1919
@reexport using DiffinDiffsBase
20+
using DiffinDiffsBase: termvars, hasintercept, omitsintercept, isintercept, parse_intercept
2021

2122
import Base: show
22-
import DiffinDiffsBase: required, default, transformed, combinedargs, _getsubcolumns
23+
import DiffinDiffsBase: required, default, transformed, combinedargs, _getsubcolumns,
24+
valid_didargs, result
25+
import FixedEffectModels: has_fe
2326

2427
# Handle naming conflicts
2528
const getvcov = vcov
@@ -36,7 +39,8 @@ export CheckVcov,
3639
EstVcov,
3740

3841
RegressionBasedDID,
39-
Reg
42+
Reg,
43+
RegressionBasedDIDResult
4044

4145
include("utils.jl")
4246
include("procedures.jl")

src/procedures.jl

Lines changed: 85 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
Exclude rows that are invalid for `vcov`.
55
See also [`CheckVcov`](@ref).
66
"""
7-
checkvcov!(data, esample::BitArray, vcov::Union{Vcov.SimpleCovariance,Vcov.RobustCovariance}) =
8-
NamedTuple(), false
7+
checkvcov!(data, esample::BitVector,
8+
vcov::Union{Vcov.SimpleCovariance, Vcov.RobustCovariance}) = NamedTuple(), false
99

10-
function checkvcov!(data, esample::BitArray, vcov::Vcov.ClusterCovariance)
10+
function checkvcov!(data, esample::BitVector, vcov::Vcov.ClusterCovariance)
1111
esample .&= Vcov.completecases(data, vcov)
1212
return (esample=esample,), false
1313
end
@@ -31,7 +31,7 @@ drop singleton observations for any fixed effect
3131
and determine whether intercept term should be omitted.
3232
See also [`CheckFEs`](@ref).
3333
"""
34-
function checkfes!(data, esample::BitArray, xterms::Terms, drop_singletons::Bool)
34+
function checkfes!(data, esample::BitVector, xterms::Terms, drop_singletons::Bool)
3535
fes, fenames, xterms = parse_fixedeffect(data, xterms)
3636
has_fe_intercept = false
3737
nsingle = 0
@@ -66,7 +66,8 @@ default(::CheckFEs) = (xterms=(), drop_singletons=true)
6666
Construct `FixedEffects.AbstractFixedEffectSolver`.
6767
See also [`MakeFESolver`](@ref).
6868
"""
69-
function makefesolver(fenames::Vector{Symbol}, weights::AbstractWeights, esample::BitArray, fes::Vector{FixedEffect})
69+
function makefesolver(fenames::Vector{Symbol}, weights::AbstractWeights, esample::BitVector,
70+
fes::Vector{FixedEffect})
7071
if !isempty(fes)
7172
fes = FixedEffect[fe[esample] for fe in fes]
7273
feM = AbstractFixedEffectSolver{Float64}(fes, weights, Val{:cpu}, Threads.nthreads())
@@ -92,7 +93,9 @@ function _feresiduals!(M::AbstractArray, feM::AbstractFixedEffectSolver,
9293
tol::Real, maxiter::Integer)
9394
_, iters, convs = solve_residuals!(M, feM; tol=tol, maxiter=maxiter, progress_bar=false)
9495
iter = maximum(iters)
95-
all(convs) || @warn "no convergence of fixed effect solver in $(iter) iterations"
96+
conv = all(convs)
97+
conv || @warn "no convergence of fixed effect solver in $(iter) iterations"
98+
return iter, conv
9699
end
97100

98101
"""
@@ -102,34 +105,47 @@ Construct columns for outcome variables and covariates
102105
and residualize them with fixed effects.
103106
See also [`MakeYXCols`](@ref).
104107
"""
105-
function makeyxcols(data, weights::AbstractWeights, esample::BitArray,
108+
function makeyxcols(data, weights::AbstractWeights, esample::BitVector,
106109
feM::Union{AbstractFixedEffectSolver, Nothing}, has_fe_intercept::Bool,
107110
contrasts::Dict, fetol::Real, femaxiter::Int, allyterm::Terms, allxterms::Terms)
108111

109112
yxcols = Dict{AbstractTerm, VecOrMat{Float64}}()
110113
yxnames = union(termvars(allyterm), termvars(allxterms))
111114
yxdata = _getsubcolumns(data, yxnames, esample)
112115
concrete_yterms = apply_schema(allyterm, schema(allyterm, yxdata), StatisticalModel)
116+
yxterms = Dict{AbstractTerm, AbstractTerm}()
113117
for (t, ct) in zip(eachterm(allyterm), eachterm(concrete_yterms))
114118
ycol = convert(Vector{Float64}, modelcols(ct, yxdata))
115119
all(isfinite, ycol) || error("data for term $ct contain NaN or Inf")
116120
yxcols[t] = ycol
121+
yxterms[t] = ct
117122
end
118123

124+
# Standardize how an intercept or omitsintercept is represented
125+
allxterms = parse_intercept(allxterms)
126+
127+
# Add an intercept if not already having one
128+
has_fe_intercept || hasintercept(allxterms) ||
129+
(allxterms = (allxterms..., InterceptTerm{true}()))
130+
131+
# Any term other than InterceptTerm{true}() that represents the intercept
132+
# will be replaced by InterceptTerm{true}()
133+
# Need to take such changes into account when creating X matrix
119134
xschema = schema(allxterms, yxdata, contrasts)
120-
has_fe_intercept && (xschema = FullRank(xschema, Set([InterceptTerm{true}()])))
121135
concrete_xterms = apply_schema(allxterms, xschema, StatisticalModel)
122136
for (t, ct) in zip(eachterm(allxterms), eachterm(concrete_xterms))
123137
if width(ct) > 0
124138
xcols = convert(Matrix{Float64}, modelmatrix(ct, yxdata))
125139
all(isfinite, xcols) || error("data for term $ct contain NaN or Inf")
126140
yxcols[t] = xcols
127141
end
142+
yxterms[t] = ct
128143
end
129144

145+
iter, conv = nothing, nothing
130146
if feM !== nothing
131147
YX = Combination(values(yxcols)...)
132-
_feresiduals!(YX, feM, fetol, femaxiter)
148+
iter, conv = _feresiduals!(YX, feM, fetol, femaxiter)
133149
end
134150

135151
if !(weights isa UnitWeights)
@@ -138,15 +154,15 @@ function makeyxcols(data, weights::AbstractWeights, esample::BitArray,
138154
end
139155
end
140156

141-
return (yxcols=yxcols,), true
157+
return (yxcols=yxcols, yxterms=yxterms, nfeiterations=iter, feconverged=conv), true
142158
end
143159

144160
"""
145161
MakeYXCols <: StatsStep
146162
147163
Call [`InteractionWeightedDIDs.makeyxcols`](@ref) to obtain
148164
residualized outcome variables and covariates.
149-
The returned object named `yxcols`
165+
The returned objects named `yxcols`, `yxterms`, `nfeiterations` and `feconverged`
150166
may be shared across multiple specifications.
151167
"""
152168
const MakeYXCols = StatsStep{:MakeYXCols, typeof(makeyxcols)}
@@ -155,20 +171,20 @@ required(::MakeYXCols) = (:data, :weights, :esample, :feM, :has_fe_intercept)
155171
default(::MakeYXCols) = (contrasts=Dict{Symbol, Any}(), fetol=1e-8, femaxiter=10000)
156172

157173
function combinedargs(::MakeYXCols, allntargs)
158-
allyterm, allxterms = (allntargs[1].yterm,), allntargs[1].xterms
159174
if length(allntargs) > 1
160-
for i in 2:length(allntargs)
161-
allyterm += allntargs[i].yterm
162-
allxterms += allntargs[i].xterms
175+
allyterm, allxterms = Set{AbstractTerm}(), Set{AbstractTerm}()
176+
for nt in allntargs
177+
push!(allyterm, nt.yterm)
178+
!isempty(nt.xterms) && push!(allxterms, nt.xterms...)
163179
end
180+
return (allyterm...,), (allxterms...,)
181+
else
182+
return (allntargs[1].yterm,), allntargs[1].xterms
164183
end
165-
return allyterm, allxterms
166184
end
167185

168-
_parentinds(idmap::Vector{Int}, idx::Vector{Int}) = idmap[idx]
169-
170186
# Assume idx is sorted
171-
function _genindicator(idx::Vector{Int}, esample::BitArray, n::Int)
187+
function _genindicator(idx::Vector{Int}, esample::BitVector, n::Int)
172188
v = zeros(n)
173189
iv, r = 1, 1
174190
@inbounds for i in eachindex(esample)
@@ -196,7 +212,7 @@ See also [`MakeTreatCols`](@ref).
196212
function maketreatcols(data, treatname::Symbol, treatintterms::Terms,
197213
feM::Union{AbstractFixedEffectSolver, Nothing},
198214
weightname::Union{Symbol, Nothing}, weights::AbstractWeights,
199-
esample::BitArray, tr_rows::BitArray,
215+
esample::BitVector, tr_rows::BitVector,
200216
cohortinteracted::Bool, fetol::Real, femaxiter::Int,
201217
::Type{<:DynamicTreatment{SharpDesign}}, time::Symbol, exc::Set{<:Integer})
202218

@@ -206,20 +222,20 @@ function maketreatcols(data, treatname::Symbol, treatintterms::Terms,
206222
ikept = findall(kept)
207223
# Obtain a fast row iterator without copying (after _getsubcolumns)
208224
trows = Table(_getsubcolumns(data, tnames, kept))
209-
itreats = groupfind(trows)
210-
# Convert indices of trows to indices of data
211-
itreats .= _parentinds.(Ref(ikept), itreats)
225+
# Obtain row indices that take value one for each treatment indicator
226+
itreats = group(trows, ikept)
212227

228+
# Calculate relative time
213229
if cohortinteracted
214230
tnames = (:rel, treatname, termvars(treatintterms)...)
215231
f = k -> NamedTuple{tnames}((getfield(k, time) - getfield(k, treatname),
216232
getfield(k, treatname), (getfield(k, n) for n in termvars(treatintterms))...))
217233
itreats = Dictionary(map(f, keys(itreats)), itreats)
218234
else
219235
tnames = (:rel, termvars(treatintterms)...)
220-
byf = p -> NamedTuple{tnames}((getfield(p[1], time) - getfield(p[1], treatname),
221-
(getfield(p[1], n) for n in termvars(treatintterms))...))
222-
itreats = group(byf, p->p[2], pairs(itreats))
236+
byf = p -> NamedTuple{tnames}((getfield(p, time) - getfield(p, treatname),
237+
(getfield(p, n) for n in termvars(treatintterms))...))
238+
itreats = group(mapview(byf, keys(itreats)), itreats)
223239
itreats = map(x->sort!(vcat(x...)), itreats)
224240
end
225241

@@ -267,16 +283,24 @@ combinedargs(::MakeTreatCols, allntargs, ::Type{<:DynamicTreatment{SharpDesign}}
267283
(Set(intersect((nt.tr.exc for nt in allntargs)...)),)
268284

269285
"""
270-
solveleastsquares(args...)
286+
solveleastsquares!(args...)
271287
272288
Solve the least squares problem for regression coefficients and residuals.
273289
See also [`SolveLeastSquares`](@ref).
274290
"""
275-
function solveleastsquares(tr::DynamicTreatment{SharpDesign}, yterm::AbstractTerm,
276-
xterms::Terms, yxcols::Dict, treatcols::Dictionary)
291+
function solveleastsquares!(tr::DynamicTreatment{SharpDesign}, yterm::AbstractTerm,
292+
xterms::Terms, yxterms::Dict, yxcols::Dict, treatcols::Dictionary,
293+
has_fe_intercept::Bool)
277294
y = yxcols[yterm]
278295
ts = sort!([k for k in keys(treatcols) if !(k.rel in tr.exc)])
279-
X = hcat((treatcols[k] for k in ts)..., (yxcols[k] for k in xterms)...)
296+
# Be consistent with allxterms in makeyxcols
297+
xterms = parse_intercept(xterms)
298+
# Add an intercept if needed
299+
has_fe_intercept || hasintercept(xterms) ||
300+
omitsintercept(xterms) || (xterms = (xterms..., InterceptTerm{true}()))
301+
302+
X = hcat((treatcols[k] for k in ts)...,
303+
(yxcols[k] for k in xterms if width(yxterms[k])>0)...)
280304

281305
nts = length(ts)
282306
basecols = trues(size(X,2))
@@ -293,24 +317,26 @@ function solveleastsquares(tr::DynamicTreatment{SharpDesign}, yterm::AbstractTer
293317
coef = crossx \ (X'y)
294318
residuals = y - X * coef
295319

296-
treatinds = Table(ts)
297-
return (coef=coef, crossx=crossx, residuals=residuals, basecols=basecols,
298-
treatinds=treatinds), true
320+
treatinds = Table(ts)
321+
322+
return (coef=coef, X=X, crossx=crossx, residuals=residuals, treatinds=treatinds,
323+
xterms=xterms, basecols=basecols), true
299324
end
300325

301326
"""
302327
SolveLeastSquares <: StatsStep
303328
304-
Call [`InteractionWeightedDIDs.solveleastsquares`](@ref) to
329+
Call [`InteractionWeightedDIDs.solveleastsquares!`](@ref) to
305330
solve the least squares problem for regression coefficients and residuals.
306-
The returned object named `coef`, `crossx`, `residuals`, `basecols` and `treatinds`
331+
The returned objects named `coef`, `X`, `crossx`, `residuals`, `basecols` and `treatinds`
307332
may be shared across multiple specifications.
308333
"""
309-
const SolveLeastSquares = StatsStep{:SolveLeastSquares, typeof(solveleastsquares)}
334+
const SolveLeastSquares = StatsStep{:SolveLeastSquares, typeof(solveleastsquares!)}
310335

311-
required(::SolveLeastSquares) = (:tr, :yterm, :xterms, :yxcols, :treatcols)
336+
required(::SolveLeastSquares) = (:tr, :yterm, :xterms, :yxterms, :yxcols, :treatcols,
337+
:has_fe_intercept)
312338

313-
function _vcov(data, esample::BitArray,
339+
function _vcov(data, esample::BitVector,
314340
vcov::Union{Vcov.SimpleCovariance,Vcov.RobustCovariance}, fes::Vector{FixedEffect})
315341
dof_absorb = 0
316342
for fe in fes
@@ -319,7 +345,7 @@ function _vcov(data, esample::BitArray,
319345
return vcov, dof_absorb
320346
end
321347

322-
function _vcov(data, esample::BitArray, vcov::Vcov.ClusterCovariance,
348+
function _vcov(data, esample::BitVector, vcov::Vcov.ClusterCovariance,
323349
fes::Vector{FixedEffect})
324350
cludata = _getsubcolumns(data, vcov.clusters, esample)
325351
concrete_vcov = Vcov.materialize(cludata, vcov)
@@ -333,25 +359,36 @@ end
333359
"""
334360
estvcov(args...)
335361
336-
Estimate variance-covariance matrix.
362+
Estimate variance-covariance matrix and F-statistic.
337363
See also [`EstVcov`](@ref).
338364
"""
339-
function estvcov(data, esample::BitArray, vcov::Vcov.CovarianceEstimator,
340-
X::Matrix, crossx::Factorization, residuals::Vector, fes::Vector{FixedEffect})
365+
function estvcov(data, esample::BitVector, vcov::CovarianceEstimator, coef::Vector,
366+
X::Matrix, crossx::Factorization, residuals::Vector,
367+
xterms::Terms, fes::Vector{FixedEffect}, has_fe_intercept::Bool)
341368
concrete_vcov, dof_absorb = _vcov(data, esample, vcov, fes)
342-
dof_residuals = max(1, sum(esample) - size(X,2) - dof_absorb)
343-
vcov_data = Vcov.VcovData(X, crossx, residuals, dof_residuals)
369+
dof_resid = max(1, sum(esample) - size(X,2) - dof_absorb)
370+
vcov_data = Vcov.VcovData(X, crossx, residuals, dof_resid)
344371
vcov_mat = getvcov(vcov_data, concrete_vcov)
345-
return (vcov_mat=vcov_mat, dof_residuals=dof_residuals), true
372+
373+
# Fstat assumes the last coef is intercept if having any intercept
374+
has_intercept = !isempty(xterms) && isintercept(xterms[end])
375+
F = Fstat(coef, vcov_mat, has_intercept)
376+
has_intercept = has_intercept || has_fe_intercept
377+
df_F = max(1, Vcov.df_FStat(vcov_data, concrete_vcov, has_intercept))
378+
p = fdistccdf(max(length(coef) - has_intercept, 1), df_F, F)
379+
380+
return (vcov_mat=vcov_mat, dof_resid=dof_resid, F=F, p=p), true
346381
end
347382

348383
"""
349384
EstVcov <: StatsStep
350385
351-
Call [`InteractionWeightedDIDs.estvcov`](@ref) to estimate variance-covariance matrix.
352-
The returned object named `vcov_mat` and `dof_residuals`
386+
Call [`InteractionWeightedDIDs.estvcov`](@ref) to
387+
estimate variance-covariance matrix and F-statistic.
388+
The returned objects named `vcov_mat`, `dof_resid`, `F` and `p`
353389
may be shared across multiple specifications.
354390
"""
355391
const EstVcov = StatsStep{:EstVcov, typeof(estvcov)}
356392

357-
required(::EstVcov) = (:data, :esample, :vcov, :X, :crossx, :residuals, :fes)
393+
required(::EstVcov) = (:data, :esample, :vcov, :coef, :X, :crossx, :residuals, :xterms,
394+
:fes, :has_fe_intercept)

src/utils.jl

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,12 @@ function drop_singletons!(esample, fe::FixedEffect)
1515
end
1616
return n
1717
end
18+
19+
function Fstat(coef::Vector{Float64}, vcov_mat::AbstractMatrix{Float64}, has_intercept::Bool)
20+
length(coef) == has_intercept && return NaN
21+
if has_intercept
22+
coef = coef[1:end-1]
23+
vcov_mat = vcov_mat[1:end-1, 1:end-1]
24+
end
25+
return (coef' * (vcov_mat \ coef)) / length(coef)
26+
end

0 commit comments

Comments
 (0)