From 4c7af6d1b85a39e5a1e21ce3791bc53fdfe9e8a4 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 24 Sep 2019 17:55:45 +0200 Subject: [PATCH 1/8] initial work on na omit --- src/linearmixedmodel.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/linearmixedmodel.jl b/src/linearmixedmodel.jl index f78f2320a..144ce222c 100644 --- a/src/linearmixedmodel.jl +++ b/src/linearmixedmodel.jl @@ -43,7 +43,15 @@ LinearMixedModel(f::FormulaTerm, tbl; function LinearMixedModel(f::FormulaTerm, tbl::Tables.ColumnTable; contrasts = Dict{Symbol,Any}(), wts = []) + form = apply_schema(f, schema(f, tbl, contrasts), LinearMixedModel) + form_fe_lhs = form.lhs + form_fe_rhs = Tuple(vcat( + [t for t in form.rhs if !isa(t,RandomEffectsTerm)], + [t.rhs for t in form.rhs if isa(t,RandomEffectsTerm)])) + form_fe = FormulaTerm(form_fe_lhs, form_fe_rhs) + tbl, _ = StatsModels.missing_omit(tbl, form_fe) + y, Xs = modelcols(form, tbl) y = reshape(float(y), (:, 1)) # y as a floating-point matrix From fe5631a3abf97f4788cd051cfad564c0f8f65317 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 24 Sep 2019 19:36:33 +0200 Subject: [PATCH 2/8] simplify kludge for models witout fancy formulas --- src/linearmixedmodel.jl | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/linearmixedmodel.jl b/src/linearmixedmodel.jl index 144ce222c..f43f2ff4c 100644 --- a/src/linearmixedmodel.jl +++ b/src/linearmixedmodel.jl @@ -45,12 +45,8 @@ function LinearMixedModel(f::FormulaTerm, tbl::Tables.ColumnTable; wts = []) form = apply_schema(f, schema(f, tbl, contrasts), LinearMixedModel) - form_fe_lhs = form.lhs - form_fe_rhs = Tuple(vcat( - [t for t in form.rhs if !isa(t,RandomEffectsTerm)], - [t.rhs for t in form.rhs if isa(t,RandomEffectsTerm)])) - form_fe = FormulaTerm(form_fe_lhs, form_fe_rhs) - tbl, _ = StatsModels.missing_omit(tbl, form_fe) + print("changed") + tbl, _ = StatsModels.missing_omit(tbl, f) y, Xs = modelcols(form, tbl) From 9af41abe3e492b11a7084b500988a65c34e965e5 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 24 Sep 2019 19:43:07 +0200 Subject: [PATCH 3/8] StatsModels.termvars for RandomEffectsTerm --- src/randomeffectsterm.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/randomeffectsterm.jl b/src/randomeffectsterm.jl index 6fb0530bb..615dfc0c7 100644 --- a/src/randomeffectsterm.jl +++ b/src/randomeffectsterm.jl @@ -6,6 +6,10 @@ end Base.show(io::IO, t::RandomEffectsTerm) = print(io, "($(t.lhs) | $(t.rhs))") StatsModels.is_matrix_term(::Type{RandomEffectsTerm}) = false +function StatsModels.termvars(t::RandomEffectsTerm) + vcat(StatsModels.termvars(t.lhs), StatsModels.termvars(t.rhs)) +end + function StatsModels.apply_schema(t::FunctionTerm{typeof(|)}, schema::StatsModels.FullRank, Mod::Type{<:MixedModel}) lhs, rhs = apply_schema.(t.args_parsed, Ref(schema), Mod) From 794a9d408856b27bc65fa524087e0bd48d2e4344 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 24 Sep 2019 19:47:23 +0200 Subject: [PATCH 4/8] use the schema'd form --- src/linearmixedmodel.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/linearmixedmodel.jl b/src/linearmixedmodel.jl index f43f2ff4c..becc7c0fa 100644 --- a/src/linearmixedmodel.jl +++ b/src/linearmixedmodel.jl @@ -45,8 +45,7 @@ function LinearMixedModel(f::FormulaTerm, tbl::Tables.ColumnTable; wts = []) form = apply_schema(f, schema(f, tbl, contrasts), LinearMixedModel) - print("changed") - tbl, _ = StatsModels.missing_omit(tbl, f) + tbl, _ = StatsModels.missing_omit(tbl, form) y, Xs = modelcols(form, tbl) From 7058399ee70267c37efb4fabcd7a3f1ffd3deb32 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Wed, 25 Sep 2019 09:07:46 +0200 Subject: [PATCH 5/8] initial testsuite for missing --- src/randomeffectsterm.jl | 7 +++++++ test/FactorReTerm.jl | 10 ++++++++++ test/missing.jl | 36 ++++++++++++++++++++++++++++++++++++ test/runtests.jl | 1 + 4 files changed, 54 insertions(+) create mode 100644 test/missing.jl diff --git a/src/randomeffectsterm.jl b/src/randomeffectsterm.jl index 615dfc0c7..52f7bf4dc 100644 --- a/src/randomeffectsterm.jl +++ b/src/randomeffectsterm.jl @@ -1,6 +1,13 @@ struct RandomEffectsTerm <: AbstractTerm lhs::StatsModels.TermOrTerms rhs::StatsModels.TermOrTerms + function RandomEffectsTerm(lhs,rhs) + if isempty(intersect(StatsModels.termvars(lhs), StatsModels.termvars(rhs))) + new(lhs, rhs) + else + throw(ArgumentError("Same variable appears on both sides of |")) + end + end end Base.show(io::IO, t::RandomEffectsTerm) = print(io, "($(t.lhs) | $(t.rhs))") diff --git a/test/FactorReTerm.jl b/test/FactorReTerm.jl index e207a2361..10d96b211 100644 --- a/test/FactorReTerm.jl +++ b/test/FactorReTerm.jl @@ -79,6 +79,16 @@ const LMM = LinearMixedModel end end +@testset "RandomEffectsTerm" begin + slp = dat[:sleepstudy] + contrasts = Dict{Symbol,Any}() + + @testset "Detect same variable as blocking and experimental" begin + f = @formula(Y ~ 1 + (1 + G|G)) + @test_throws ArgumentError apply_schema(f, schema(f, slp, contrasts), LinearMixedModel) + end +end + #= @testset "vectorRe" begin slp = dat[:sleepstudy] diff --git a/test/missing.jl b/test/missing.jl new file mode 100644 index 000000000..3773d33bb --- /dev/null +++ b/test/missing.jl @@ -0,0 +1,36 @@ +using MixedModels, RData, Test + +if !@isdefined(dat) || !isa(dat, Dict{Symbol, DataFrame}) + const dat = Dict(Symbol(k) => v for (k, v) in + load(joinpath(dirname(pathof(MixedModels)), "..", "test", "dat.rda"))) +end + +# deepcopy because we're going to modify it +slp = deepcopy(dat[:sleepstudy]) +slp[!,:U] = Array{Union{Missing, Float64},1}(slp[!,:U]) +slp[1,:U] = missing + +@testset "No impact from missing on schema" begin + f = @formula(Y ~ 1 + U + (1|G)) + contrasts = Dict{Symbol,Any}() + form = apply_schema(f, schema(f, dat[:sleepstudy], contrasts), LinearMixedModel) + form_missing = apply_schema(f, schema(f, slp, contrasts), LinearMixedModel) + + @test form.lhs == form_missing.lhs + @test form.rhs == form_missing.rhs +end + +@testset "Missing Omit" begin + @testset "Missing from unused variables" + # missing from unused variables should have no impact + m1 = fit(MixedModel, @formula(Y ~ 1 + (1|G)), dat[:sleepstudy]) + m1_missing = fit(MixedModel, @formula(Y ~ 1 + (1|G)), slp) + @test isapprox(m1.θ, m1_missing.θ, rtol=1.0e-12) + end + + @testset "Missing from used variables" + m1 = fit(MixedModel, @formula(Y ~ 1 + U + (1|G)), dat[:sleepstudy]) + m1_missing = fit(MixedModel, @formula(Y ~ 1 + U + (1|G)), slp) + @test nobs(m1) - nobs(m1_missing) == 1 + end +end diff --git a/test/runtests.jl b/test/runtests.jl index d2ff194c9..887b9a10e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -11,5 +11,6 @@ include("pls.jl") include("pirls.jl") include("gausshermite.jl") include("fit.jl") +include("missing.jl") using MixedModels From 2a202c932611f258f7128bdecc73a624455acf2e Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Wed, 25 Sep 2019 11:41:07 +0200 Subject: [PATCH 6/8] missing_omit before apply_schema until StatsModels missing support lands --- src/linearmixedmodel.jl | 6 ++++-- test/missing.jl | 19 ++++++++++--------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/linearmixedmodel.jl b/src/linearmixedmodel.jl index becc7c0fa..978863402 100644 --- a/src/linearmixedmodel.jl +++ b/src/linearmixedmodel.jl @@ -43,9 +43,11 @@ LinearMixedModel(f::FormulaTerm, tbl; function LinearMixedModel(f::FormulaTerm, tbl::Tables.ColumnTable; contrasts = Dict{Symbol,Any}(), wts = []) - + # TODO: perform missing_omit() after apply_schema() when improved + # missing support is in a StatsModels release + tbl, _ = StatsModels.missing_omit(tbl, f) form = apply_schema(f, schema(f, tbl, contrasts), LinearMixedModel) - tbl, _ = StatsModels.missing_omit(tbl, form) + # tbl, _ = StatsModels.missing_omit(tbl, form) y, Xs = modelcols(form, tbl) diff --git a/test/missing.jl b/test/missing.jl index 3773d33bb..eb7abd89b 100644 --- a/test/missing.jl +++ b/test/missing.jl @@ -10,15 +10,16 @@ slp = deepcopy(dat[:sleepstudy]) slp[!,:U] = Array{Union{Missing, Float64},1}(slp[!,:U]) slp[1,:U] = missing -@testset "No impact from missing on schema" begin - f = @formula(Y ~ 1 + U + (1|G)) - contrasts = Dict{Symbol,Any}() - form = apply_schema(f, schema(f, dat[:sleepstudy], contrasts), LinearMixedModel) - form_missing = apply_schema(f, schema(f, slp, contrasts), LinearMixedModel) - - @test form.lhs == form_missing.lhs - @test form.rhs == form_missing.rhs -end +# TODO: re-enable this test when better missing support has landed in StatsModels +# @testset "No impact from missing on schema" begin +# f = @formula(Y ~ 1 + U + (1|G)) +# contrasts = Dict{Symbol,Any}() +# form = apply_schema(f, schema(f, dat[:sleepstudy], contrasts), LinearMixedModel) +# form_missing = apply_schema(f, schema(f, slp, contrasts), LinearMixedModel) +# +# @test form.lhs == form_missing.lhs +# @test form.rhs == form_missing.rhs +# end @testset "Missing Omit" begin @testset "Missing from unused variables" From 05bd39e64fea9ef172ea6a847d5029c21b50b6be Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Wed, 25 Sep 2019 11:58:04 +0200 Subject: [PATCH 7/8] add in missing begin's --- test/missing.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/missing.jl b/test/missing.jl index eb7abd89b..8a5a8a68a 100644 --- a/test/missing.jl +++ b/test/missing.jl @@ -22,14 +22,14 @@ slp[1,:U] = missing # end @testset "Missing Omit" begin - @testset "Missing from unused variables" + @testset "Missing from unused variables" begin # missing from unused variables should have no impact m1 = fit(MixedModel, @formula(Y ~ 1 + (1|G)), dat[:sleepstudy]) m1_missing = fit(MixedModel, @formula(Y ~ 1 + (1|G)), slp) @test isapprox(m1.θ, m1_missing.θ, rtol=1.0e-12) end - @testset "Missing from used variables" + @testset "Missing from used variables" begin m1 = fit(MixedModel, @formula(Y ~ 1 + U + (1|G)), dat[:sleepstudy]) m1_missing = fit(MixedModel, @formula(Y ~ 1 + U + (1|G)), slp) @test nobs(m1) - nobs(m1_missing) == 1 From 827c27aa4fa863ef543d381284e8dd4fef3707cf Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Wed, 25 Sep 2019 14:42:25 +0200 Subject: [PATCH 8/8] test StatsModels.termvars(t::RandomEffectsTerm) --- test/FactorReTerm.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/FactorReTerm.jl b/test/FactorReTerm.jl index 10d96b211..ef75c0304 100644 --- a/test/FactorReTerm.jl +++ b/test/FactorReTerm.jl @@ -87,6 +87,14 @@ end f = @formula(Y ~ 1 + (1 + G|G)) @test_throws ArgumentError apply_schema(f, schema(f, slp, contrasts), LinearMixedModel) end + + @testset "Detect both blocking and experimental variables" begin + # note that U is not in the fixed effects because we want to make square + # that we're detecting all the variables in the random effects + f = @formula(Y ~ 1 + (1 + U|G)) + form = apply_schema(f, schema(f, slp, contrasts), LinearMixedModel) + @test StatsModels.termvars(form.rhs) == [:U, :G] + end end #=