
Commit d239959

dmbates and palday authored
Switch to Feather/Artifacts for test data (#238)
* Switch to Feather/Artifacts for test data
* dat.rda is no longer needed for tests
* Set compat range for `Feather` package
* fix sha256 entry
* consistency in helper functions across tests
* fix remnant of merge

Co-authored-by: Phillip Alday <[email protected]>
1 parent c983d58 commit d239959

12 files changed: +141 additions, −126 deletions

Artifacts.toml

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+[TestData]
+git-tree-sha1 = "9d575764bc1c1a7860c34c5b153251e5f2ee6704"
+lazy = true
+
+[[TestData.download]]
+sha256 = "0b63ae3e9e457ee4b33482d3bf8cc7f20c8ed7c8b2c863af311ba0944c6d46e4"
+url = "https://ndownloader.figshare.com/files/21085968"

Project.toml

Lines changed: 3 additions & 2 deletions
@@ -11,6 +11,7 @@ GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd"
 NamedArrays = "86f7a689-2022-50b4-a561-43c23ac3c673"
+Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -43,8 +44,8 @@ julia = "1.3"

 [extras]
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
-RData = "df47a6cb-8c03-5eed-afd8-b6050d6c41da"
+Feather = "becb17da-46f6-5d3c-ad1b-1c5fe96bc73c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

 [targets]
-test = ["Test", "DataFrames", "RData"]
+test = ["Test", "DataFrames", "Feather"]

src/MixedModels.jl

Lines changed: 6 additions & 0 deletions
@@ -8,6 +8,7 @@ using LinearAlgebra
 using NamedArrays
 using NLopt
 using Random
+using Pkg.Artifacts
 using ProgressMeter
 using Showoff
 using SparseArrays
@@ -52,6 +53,7 @@ export @formula,
     RandomEffectsTerm,
     ReMat,
     SqrtLink,
+    TestData,
     UniformBlockDiagonal,
     VarCorr,
     aic,
@@ -107,6 +109,10 @@ import Base: ==, *

 abstract type MixedModel{T} <: StatsModels.RegressionModel end # model with fixed and random effects

+function __init__()
+    global TestData = artifact"TestData"
+end
+
 include("utilities.jl")
 include("arraytypes.jl")
 include("varcorr.jl")

test/FactorReTerm.jl

Lines changed: 24 additions & 18 deletions
@@ -1,19 +1,24 @@
-using DataFrames, LinearAlgebra, MixedModels, Random, RData, SparseArrays, StatsModels, Test
-
-if !@isdefined(dat) || !isa(dat, Dict{Symbol, DataFrame})
-    const dat = Dict(Symbol(k) => v for (k, v) in
-        load(joinpath(dirname(pathof(MixedModels)), "..", "test", "dat.rda")))
-end
+using DataFrames
+using Feather
+using LinearAlgebra
+using MixedModels
+using Random
+using SparseArrays
+using StatsModels
+using Test

 const LMM = LinearMixedModel

+data(nm::AbstractString) = Feather.read(joinpath(MixedModels.TestData, string(nm, ".feather")))
+data(nm::Symbol) = data(string(nm))
+
 @testset "scalarReMat" begin
-    ds = dat[:Dyestuff]
-    f1 = @formula(Y ~ 1 + (1|G))
+    ds = data("dyestuff")
+    f1 = @formula(yield ~ 1 + (1|batch))
     y1, Xs1 = modelcols(apply_schema(f1, schema(ds), LMM), ds)
     sf = Xs1[2]
-    psts = dat[:Pastes]
-    f2 = @formula(Y ~ 1 + (1|G) + (1|H))
+    psts = data("pastes")
+    f2 = @formula(strength ~ 1 + (1|sample) + (1|batch))
     y2, Xs2 = modelcols(apply_schema(f2, schema(psts), LMM), psts)
     sf1 = Xs2[2]
     sf2 = Xs2[3]
@@ -78,36 +83,36 @@ const LMM = LinearMixedModel
 end

 @testset "RandomEffectsTerm" begin
-    slp = dat[:sleepstudy]
+    slp = data("sleepstudy")
     contrasts = Dict{Symbol,Any}()

     @testset "Detect same variable as blocking and experimental" begin
-        f = @formula(Y ~ 1 + (1 + G|G))
+        f = @formula(reaction ~ 1 + (1 + subj|subj))
         @test_throws ArgumentError apply_schema(f, schema(f, slp, contrasts), LinearMixedModel)
     end

     @testset "Detect both blocking and experimental variables" begin
        # note that U is not in the fixed effects because we want to make square
        # that we're detecting all the variables in the random effects
-        f = @formula(Y ~ 1 + (1 + U|G))
+        f = @formula(reaction ~ 1 + (1 + days|subj))
         form = apply_schema(f, schema(f, slp, contrasts), LinearMixedModel)
-        @test StatsModels.termvars(form.rhs) == [:U, :G]
+        @test StatsModels.termvars(form.rhs) == [:days, :subj]
     end
 end

 @testset "Categorical Blocking Variable" begin
     # deepcopy because we're going to modify it
-    slp = deepcopy(dat[:sleepstudy])
+    slp = deepcopy(data("sleepstudy"))
     contrasts = Dict{Symbol,Any}()
-    f = @formula(Y ~ 1 + (1|G))
+    f = @formula(reaction ~ 1 + (1|subj))

     # String blocking-variables work fine because StatsModels is smart enough to
     # treat strings as Categorical. Note however that this is a
     # far less efficient to store the original dataframe, although it doesn't
     # matter for the contrast matrix
-    slp[!,:G] = convert.(String, slp[!, :G])
+    slp[!,:subj] = convert.(String, slp[!, :subj])
     # @test_throws ArgumentError LinearMixedModel(f, slp)
-    slp[!,:G] = parse.(Int, slp[!, :G])
+    slp.subj = parse.(Int, getindex.(slp.subj, Ref(2:4)))
     @test_throws ArgumentError LinearMixedModel(f, slp)
 end

@@ -169,5 +174,6 @@ end
     @test modelcols(last(ff.rhs), dat) == float(Matrix(I, 18, 18))

     @test_broken fit(MixedModel, @formula(Y ~ 1 + (1|H/c)), dat[:Pastes])
+
    end
 end

test/Project.toml

Lines changed: 2 additions & 2 deletions
@@ -1,9 +1,9 @@
 [deps]
 BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+Feather = "becb17da-46f6-5d3c-ad1b-1c5fe96bc73c"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 NamedArrays = "86f7a689-2022-50b4-a561-43c23ac3c673"
-RData = "df47a6cb-8c03-5eed-afd8-b6050d6c41da"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
@@ -12,4 +12,4 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

 [compat]
-RData = "0.5, 0.6, 0.7"
+Feather = "0.5"

test/UniformBlockDiagonal.jl

Lines changed: 13 additions & 8 deletions
@@ -1,9 +1,14 @@
-using DataFrames, LinearAlgebra, MixedModels, Random, RData, SparseArrays, StatsModels, Test
+using DataFrames
+using Feather
+using LinearAlgebra
+using MixedModels
+using Random
+using SparseArrays
+using StatsModels
+using Test

-if !@isdefined(dat) || !isa(dat, Dict{Symbol, DataFrame})
-    const dat = Dict(Symbol(k) => v for (k, v) in
-        load(joinpath(dirname(pathof(MixedModels)), "..", "test", "dat.rda")))
-end
+dat(nm::AbstractString) = Feather.read(joinpath(MixedModels.TestData, nm * ".feather"))
+dat(nm::Symbol) = dat(string(nm))

 const LMM = LinearMixedModel

@@ -54,10 +59,10 @@ const LMM = LinearMixedModel

 @testset "updateL" begin
     @test ones(2, 2) == MixedModels.rankUpdate!(Hermitian(zeros(2, 2)), ones(2))
-    d3 = dat[:d3]
+    d3 = dat(:d3)
     sch = schema(d3)
-    vf1 = modelcols(apply_schema(@formula(Y ~ 1 + U + (1+U|G)), sch, LMM), d3)[2][2]
-    vf2 = modelcols(apply_schema(@formula(Y ~ 1 + U + (1+U|H)), sch, LMM), d3)[2][2]
+    vf1 = modelcols(apply_schema(@formula(y ~ 1 + u + (1+u|g)), sch, LMM), d3)[2][2]
+    vf2 = modelcols(apply_schema(@formula(y ~ 1 + u + (1+u|h)), sch, LMM), d3)[2][2]
     @test vf1.λ == LowerTriangular(Matrix(I, 2, 2))
     setθ!(vf2, [1.75, 0.0, 1.0])
     A11 = vf1'vf1

test/dat.rda

-9.67 MB
Binary file not shown.

test/fit.jl

Lines changed: 13 additions & 14 deletions
@@ -1,32 +1,31 @@
-using MixedModels, RData, Test
+using MixedModels, Feather, Test

-if !@isdefined(dat) || !isa(dat, Dict{Symbol, DataFrame})
-    const dat = Dict(Symbol(k) => v for (k, v) in
-        load(joinpath(dirname(pathof(MixedModels)), "..", "test", "dat.rda")))
-end
+data(nm::AbstractString) = Feather.read(joinpath(MixedModels.TestData, string(nm, ".feather")))
+
+data(nm::Symbol) = data(string(nm))

 @testset "linear" begin
-    m1 = fit(MixedModel, @formula(Y ~ 1 + (1|G)), dat[:Dyestuff])
+    m1 = fit(MixedModel, @formula(yield ~ 1 + (1|batch)), data(:dyestuff))
     @test first(m1.θ) ≈ 0.7525806757718846 rtol=1.0e-5
 end

 @testset "generalized" begin
-    gm1 = fit(MixedModel, @formula(use ~ 1 + urb + l + a + abs2(a) + (1|d)),
-              dat[:Contraception], Bernoulli())
+    gm1 = fit(MixedModel, @formula(use ~ 1 + urban + livch + age + abs2(age) + (1|dist)),
+              data(:contra), Bernoulli())
     @test deviance(gm1) ≈ 2372.7286 atol=1.0e-3
 end

 @testset "Normal-IdentityLink" begin
-    @test isa(fit(MixedModel, @formula(Y ~ 1 + (1|G)), dat[:Dyestuff], Normal()),
+    @test isa(fit(MixedModel, @formula(yield ~ 1 + (1|batch)), data(:dyestuff), Normal()),
              LinearMixedModel)
    @test_throws(ArgumentError("use LinearMixedModel for Normal distribution with IdentityLink"),
                 fit(GeneralizedLinearMixedModel,
-                    @formula(Y ~ 1 + (1|G)),
-                    dat[:Dyestuff]))
+                    @formula(yield ~ 1 + (1|batch)),
+                    data(:dyestuff)))
 end

 @testset "Normal Distribution GLMM" begin
-    @test isa(fit(MixedModel, @formula(Y ~ 1 + (1|G)), dat[:Dyestuff],
-                  Normal(), SqrtLink()),
-              GeneralizedLinearMixedModel)
+    @test_broken(isa(fit(MixedModel, @formula(yield ~ 1 + (1|batch)), data(:dyestuff),
+                         Normal(), LogLink),
+                     GeneralizedLinearMixedModel))
 end

test/missing.jl

Lines changed: 10 additions & 13 deletions
@@ -1,14 +1,11 @@
-using MixedModels, RData, Test
-
-if !@isdefined(dat) || !isa(dat, Dict{Symbol, DataFrame})
-    const dat = Dict(Symbol(k) => v for (k, v) in
-        load(joinpath(dirname(pathof(MixedModels)), "..", "test", "dat.rda")))
-end
+using Feather, MixedModels, Test

+dat(nm::AbstractString) = Feather.read(joinpath(MixedModels.TestData, string(nm, ".feather")))
+dat(nm::Symbol) = dat(string(nm))
 # deepcopy because we're going to modify it
-slp = deepcopy(dat[:sleepstudy])
-slp[!,:U] = Array{Union{Missing, Float64},1}(slp[!,:U])
-slp[1,:U] = missing
+slp = deepcopy(dat(:sleepstudy))
+slp[!,:days] = Array{Union{Missing, Float64},1}(slp[!,:days])
+slp[1,:days] = missing

 # TODO: re-enable this test when better missing support has landed in StatsModels
 # @testset "No impact from missing on schema" begin
@@ -24,14 +21,14 @@ slp[1,:U] = missing
 @testset "Missing Omit" begin
     @testset "Missing from unused variables" begin
         # missing from unused variables should have no impact
-        m1 = fit(MixedModel, @formula(Y ~ 1 + (1|G)), dat[:sleepstudy])
-        m1_missing = fit(MixedModel, @formula(Y ~ 1 + (1|G)), slp)
+        m1 = fit(MixedModel, @formula(reaction ~ 1 + (1|subj)), dat(:sleepstudy))
+        m1_missing = fit(MixedModel, @formula(reaction ~ 1 + (1|subj)), slp)
         @test isapprox(m1.θ, m1_missing.θ, rtol=1.0e-12)
     end

     @testset "Missing from used variables" begin
-        m1 = fit(MixedModel, @formula(Y ~ 1 + U + (1|G)), dat[:sleepstudy])
-        m1_missing = fit(MixedModel, @formula(Y ~ 1 + U + (1|G)), slp)
+        m1 = fit(MixedModel, @formula(reaction ~ 1 + days + (1|subj)), dat(:sleepstudy))
+        m1_missing = fit(MixedModel, @formula(reaction ~ 1 + days + (1|subj)), slp)
         @test nobs(m1) - nobs(m1_missing) == 1
     end
 end

test/pirls.jl

Lines changed: 19 additions & 19 deletions
@@ -1,13 +1,11 @@
-using DataFrames, LinearAlgebra, MixedModels, RData, Test
-if !@isdefined(dat) || !isa(dat, Dict{Symbol, DataFrame})
-    const dat = Dict(Symbol(k) => v for (k, v) in
-        load(joinpath(dirname(pathof(MixedModels)), "..", "test", "dat.rda")))
-end
+using DataFrames, Feather, LinearAlgebra, MixedModels, Test
+
+dat(nm::AbstractString) = Feather.read(joinpath(MixedModels.TestData, string(nm, ".feather")))
+dat(nm::Symbol) = dat(string(nm))

 @testset "contra" begin
-    contra = dat[:Contraception]
-    contra[!, :urbdist] = categorical(string.(contra[!, :d], contra[!, :urb]))
-    contraform = @formula(use ~ 1+a+abs2(a)+urb+l+(1|urbdist))
+    contra = dat(:contra)
+    contraform = @formula(use ~ 1+age+abs2(age)+urban+livch+(1|urbdist))
     gm0 = fit(MixedModel, contraform, contra, Bernoulli(), fast=true);
     @test gm0.lowerbd == zeros(1)
     @test isapprox(gm0.θ, [0.5720734451352923], atol=0.001)
@@ -28,7 +26,7 @@ end
     @test isnan(gm1.σ)
     @test length(gm1.y) == size(gm1.X, 1)
     @test :θ in propertynames(gm0)
-    gm0.βθ = vcat(gm0.β, gm0.theta)
+    # gm0.βθ = vcat(gm0.β, gm0.theta)
     # the next three values are not well defined in the optimization
     #@test isapprox(logdet(gm1), 75.7217, atol=0.1)
     #@test isapprox(sum(abs2, gm1.u[1]), 48.4747, atol=0.1)
@@ -37,21 +35,20 @@ end
 end

 @testset "cbpp" begin
-    cbpp = dat[:cbpp]
-    cbpp[!, :prop] = cbpp[!, :i] ./ cbpp[!, :s]
-    gm2 = fit(MixedModel, @formula(prop ~ 1 + p + (1|h)), cbpp, Binomial(), wts=cbpp[!,:s])
-    @test isapprox(deviance(gm2,true), 100.09585619892968, atol=0.0001)
-    @test isapprox(sum(abs2, gm2.u[1]), 9.723054788538546, atol=0.0001)
-    @test isapprox(logdet(gm2), 16.90105378801136, atol=0.0001)
+    cbpp = dat(:cbpp)
+    gm2 = fit(MixedModel, @formula((incid/hsz) ~ 1 + period + (1|herd)), cbpp, Binomial(), wts=float(cbpp.hsz))
+    @test deviance(gm2,true) ≈ 100.09585619892968 atol=0.0001
+    @test sum(abs2, gm2.u[1]) ≈ 9.723054788538546 atol=0.0001
+    @test logdet(gm2) ≈ 16.90105378801136 atol=0.0001
     @test isapprox(sum(gm2.resp.devresid), 73.47174762237978, atol=0.001)
     @test isapprox(loglikelihood(gm2), -92.02628186840045, atol=0.001)
     @test isnan(sdest(gm2))
     @test varest(gm2) == 1
 end

 @testset "verbagg" begin
-    gm3 = fit(MixedModel, @formula(r2 ~ 1 + a + g + b + s + (1|id)+(1|item)), dat[:VerbAgg],
-              Bernoulli())
+    gm3 = fit(MixedModel, @formula(r2 ~ 1+anger+gender+btype+situ+(1|subj)+(1|item)),
+              dat(:verbagg), Bernoulli())
     @test deviance(gm3) ≈ 8151.40 rtol=1e-5
     @test lowerbd(gm3) == vcat(fill(-Inf, 6), zeros(2))
     @test fitted(gm3) == predict(gm3)
@@ -61,8 +58,11 @@ end
 end

 @testset "grouseticks" begin
-    gm4 = fit(MixedModel, @formula(t ~ 1 + y + ch + (1|i) + (1|b) + (1|l)),
-              dat[:grouseticks], Poisson(), fast=true) # fails in pirls! with fast=false
+    center(v::AbstractVector) = v .- (sum(v) / length(v))
+    grouseticks = dat(:grouseticks)
+    grouseticks.ch = center(grouseticks.height)
+    gm4 = fit(MixedModel, @formula(ticks ~ 1+year+ch+ (1|index) + (1|brood) + (1|location)),
+              grouseticks, Poisson(), fast=true) # fails in pirls! with fast=false
     @test isapprox(deviance(gm4), 851.4046, atol=0.001)
     # these two values are not well defined at the optimum
     #@test isapprox(sum(x -> sum(abs2, x), gm4.u), 196.8695297987013, atol=0.1)
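Side note on the grouseticks change above: the height covariate is now centered on the fly by a local helper instead of coming pre-centered from the old dat.rda. A tiny illustration of what that helper computes (input values made up for the example):

    # Subtract the arithmetic mean so the vector is centered at zero.
    center(v::AbstractVector) = v .- (sum(v) / length(v))

    center([3.0, 4.0, 8.0])   # -> [-2.0, -1.0, 3.0]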
