Skip to content

Commit 6409eee

Browse files
authored
Add functions dataset and datasets (#254)
* Add non-exported dataset and datasets functions, depend on Feather * Change test scripts to use dataset
1 parent c39e62b commit 6409eee

File tree

10 files changed

+58
-44
lines changed

10 files changed

+58
-44
lines changed

Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ version = "2.2.0"
77
BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e"
88
BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0"
99
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
10+
Feather = "becb17da-46f6-5d3c-ad1b-1c5fe96bc73c"
1011
GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
1112
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
1213
NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd"
@@ -29,6 +30,7 @@ TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9"
2930
BlockArrays = "0.5, 0.6, 0.7, 0.8, 0.9, 0.10, 0.11"
3031
BlockDiagonals = "0.1"
3132
Distributions = "0.15, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21, 0.22"
33+
Feather = "0.5"
3234
GLM = "1"
3335
NLopt = "0.5"
3436
NamedArrays = "0.9"

src/MixedModels.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ module MixedModels
33
using BlockArrays
44
using BlockDiagonals
55
using Distributions
6+
using Feather
67
using GLM
78
using LinearAlgebra
89
using NamedArrays

src/utilities.jl

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,25 @@ function replicate(f::Function, n::Integer; use_threads=false)
115115
end
116116
results
117117
end
118+
119+
"""
120+
dataset(nm)
121+
122+
Return the data frame of test data set named `nm`, which can be a `String` or `Symbol`
123+
"""
124+
function dataset(nm::AbstractString)
125+
path = joinpath(TestData, nm * ".feather")
126+
if !isfile(path)
127+
throw(ArgumentError(
128+
"Dataset \"$nm\" is not available.\nUse MixedModels.datasets() for available names."))
129+
end
130+
Feather.read(path)
131+
end
132+
dataset(nm::Symbol) = dataset(string(nm))
133+
134+
"""
135+
datasets()
136+
137+
Return a vector of names of the available test data sets
138+
"""
139+
datasets() = first.(Base.Filesystem.splitext.(filter(Base.Fix2(endswith, ".feather"), readdir(TestData))))

test/FactorReTerm.jl

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,12 @@ using Test
99

1010
const LMM = LinearMixedModel
1111

12-
data(nm::AbstractString) = Feather.read(joinpath(MixedModels.TestData, string(nm, ".feather")))
13-
data(nm::Symbol) = data(string(nm))
14-
1512
@testset "scalarReMat" begin
16-
ds = data("dyestuff")
13+
ds = MixedModels.dataset("dyestuff")
1714
f1 = @formula(yield ~ 1 + (1|batch))
1815
y1, Xs1 = modelcols(apply_schema(f1, schema(ds), LMM), ds)
1916
sf = Xs1[2]
20-
psts = data("pastes")
17+
psts = MixedModels.dataset("pastes")
2118
f2 = @formula(strength ~ 1 + (1|sample) + (1|batch))
2219
y2, Xs2 = modelcols(apply_schema(f2, schema(psts), LMM), psts)
2320
sf1 = Xs2[2]
@@ -83,7 +80,7 @@ data(nm::Symbol) = data(string(nm))
8380
end
8481

8582
@testset "RandomEffectsTerm" begin
86-
slp = data("sleepstudy")
83+
slp = MixedModels.dataset("sleepstudy")
8784
contrasts = Dict{Symbol,Any}()
8885

8986
@testset "Detect same variable as blocking and experimental" begin
@@ -102,7 +99,7 @@ end
10299

103100
@testset "Categorical Blocking Variable" begin
104101
# deepcopy because we're going to modify it
105-
slp = deepcopy(data("sleepstudy"))
102+
slp = deepcopy(MixedModels.dataset("sleepstudy"))
106103
contrasts = Dict{Symbol,Any}()
107104
f = @formula(reaction ~ 1 + (1|subj))
108105

test/UniformBlockDiagonal.jl

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@ using SparseArrays
77
using StatsModels
88
using Test
99

10-
dat(nm::AbstractString) = Feather.read(joinpath(MixedModels.TestData, nm * ".feather"))
11-
dat(nm::Symbol) = dat(string(nm))
12-
1310
const LMM = LinearMixedModel
1411

1512
@testset "UBlk" begin
@@ -59,7 +56,7 @@ const LMM = LinearMixedModel
5956

6057
@testset "updateL" begin
6158
@test ones(2, 2) == MixedModels.rankUpdate!(Hermitian(zeros(2, 2)), ones(2))
62-
d3 = dat(:d3)
59+
d3 = MixedModels.dataset(:d3)
6360
sch = schema(d3)
6461
vf1 = modelcols(apply_schema(@formula(y ~ 1 + u + (1+u|g)), sch, LMM), d3)[2][2]
6562
vf2 = modelcols(apply_schema(@formula(y ~ 1 + u + (1+u|h)), sch, LMM), d3)[2][2]

test/fit.jl

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,27 @@
11
using MixedModels, Feather, Test
22

3-
data(nm::AbstractString) = Feather.read(joinpath(MixedModels.TestData, string(nm, ".feather")))
4-
5-
data(nm::Symbol) = data(string(nm))
6-
73
@testset "linear" begin
8-
m1 = fit(MixedModel, @formula(yield ~ 1 + (1|batch)), data(:dyestuff))
4+
m1 = fit(MixedModel, @formula(yield ~ 1 + (1|batch)), MixedModels.dataset(:dyestuff))
95
@test first(m1.θ) ≈ 0.7525806757718846 rtol=1.0e-5
106
end
117

128
@testset "generalized" begin
139
gm1 = fit(MixedModel, @formula(use ~ 1 + urban + livch + age + abs2(age) + (1|dist)),
14-
data(:contra), Bernoulli())
10+
MixedModels.dataset(:contra), Bernoulli())
1511
@test deviance(gm1) ≈ 2372.7286 atol=1.0e-3
1612
end
1713

1814
@testset "Normal-IdentityLink" begin
19-
@test isa(fit(MixedModel, @formula(yield ~ 1 + (1|batch)), data(:dyestuff), Normal()),
15+
@test isa(fit(MixedModel, @formula(yield ~ 1 + (1|batch)), MixedModels.dataset(:dyestuff), Normal()),
2016
LinearMixedModel)
2117
@test_throws(ArgumentError("use LinearMixedModel for Normal distribution with IdentityLink"),
2218
fit(GeneralizedLinearMixedModel,
2319
@formula(yield ~ 1 + (1|batch)),
24-
data(:dyestuff)))
20+
MixedModels.dataset(:dyestuff)))
2521
end
2622

2723
@testset "Normal Distribution GLMM" begin
28-
@test_broken(isa(fit(MixedModel, @formula(yield ~ 1 + (1|batch)), data(:dyestuff),
24+
@test_broken(isa(fit(MixedModel, @formula(yield ~ 1 + (1|batch)), MixedModels.dataset(:dyestuff),
2925
Normal(), LogLink),
3026
GeneralizedLinearMixedModel))
3127
end

test/missing.jl

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
using Feather, MixedModels, Test
22

3-
dat(nm::AbstractString) = Feather.read(joinpath(MixedModels.TestData, string(nm, ".feather")))
4-
dat(nm::Symbol) = dat(string(nm))
53
# deepcopy because we're going to modify it
6-
slp = deepcopy(dat(:sleepstudy))
4+
slp = deepcopy(MixedModels.dataset(:sleepstudy))
75
slp[!,:days] = Array{Union{Missing, Float64},1}(slp[!,:days])
86
slp[1,:days] = missing
97

@@ -21,13 +19,13 @@ slp[1,:days] = missing
2119
@testset "Missing Omit" begin
2220
@testset "Missing from unused variables" begin
2321
# missing from unused variables should have no impact
24-
m1 = fit(MixedModel, @formula(reaction ~ 1 + (1|subj)), dat(:sleepstudy))
22+
m1 = fit(MixedModel, @formula(reaction ~ 1 + (1|subj)), MixedModels.dataset(:sleepstudy))
2523
m1_missing = fit(MixedModel, @formula(reaction ~ 1 + (1|subj)), slp)
2624
@test isapprox(m1.θ, m1_missing.θ, rtol=1.0e-12)
2725
end
2826

2927
@testset "Missing from used variables" begin
30-
m1 = fit(MixedModel, @formula(reaction ~ 1 + days + (1|subj)), dat(:sleepstudy))
28+
m1 = fit(MixedModel, @formula(reaction ~ 1 + days + (1|subj)), MixedModels.dataset(:sleepstudy))
3129
m1_missing = fit(MixedModel, @formula(reaction ~ 1 + days + (1|subj)), slp)
3230
@test nobs(m1) - nobs(m1_missing) == 1
3331
end

test/pirls.jl

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
using DataFrames, Feather, LinearAlgebra, MixedModels, Test
22

3-
dat(nm::AbstractString) = Feather.read(joinpath(MixedModels.TestData, string(nm, ".feather")))
4-
dat(nm::Symbol) = dat(string(nm))
5-
63
@testset "contra" begin
7-
contra = dat(:contra)
4+
contra = MixedModels.dataset(:contra)
85
contraform = @formula(use ~ 1+age+abs2(age)+urban+livch+(1|urbdist))
96
gm0 = fit(MixedModel, contraform, contra, Bernoulli(), fast=true);
107
@test gm0.lowerbd == zeros(1)
@@ -35,7 +32,7 @@ dat(nm::Symbol) = dat(string(nm))
3532
end
3633

3734
@testset "cbpp" begin
38-
cbpp = dat(:cbpp)
35+
cbpp = MixedModels.dataset(:cbpp)
3936
gm2 = fit(MixedModel, @formula((incid/hsz) ~ 1 + period + (1|herd)), cbpp, Binomial(), wts=float(cbpp.hsz))
4037
@test deviance(gm2,true) ≈ 100.09585619892968 atol=0.0001
4138
@test sum(abs2, gm2.u[1]) ≈ 9.723054788538546 atol=0.0001
@@ -48,7 +45,7 @@ end
4845

4946
@testset "verbagg" begin
5047
gm3 = fit(MixedModel, @formula(r2 ~ 1+anger+gender+btype+situ+(1|subj)+(1|item)),
51-
dat(:verbagg), Bernoulli())
48+
MixedModels.dataset(:verbagg), Bernoulli())
5249
@test deviance(gm3) ≈ 8151.40 rtol=1e-5
5350
@test lowerbd(gm3) == vcat(fill(-Inf, 6), zeros(2))
5451
@test fitted(gm3) == predict(gm3)
@@ -59,7 +56,7 @@ end
5956

6057
@testset "grouseticks" begin
6158
center(v::AbstractVector) = v .- (sum(v) / length(v))
62-
grouseticks = dat(:grouseticks)
59+
grouseticks = MixedModels.dataset(:grouseticks)
6360
grouseticks.ch = center(grouseticks.height)
6461
gm4 = fit(MixedModel, @formula(ticks ~ 1+year+ch+ (1|index) + (1|brood) + (1|location)),
6562
grouseticks, Poisson(), fast=true) # fails in pirls! with fast=false

test/pls.jl

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,10 @@
11
using BlockArrays, DataFrames, Feather, LinearAlgebra, MixedModels, NamedArrays
22
using Random, SparseArrays, Statistics, Tables, Test
33

4-
dat(nm::AbstractString) = Feather.read(joinpath(MixedModels.TestData, nm * ".feather"))
5-
dat(nm::Symbol) = dat(string(nm))
6-
74
const LMM = LinearMixedModel
85

96
@testset "Dyestuff" begin
10-
ds = dat(:dyestuff)
7+
ds = MixedModels.dataset(:dyestuff)
118
fm1 = LMM(@formula(yield ~ 1 + (1|batch)), ds)
129

1310
@test length(fm1.allterms) == 3
@@ -86,7 +83,7 @@ const LMM = LinearMixedModel
8683
end
8784

8885
@testset "Dyestuff2" begin
89-
ds2 = dat(:dyestuff2)
86+
ds2 = MixedModels.dataset(:dyestuff2)
9087
fm = fit(MixedModel, @formula(yield ~ 1 + (1|batch)), ds2)
9188
@test lowerbd(fm) == zeros(1)
9289
show(IOBuffer(), fm)
@@ -97,12 +94,12 @@ end
9794
@test stderror(fm) ≈ [0.6669857396443261]
9895
@test coef(fm) ≈ [5.6656]
9996
@test logdet(fm) ≈ 0.0
100-
refit!(fm, float(dat(:dyestuff)[!, :yield]))
97+
refit!(fm, float(MixedModels.dataset(:dyestuff)[!, :yield]))
10198
@test objective(fm) ≈ 327.3270598811428 atol=0.001
10299
end
103100

104101
@testset "penicillin" begin
105-
pen = dat(:penicillin)
102+
pen = MixedModels.dataset(:penicillin)
106103
fm = LMM(@formula(diameter ~ 1 + (1 | plate) + (1 | sample)), pen);
107104
@test size(fm) == (144, 1, 30, 2)
108105
@test fm.θ == ones(2)
@@ -126,7 +123,7 @@ end
126123
end
127124

128125
@testset "pastes" begin
129-
fm = LMM(@formula(strength ~ (1|sample) + (1|batch)), dat(:pastes))
126+
fm = LMM(@formula(strength ~ (1|sample) + (1|batch)), MixedModels.dataset(:pastes))
130127
@test size(fm) == (60, 1, 40, 2)
131128
@test fm.θ == ones(2)
132129
@test lowerbd(fm) == zeros(2)
@@ -146,7 +143,7 @@ end
146143
end
147144

148145
@testset "InstEval" begin
149-
insteval = dat(:insteval)
146+
insteval = MixedModels.dataset(:insteval)
150147
fm1 = LMM(@formula(y ~ 1 + service + (1|s) + (1|d) + (1|dept)), insteval)
151148
@test size(fm1) == (73421, 2, 4114, 3)
152149
@test fm1.θ == ones(3)
@@ -170,7 +167,7 @@ end
170167
end
171168

172169
@testset "sleep" begin
173-
slp = dat(:sleepstudy)
170+
slp = MixedModels.dataset(:sleepstudy)
174171
fm = LinearMixedModel(@formula(reaction ~ 1 + days + (1+days|subj)), slp);
175172
@test lowerbd(fm) == [0.0, -Inf, 0.0]
176173
A11 = fm.A[Block(1,1)]
@@ -294,7 +291,7 @@ end
294291
end
295292

296293
@testset "d3" begin
297-
fm = updateL!(LMM(@formula(y ~ 1 + u + (1+u|g) + (1+u|h) + (1+u|i)), dat(:d3)));
294+
fm = updateL!(LMM(@formula(y ~ 1 + u + (1+u|g) + (1+u|h) + (1+u|i)), MixedModels.dataset(:d3)));
298295
@test pwrss(fm) ≈ 5.1261847180180885e6 rtol = 1e-6
299296
@test objective(fm) ≈ 901641.2930413672 rtol = 1e-6
300297
fit!(fm)
@@ -305,7 +302,7 @@ end
305302
end
306303

307304
@testset "simulate!" begin
308-
ds = dat(:dyestuff)
305+
ds = MixedModels.dataset(:dyestuff)
309306
fm = fit(MixedModel, @formula(yield ~ 1 + (1|batch)), ds)
310307
refit!(simulate!(Random.MersenneTwister(1234321), fm))
311308
@test deviance(fm) ≈ 339.0218639362958 atol=0.001

test/utilities.jl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,10 @@ end
2121

2222
@test all(sort!(single_thread) .== sort!(multi_thread))
2323
end
24+
25+
@testset "datasets" begin
26+
@test isa(MixedModels.datasets(), Vector{String})
27+
@test size(MixedModels.dataset(:dyestuff)) == (30, 2)
28+
@test size(MixedModels.dataset("dyestuff")) == (30, 2)
29+
@test_throws ArgumentError MixedModels.dataset(:foo)
30+
end

0 commit comments

Comments
 (0)