|
1 | 1 | @testset "Model matrix" begin |
2 | | - |
| 2 | + |
3 | 3 | using StatsBase: StatisticalModel |
4 | 4 |
|
5 | 5 | using SparseArrays, DataFrames, Tables |
|
14 | 14 | d.x1p = categorical(d.x1) |
15 | 15 |
|
16 | 16 | d_orig = deepcopy(d) |
17 | | - |
| 17 | + |
18 | 18 | x1 = [5.:8;] |
19 | 19 | x2 = [9.:12;] |
20 | 20 | x3 = [13.:16;] |
|
161 | 161 | z = repeat([:e, :f], inner = 4)) |
162 | 162 | cs = Dict([Symbol(name) => EffectsCoding() for name in names(d)]) |
163 | 163 | d.n = 1.:8 |
164 | | - |
165 | | - |
| 164 | + |
| 165 | + |
166 | 166 | ## No intercept |
167 | 167 | mf = ModelFrame(@formula(n ~ 0 + x), d, contrasts=cs) |
168 | 168 | mm = ModelMatrix(mf) |
|
182 | 182 | mm = ModelMatrix(mf) |
183 | 183 | @test all(mm.m .== ifelse.(d.x .== :a, -1, 1)) |
184 | 184 | @test coefnames(mf) == ["x: b"] |
185 | | - |
186 | | - |
| 185 | + |
| 186 | + |
187 | 187 | ## No first-order term for interaction |
188 | 188 | mf = ModelFrame(@formula(n ~ 1 + x + x&y), d, contrasts=cs) |
189 | 189 | mm = ModelMatrix(mf) |
|
197 | 197 | 1 0 1] |
198 | 198 | @test mm.m == ModelMatrix{sparsetype}(mf).m |
199 | 199 | @test coefnames(mf) == ["(Intercept)", "x: b", "x: a & y: d", "x: b & y: d"] |
200 | | - |
| 200 | + |
201 | 201 | ## When both terms of interaction are non-redundant: |
202 | 202 | mf = ModelFrame(@formula(n ~ 0 + x&y), d, contrasts=cs) |
203 | 203 | mm = ModelMatrix(mf) |
|
218 | 218 | mm = ModelMatrix(mf) |
219 | 219 | @test mm.m == Matrix(1.0I, 8, 8) |
220 | 220 | @test mm.m == ModelMatrix{sparsetype}(mf).m |
221 | | - |
| 221 | + |
222 | 222 | # two two-way interactions, with no lower-order term. both are promoted in |
223 | 223 | # first (both x and y), but only the old term (x) in the second (because |
224 | 224 | # dropping x gives z which isn't found elsewhere, but dropping z gives x |
|
237 | 237 | @test coefnames(mf) == ["x: a & y: c", "x: b & y: c", |
238 | 238 | "x: a & y: d", "x: b & y: d", |
239 | 239 | "x: a & z: f", "x: b & z: f"] |
240 | | - |
| 240 | + |
241 | 241 | # ...and adding a three-way interaction, only the shared term (x) is promoted. |
242 | 242 | # this is because dropping x gives y&z which isn't present, but dropping y or z |
243 | 243 | # gives x&z or x&z respectively, which are both present. |
|
256 | 256 | "x: a & y: d", "x: b & y: d", |
257 | 257 | "x: a & z: f", "x: b & z: f", |
258 | 258 | "x: a & y: d & z: f", "x: b & y: d & z: f"] |
259 | | - |
| 259 | + |
260 | 260 | # two two-way interactions, with common lower-order term. the common term x is |
261 | 261 | # promoted in both (along with lower-order term), because in every case, when |
262 | 262 | # x is dropped, the remaining terms (1, y, and z) aren't present elsewhere. |
|
274 | 274 | @test coefnames(mf) == ["x: a", "x: b", |
275 | 275 | "x: a & y: d", "x: b & y: d", |
276 | 276 | "x: a & z: f", "x: b & z: f"] |
277 | | - |
278 | | - |
| 277 | + |
| 278 | + |
279 | 279 | ## FAILS: When both terms are non-redundant and intercept is PRESENT |
280 | 280 | ## (not fully redundant). Ideally, would drop last column. Might make sense |
281 | 281 | ## to warn about this, and suggest recoding x and y into a single variable. |
|
286 | 286 | 1 0 0 0] |
287 | 287 | @test_broken coefnames(mf) == ["x: a & y: c", "x: b & y: c", |
288 | 288 | "x: a & y: d", "x: b & y: d"] |
289 | | - |
| 289 | + |
290 | 290 | ## note that R also does not detect this automatically. it's left to glm et al. |
291 | 291 | ## to detect numerically when the model matrix is rank deficient, which is hard |
292 | 292 | ## to do correctly. |
|
343 | 343 | x = repeat([:a, :b], outer = 4), |
344 | 344 | y = repeat([:c, :d], inner = 2, outer = 2), |
345 | 345 | z = repeat([:e, :f], inner = 4)) |
346 | | - |
| 346 | + |
347 | 347 | f = apply_schema(@formula(r ~ 1 + w*x*y*z), schema(d)) |
348 | 348 | modelmatrix(f, d) |
349 | 349 | @test reduce(vcat, last.(modelcols.(Ref(f), Tables.rowtable(d)))') == modelmatrix(f,d) |
|
355 | 355 | x = repeat([:a, :b], outer = 4), |
356 | 356 | y = repeat([:c, :d], inner = 2, outer = 2), |
357 | 357 | z = repeat([:e, :f], inner = 4)) |
358 | | - |
| 358 | + |
359 | 359 | f = @formula(r ~ 1 + w*x*y*z) |
360 | 360 |
|
361 | 361 | mm1 = modelmatrix(f, d) |
|
375 | 375 | C=repeat(['L','H'], inner=4)) |
376 | 376 |
|
377 | 377 | contrasts = Dict(:A=>HelmertCoding(), :B=>HelmertCoding(), :C=>HelmertCoding()) |
378 | | - |
379 | | - |
| 378 | + |
| 379 | + |
380 | 380 |
|
381 | 381 | mf = ModelFrame(@formula(Y ~ 1 + A*B*C), tbl) |
382 | 382 | mf_helm = ModelFrame(@formula(Y ~ 1 + A*B*C), tbl, contrasts = contrasts) |
383 | 383 |
|
384 | 384 | @test size(modelmatrix(mf)) == size(modelmatrix(mf_helm)) |
385 | | - |
| 385 | + |
386 | 386 | mf_helm2 = setcontrasts!(ModelFrame(@formula(Y ~ 1 + A*B*C), tbl), contrasts) |
387 | 387 |
|
388 | 388 | @test size(modelmatrix(mf)) == size(modelmatrix(mf_helm2)) |
389 | 389 | @test modelmatrix(mf_helm) == modelmatrix(mf_helm2) |
390 | | - |
| 390 | + |
391 | 391 | end |
392 | 392 | end |
393 | 393 |
|
|
402 | 402 | f = apply_schema(@formula(0 ~ a&b&c), schema(t)) |
403 | 403 | @test vec(modelcols(f.rhs, t)) == modelcols.(Ref(f.rhs), Tables.rowtable(t)) |
404 | 404 | end |
405 | | - |
| 405 | + |
| 406 | + @testset "#112. coefnames should return same type for all rhs: $(f)" for f in [ |
| 407 | + @formula(y ~ 1), |
| 408 | + @formula(y ~ x1 + 0), |
| 409 | + @formula(y ~ x1), |
| 410 | + @formula(y ~ x1 + x2), |
| 411 | + ] |
| 412 | + df = (y = [1.0, 1.0], x1 = [1, 2], x2 = ["A", "B"]) |
| 413 | + _f = apply_schema(f, schema(f, df)) |
| 414 | + @test coefnames(_f.rhs) isa Vector{String} |
| 415 | + end |
406 | 416 | end |
0 commit comments