@@ -213,7 +213,7 @@ Base.getindex(contrasts::ContrastsMatrix{C,T}, rowinds, colinds) where {C,T} =
213
213
# Making a contrast type T only requires that there be a method for
214
214
# contrasts_matrix(T, baseind, n) and optionally termnames(T, levels, baseind)
215
215
# The rest is boilerplate.
216
- for contrastType in [:DummyCoding , :EffectsCoding , :HelmertCoding ]
216
+ for contrastType in [:DummyCoding , :EffectsCoding , :HelmertCoding , :SeqDiffCoding ]
217
217
@eval begin
218
218
mutable struct $ contrastType <: AbstractContrasts
219
219
base:: Any
@@ -368,25 +368,153 @@ function contrasts_matrix(C::HelmertCoding, baseind, n)
368
368
end
369
369
370
370
"""
371
- ContrastsCoding(mat::Matrix[, base[, levels]])
371
+ SeqDiffCoding([base[, levels]])
372
+
373
+ Code each level in order to test "sequential difference" hypotheses, which
374
+ compare each level to the level below it (starting with the second level).
375
+ Specifically, the ``n``th predictor tests the hypothesis that the difference
376
+ between levels ``n`` and ``n+1`` is zero.
377
+
378
+ # Examples
379
+
380
+ ```jldoctest seqdiff
381
+ julia> seqdiff = StatsModels.ContrastsMatrix(SeqDiffCoding(), ["a", "b", "c", "d"]).matrix
382
+ 4×3 Array{Float64,2}:
383
+ -0.75 -0.5 -0.25
384
+ 0.25 -0.5 -0.25
385
+ 0.25 0.5 -0.25
386
+ 0.25 0.5 0.75
387
+ ```
388
+
389
+ The interpretation of sequential difference coding may be hard to see from the
390
+ contrasts matrix itself. The corresponding hypothesis matrix shows a clearer
391
+ picture. From the rows of the hypothesis matrix, we can see that these
392
+ contrasts test the difference between the first and second levels, the second
393
+ and third, and the third and fourth, respectively:
394
+
395
+ ```jldoctest seqdiff
396
+ julia> round.(pinv(seqdiff), digits=2)
397
+ 3×4 Array{Float64,2}:
398
+ -1.0 1.0 -0.0 0.0
399
+ -0.0 -1.0 1.0 -0.0
400
+ 0.0 -0.0 -1.0 1.0
401
+ ```
402
+
403
+ """
404
+ SeqDiffCoding
405
+
406
# Build the n × (n-1) sequential-difference contrasts matrix.
#
# Column `j` holds `(j - n) / n` in rows `1:j` and `j / n` in rows `j+1:n`.
# `C` is used only for dispatch; `baseind` is not used by this coding.
function contrasts_matrix(C::SeqDiffCoding, baseind, n)
    coding = Matrix{Float64}(undef, n, n - 1)
    for j in axes(coding, 2), i in axes(coding, 1)
        # Rows up to and including row j get the negative weight.
        weight = i <= j ? j - n : j
        coding[i, j] = weight / n
    end
    return coding
end
414
+
415
+
416
+ """
417
+ HypothesisCoding(hypotheses::Matrix; levels=nothing)
418
+
419
+ Specify how to code a categorical variable in terms of a *hypothesis matrix*.
420
+ For a variable with ``k`` levels, this should be a ``k-1 \\times k`` matrix.
421
+ Each row of the matrix corresponds to a hypothesis about the mean
422
+ outcomes under each of the ``k`` levels of the predictor. The entries in the
423
+ row give the weights assigned to each of these ``k`` means, and the
424
+ corresponding predictor in a regression model estimates the weighted sum of
425
+ these cell means.
426
+
427
+ For instance, if we have a variable which has four levels A, B, C, and D, and we
428
+ want to test the hypothesis that the difference between the average outcomes for
429
+ levels A and B is different from zero, the corresponding row of the hypothesis
430
+ matrix would be `[-1, 1, 0, 0]`. Likewise, to test whether the difference
431
+ between B and C is different from zero, the hypothesis vector would be `[0, -1,
432
+ 1, 0]`. To test each "successive difference" hypothesis, the full hypothesis
433
+ matrix would be
434
+
435
+ ```jldoctest hyp
436
+ julia> sdiff_hypothesis = [-1 1 0 0
437
+ 0 -1 1 0
438
+ 0 0 -1 1];
439
+ ```
440
+
441
+ Contrasts are derived from the hypothesis matrix by taking the pseudoinverse:
442
+
443
+ ```jldoctest hyp
444
+ julia> sdiff_contrasts = pinv(sdiff_hypothesis)
445
+ 4×3 Array{Float64,2}:
446
+ -0.75 -0.5 -0.25
447
+ 0.25 -0.5 -0.25
448
+ 0.25 0.5 -0.25
449
+ 0.25 0.5 0.75
450
+ ```
451
+
452
+ The above matrix is what is produced by constructing a [`ContrastsMatrix`](@ref) from a
453
+ `HypothesisCoding` instance:
454
+
455
+ ```jldoctest hyp
456
+ julia> StatsModels.ContrastsMatrix(HypothesisCoding(sdiff_hypothesis), ["a", "b", "c", "d"]).matrix
457
+ 4×3 Array{Float64,2}:
458
+ -0.75 -0.5 -0.25
459
+ 0.25 -0.5 -0.25
460
+ 0.25 0.5 -0.25
461
+ 0.25 0.5 0.75
462
+ ```
463
+
464
+ The interpretation of such "sequential difference" contrasts is clear when
465
+ expressed as a hypothesis matrix, but it is not obvious just from looking at the
466
+ contrasts matrix. For this reason `HypothesisCoding` is preferred for
467
+ specifying custom contrast coding schemes over `ContrastsCoding`.
468
+
469
+ """
470
mutable struct HypothesisCoding <: AbstractContrasts
    # User-supplied hypothesis matrix, stored as given.
    hypotheses::Matrix
    # Contrasts matrix derived from `hypotheses` via the pseudoinverse.
    contrasts::Matrix
    # Typed `Nothing`: this coding carries no base level.
    base::Nothing
    levels::Union{Vector,Nothing}

    # Derive the contrasts once at construction time and validate their shape
    # against `levels` before storing anything.
    function HypothesisCoding(hypotheses, base, levels)
        derived = pinv(hypotheses)
        check_contrasts_size(derived, levels)
        return new(hypotheses, derived, base, levels)
    end
end
482
+
483
# Convenience constructor: takes only the hypothesis matrix (plus optional
# `levels` keyword) and forwards to the three-argument constructor with
# `base` fixed to `nothing`.
function HypothesisCoding(mat::Matrix; levels=nothing)
    return HypothesisCoding(mat, nothing, levels)
end
485
+
486
# Return the contrasts matrix precomputed when `C` was constructed, after
# checking that its shape matches `n` levels. `baseind` is not used.
function contrasts_matrix(C::HypothesisCoding, baseind, n)
    stored = C.contrasts
    check_contrasts_size(stored, n)
    return stored
end
490
+
491
+
492
+ """
493
+ StatsModels.ContrastsCoding(mat::Matrix[, base[, levels]])
372
494
373
495
Coding by manual specification of contrasts matrix. For k levels, the contrasts
374
- must be a k by k-1 Matrix.
496
+ must be a k by k-1 Matrix. The contrasts in this matrix will be copied directly
497
+ into the model matrix; if you want to specify your contrasts as hypotheses (i.e.,
498
+ weights assigned to each group's cell mean), you should use
499
+ [`HypothesisCoding`](@ref) instead.
375
500
"""
376
501
mutable struct ContrastsCoding <: AbstractContrasts
    mat::Matrix
    base::Any
    levels::Union{Vector,Nothing}

    # Inner constructor: emits the deprecation warning, validates the shape of
    # `mat` against `levels`, then stores the arguments unchanged.
    function ContrastsCoding(mat, base, levels)
        msg = " `ContrastsCoding(contrasts)` is deprecated and will not be exported" *
              " in the future, use `HypothesisCoding(pinv(contrasts))` instead."
        Base.depwarn(msg, :ContrastsCoding)
        # Dispatches on `levels` (nothing or a vector) to pick the expected size.
        check_contrasts_size(mat, levels)
        return new(mat, base, levels)
    end
end
388
514
389
- check_contrasts_size (mat:: Matrix , n_lev) =
515
# Validate that a contrasts matrix has the k × (k-1) shape required for k levels.
#
# The second argument selects how k is determined:
#   - `nothing`: k is taken from the number of rows of `mat` itself;
#   - a vector of levels: k is its length;
#   - an integer: k is used directly.
#
# Returns `true` when the shape matches; throws `ArgumentError` otherwise.
check_contrasts_size(mat::Matrix, ::Nothing) = check_contrasts_size(mat, size(mat, 1))
check_contrasts_size(mat::Matrix, levels::AbstractVector) =
    check_contrasts_size(mat, length(levels))
function check_contrasts_size(mat::Matrix, n_lev::Integer)
    expected = (n_lev, n_lev - 1)
    size(mat) == expected ||
        throw(ArgumentError("contrasts matrix wrong size for $n_lev levels. " *
                            "Expected $expected, got $(size(mat))"))
    return true
end
0 commit comments