Skip to content

Commit 1c06896

Browse files
paldaydmbates
andauthored
new rePCA: now on correlations and with optional extra info (#264)
* Added PCA property with correlation or covariance components and loadings. * fix rowsnormalization for rows with all zeros * PCA methods for ReMat, show method for PCA * properties for PCA structure * new rePCA * remove redundant, misplaced definitions * PCA methods for model and set appropriate signatures * GLMM rePCA Co-authored-by: Douglas Bates <[email protected]>
1 parent c87e8d1 commit 1c06896

File tree

6 files changed

+164
-15
lines changed

6 files changed

+164
-15
lines changed

src/generalizedlinearmixedmodel.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,7 @@ function Base.getproperty(m::GeneralizedLinearMixedModel, s::Symbol)
373373
m.β
374374
elseif s (, :sigma)
375375
sdest(m)
376-
elseif s (:A, :L, , :lowerbd, :optsum, :X, :reterms, :feterms, :formula, :σs, :σρs)
376+
elseif s (:A, :L, , :lowerbd, :PCA, :rePCA, :optsum, :X, :reterms, :feterms, :formula, :σs, :σρs)
377377
getproperty(m.LMM, s)
378378
elseif s == :y
379379
m.resp.y
@@ -566,6 +566,8 @@ for f in (
566566
:feL,
567567
:(LinearAlgebra.logdet),
568568
:lowerbd,
569+
:PCA,
570+
:rePCA,
569571
:(StatsModels.modelmatrix),
570572
:(StatsBase.vcov),
571573
:σs,

src/linearmixedmodel.jl

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,8 @@ function Base.getproperty(m::LinearMixedModel, s::Symbol)
450450
filter(Base.Fix2(isa, FeMat), getfield(m, :allterms))
451451
elseif s == :objective
452452
objective(m)
453+
elseif s == :PCA
454+
NamedTuple{fnames(m)}(PCA.(m.reterms))
453455
elseif s == :pvalues
454456
ccdf.(Chisq(1), abs2.(coef(m) ./ stderror(m)))
455457
elseif s == :reterms
@@ -535,6 +537,7 @@ Base.propertynames(m::LinearMixedModel, private = false) = (
535537
:lowerbd,
536538
:X,
537539
:y,
540+
:PCA,
538541
:rePCA,
539542
:reterms,
540543
:feterms,
@@ -616,11 +619,31 @@ end
616619

617620
LinearAlgebra.rank(m::LinearMixedModel) = first(m.feterms).rank
618621

619-
function rePCA(m::LinearMixedModel{T}) where {T}
620-
re = m.reterms
621-
nt = length(re)
622-
tup = ntuple(i -> normalized_variance_cumsum(re[i].λ), nt)
623-
NamedTuple{ntuple(i -> re[i].trm.sym, nt),typeof(tup)}(tup)
622+
"""
623+
rePCA(m::LinearMixedModel; corr::Bool=true)
624+
625+
Return a named tuple of the normalized cumulative variance of a principal components
626+
analysis of the random effects covariance matrices or correlation
627+
matrices when `corr` is `true`.
628+
629+
The normalized cumulative variance is the proportion of the variance for the first
630+
principal component, the first two principal components, etc. The last element is
631+
always 1.0 representing the complete proportion of the variance.
632+
"""
633+
function rePCA(m::LinearMixedModel; corr::Bool=true)
634+
pca = PCA.(m.reterms, corr=corr)
635+
NamedTuple{fnames(m)}(getproperty.(pca,:cumvar))
636+
end
637+
638+
"""
639+
PCA(m::LinearMixedModel; corr::Bool=true)
640+
641+
Return a named tuple of the principal components analysis of the random effects
642+
covariance matrices or correlation matrices when `corr` is `true`.
643+
"""
644+
645+
function PCA(m::LinearMixedModel; corr::Bool=true)
646+
NamedTuple{fnames(m)}(PCA.(m.reterms, corr=corr))
624647
end
625648

626649
"""

src/remat.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,11 @@ function *(adjA::Adjoint{T,<:ReMat{T,S}}, B::ReMat{T,P}) where {T,S,P}
374374
BlockedSparse{T,S,P}(cscmat, reshape(cscmat.nzval, S, :), cscmat.colptr[1:P:(cscmat.n + 1)])
375375
end
376376

377+
PCA(A::ReMat{T,1}; corr::Bool=true) where {T} =
378+
PCA(corr ? reshape(abs.(vec(A.λ)),1,1) : reshape(ones(T,1),1,1), corr=corr)
379+
380+
PCA(A::ReMat{T,S}; corr::Bool=true) where {T,S} = PCA(A.λ, corr=corr)
381+
377382
function reweight!(A::ReMat, sqrtwts::Vector)
378383
if length(sqrtwts) > 0
379384
if A.z === A.wtz

src/utilities.jl

Lines changed: 111 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,14 @@ function Base.sum!(s::AbstractVector{T}, a::RaggedArray{T}) where {T}
5959
s
6060
end
6161

62-
"""
63-
normalized_variance_cumsum(A::AbstractMatrix)
64-
65-
Return the cumulative sum of the squared singular values of `A` normalized to sum to 1
66-
"""
67-
function normalized_variance_cumsum(A::AbstractMatrix)
68-
vars = cumsum(abs2.(svdvals(A)))
69-
vars ./ last(vars)
62+
function rownormalize!(A::AbstractMatrix)
63+
for r in eachrow(A)
64+
# all zeros arise in zerocorr situations
65+
if !iszero(r)
66+
normalize!(r)
67+
end
68+
end
69+
A
7070
end
7171

7272
"""
@@ -137,3 +137,106 @@ dataset(nm::Symbol) = dataset(string(nm))
137137
Return a vector of names of the available test data sets
138138
"""
139139
datasets() = first.(Base.Filesystem.splitext.(filter(Base.Fix2(endswith, ".feather"), readdir(TestData))))
140+
141+
142+
"""
143+
PCA{T<:AbstractFloat}
144+
145+
Principal Components Analysis
146+
147+
## Fields
148+
149+
* `covcorr` covariance or correlation matrix
150+
* `sv` singular value decomposition
151+
* `corr` is this a correlation matrix
152+
"""
153+
struct PCA{T<:AbstractFloat}
154+
covcor::Symmetric{T,Matrix{T}}
155+
sv::SVD{T,T,Matrix{T}}
156+
corr::Bool
157+
end
158+
159+
"""
160+
PCA(::AbstractMatrix; corr::Bool=true)
161+
PCA(::ReMat; corr::Bool=true)
162+
PCA(::LinearMixedModel; corr::Bool=true)
163+
164+
Constructs a [`MixedModels.PCA`](@ref]) object from a covariance matrix.
165+
166+
For `LinearMixedModel`, a named tuple of PCA on each of the random-effects terms is returned.
167+
168+
If `corr=true`, then the covariance is first standardized to the correlation scale.
169+
"""
170+
171+
function PCA(covfac::AbstractMatrix; corr::Bool=true)
172+
covf = corr ? rownormalize!(copy(covfac)) : copy(covfac)
173+
PCA(Symmetric(covf*covf', :L), svd(covf), corr)
174+
end
175+
176+
function Base.getproperty(pca::PCA, s::Symbol)
177+
if s == :cumvar
178+
cumvv = cumsum(abs2.(pca.sv.S))
179+
cumvv ./ last(cumvv)
180+
elseif s == :loadings
181+
pca.sv.U
182+
else
183+
getfield(pca, s)
184+
end
185+
end
186+
187+
Base.propertynames(pca::PCA, private = false) = (
188+
:covcor,
189+
:sv,
190+
:corr,
191+
:cumvar,
192+
:loadings,
193+
# :rotation,
194+
)
195+
196+
Base.show(pca::PCA;
197+
ndigitsmat=2, ndigitsvec=2, ndigitscum=4,
198+
covcor=true, loadings=true, variances=false, stddevs=false) =
199+
Base.show(Base.stdout, pca,
200+
ndigitsmat=ndigitsmat,
201+
ndigitsvec=ndigitsvec,
202+
ndigitscum=ndigitscum,
203+
covcor=covcor,
204+
loadings=loadings,
205+
variances=variances,
206+
stddevs=stddevs)
207+
208+
function Base.show(io::IO, pca::PCA;
209+
ndigitsmat=2, ndigitsvec=2, ndigitscum=4,
210+
covcor=true, loadings=true, variances=false, stddevs=false)
211+
println(io)
212+
if covcor
213+
println(io,
214+
"Principal components based on ",
215+
pca.corr ? "correlation" : "(relative) covariance",
216+
" matrix")
217+
# only display the lower triangle of symmetric matrix
218+
Base.print_matrix(io, round.(LowerTriangular(pca.covcor), digits=ndigitsmat))
219+
println(io)
220+
end
221+
if stddevs
222+
println(io, "Standard deviations:")
223+
sv = pca.sv
224+
show(io, round.(sv.S, digits=ndigitsvec))
225+
println(io)
226+
end
227+
if variances
228+
println(io, "Variances:")
229+
vv = abs2.(sv.S)
230+
show(io, round.(vv, digits=ndigitsvec))
231+
println(io)
232+
end
233+
println(io, "Normalized cumulative variances:")
234+
show(io, round.(pca.cumvar, digits=ndigitscum))
235+
println(io)
236+
if loadings
237+
println(io, "Component loadings")
238+
Base.print_matrix(io, round.(pca.loadings, digits=ndigitsmat))
239+
end
240+
241+
nothing
242+
end

test/pirls.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ using DataFrames, Feather, LinearAlgebra, MixedModels, Test
2323
@test isnan(gm1.σ)
2424
@test length(gm1.y) == size(gm1.X, 1)
2525
@test in propertynames(gm0)
26+
@testset "GLMM rePCA" begin
27+
@test length(MixedModels.PCA(gm0)) == 1
28+
@test length(MixedModels.rePCA(gm0)) == 1
29+
@test length(gm0.rePCA) == 1
30+
end
2631
# gm0.βθ = vcat(gm0.β, gm0.theta)
2732
# the next three values are not well defined in the optimization
2833
#@test isapprox(logdet(gm1), 75.7217, atol=0.1)

test/pls.jl

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,11 @@ end
159159
resid1 = residuals(fm1);
160160
@test size(resid1) == (73421, )
161161
@test first(resid1) 1.82124 atol=0.00001
162-
@test length(fm1.rePCA) == 3
162+
163+
@testset "PCA" begin
164+
@test length(fm1.rePCA) == 3
165+
@test length(MixedModels.PCA(fm1)) == 3
166+
end
163167

164168
fm2 = fit!(LinearMixedModel(@formula(y ~ 1 + service*dept + (1|s) + (1|d)), insteval))
165169
@test objective(fm2) 237585.5534151694 atol=0.001
@@ -228,6 +232,13 @@ end
228232
@test fmnc.θ == ones(2)
229233
@test lowerbd(fmnc) == zeros(2)
230234

235+
@testset "zerocorr PCA" begin
236+
@test length(fmnc.rePCA) == 1
237+
@test fmnc.rePCA.subj == [0.5, 1.0]
238+
@test MixedModels.PCA(fmnc).subj.loadings == I(2)
239+
@test show(IOBuffer(), MixedModels.PCA(fmnc)) === nothing
240+
end
241+
231242
fit!(fmnc)
232243

233244
@test deviance(fmnc) 1752.0032551398835 atol=0.001

0 commit comments

Comments
 (0)