Skip to content

Commit 359de04

Browse files
committed
Restructure to match rest of package
1 parent 1070ba3 commit 359de04

File tree

2 files changed

+89
-16
lines changed

2 files changed

+89
-16
lines changed

src/mca.jl

Lines changed: 88 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ https://maths.cnam.fr/IMG/pdf/ClassMCA_cle825cfc.pdf
1616
"""
1717
Correspondence Analysis
1818
"""
19-
struct CA{T<:Real} <: LinearDimensionalityReduction
19+
mutable struct CA{T<:Real} <: LinearDimensionalityReduction
2020

2121
# The data matrix
2222
Z::Array{T}
@@ -41,7 +41,9 @@ struct CA{T<:Real} <: LinearDimensionalityReduction
4141
I::Vector{T}
4242
end
4343

44-
function CA(X, d::Int)
44+
# Constructor
45+
46+
function CA(X)
4547

4648
# Convert to proportions
4749
X = X ./ sum(X)
@@ -58,14 +60,41 @@ function CA(X, d::Int)
5860
Wc = Diagonal(sqrt.(c))
5961
SR = Wr \ R / Wc
6062

63+
T = eltype(X)
64+
return CA(X, R, r, c, SR, zeros(T, 0, 0), zeros(T, 0, 0), zeros(T, 0))
65+
end
66+
67+
"""
    fit!(ca::CA, d::Int)

Fit the correspondence analysis `ca` in place, retaining the first `d`
components.  The object factor scores are stored in `ca.F`, the variable
factor scores in `ca.G`, and the squared singular values in `ca.I`.

Returns `ca`, consistent with `fit!(::MCA, ::Int)`, which returns the
fitted model.
"""
function fit!(ca::CA, d::Int)

    # Singular value decomposition of the matrix stored in `ca.SR`.
    P, D, Q = svd(ca.SR)

    # Keep only the first `d` columns of the diagonal matrix of singular
    # values.  NOTE(review): `d > length(D)` raises a `BoundsError` here.
    Dq = Diagonal(D)[:, 1:d]

    # Get the object factor scores (F) and variable factor scores (G).
    Wr = Diagonal(sqrt.(ca.rm))
    Wc = Diagonal(sqrt.(ca.cm))
    ca.F = Wr \ P * Dq
    ca.G = Wc \ Q * Dq

    # Get the eigenvalues
    ca.I = D .^ 2

    # Return the model itself rather than the value of the last assignment.
    return ca
end
81+
82+
"""
    fit(CA, X, d = 5)
    fit(CA; X, d = 5)

Construct a correspondence analysis from the data matrix `X` and fit it,
retaining the first `d` components.  Returns the fitted `CA` value.

The positional form matches `fit(MCA, Z, d)` and is the form used by
`ca(X, d)`.  The keyword form is retained for backward compatibility.
"""
function fit(::Type{CA}, X::AbstractMatrix, d::Int = 5)
    model = CA(X)
    fit!(model, d)
    return model::CA
end

# Keyword-only form kept so existing `fit(CA; X = ..., d = ...)` calls still work.
fit(::Type{CA}; X::AbstractMatrix, d::Int = 5) = fit(CA, X, d)
87+
88+
"""
89+
    ca(X, d)
90+
91+
Fit a correspondence analysis using the data array `X` whose rows are
92+
the objects and columns are the variables. The first `d` components
93+
are retained.
94+
"""
95+
function ca(X, d)
    # Build the unfitted model and fit it in place, mirroring `mca`.
    # This does not rely on a positional `fit(CA, X, d)` method, which
    # the keyword-only `fit(::Type{CA}; X, d)` definition does not provide.
    c = CA(X)
    fit!(c, d)
    return c
end
7099

71100
# Return the object factor scores `F` stored on the correspondence analysis `ca`.
objectscores(ca::CA) = ca.F
@@ -75,7 +104,7 @@ inertia(ca::CA) = ca.I
75104
"""
76105
Multiple Correspondence Analysis
77106
"""
78-
struct MCA{T<:Real} <: LinearDimensionalityReduction
107+
mutable struct MCA{T<:Real} <: LinearDimensionalityReduction
79108

80109
# The underlying correspondence analysis
81110
C::CA{T}
@@ -93,6 +122,12 @@ struct MCA{T<:Real} <: LinearDimensionalityReduction
93122
# each variable.
94123
Gv::Vector{Matrix{T}}
95124

125+
# Number of nominal variables
126+
K::Int
127+
128+
# Total number of categories in all variables
129+
J::Int
130+
96131
# Eigenvalues
97132
unadjusted_eigs::Vector{Float64}
98133
benzecri_eigs::Vector{Float64}
@@ -134,7 +169,9 @@ function get_eigs(I, K, J)
134169
return unadjusted, ben ./ sum(ben), ben ./ gt
135170
end
136171

137-
function MCA(Z, d::Int; vnames = [])
172+
# constructor
173+
174+
function MCA(Z; vnames = [])
138175

139176
if length(vnames) == 0
140177
vnames = ["v$(j)" for j = 1:size(Z, 2)]
@@ -143,14 +180,50 @@ function MCA(Z, d::Int; vnames = [])
143180
# Get the indicator matrix
144181
X, rd, dr = make_indicators(Z)
145182

146-
C = CA(X, d)
183+
# Create the underlying correspondence analysis value
184+
C = CA(X)
185+
186+
# Number of nominal variables
187+
K = size(Z, 2)
188+
189+
# Total number of categories in all variables
190+
J = size(X, 2)
191+
192+
return MCA(C, vnames, rd, dr, Matrix{Float64}[], K, J, zeros(0), zeros(0), zeros(0))
193+
end
194+
195+
"""
196+
    mca(Z, d; vnames = [])
197+
198+
Fit a multiple correspondence analysis using the columns of `Z` as the
199+
variables. The first `d` components are retained. If `Z` is a
200+
dataframe then the column names are used as variable names, otherwise
201+
variable names may be provided as `vnames`.
202+
"""
203+
function mca(Z, d::Int; vnames = [])
    # Construct the unfitted model first, then fit it in place with `d`
    # retained components.
    model = MCA(Z; vnames = vnames)
    fit!(model, d)
    return model
end
208+
209+
# `fit` entry point for multiple correspondence analysis; forwards its
# arguments unchanged to `mca`.
function fit(::Type{MCA}, Z::AbstractMatrix, d::Int; vnames = [])
    fitted = mca(Z, d; vnames = vnames)
    return fitted
end
212+
213+
"""
    fit!(mca::MCA, d::Int)

Fit the multiple correspondence analysis `mca` in place, retaining the
first `d` components.  The underlying correspondence analysis `mca.C` is
fit first; the per-variable score arrays `mca.Gv` and the three eigenvalue
vectors (`unadjusted_eigs`, `benzecri_eigs`, `greenacre_eigs`) are then
filled in.  Returns `mca`.
"""
function fit!(mca::MCA, d::Int)

    # Fit the underlying correspondence analysis.
    fit!(mca.C, d)

    # Split the variable scores into separate arrays for each variable.
    mca.Gv = xsplit(mca.C.G, mca.rd)

    # `get_eigs(I, K, J)` expects the number of nominal variables (K)
    # before the total number of categories (J).
    una, ben, gra = get_eigs(mca.C.I, mca.K, mca.J)

    mca.unadjusted_eigs = una
    mca.benzecri_eigs = ben
    mca.greenacre_eigs = gra

    return mca
end
155228

156229
# Create an indicator matrix corresponding to the distinct
@@ -238,10 +311,10 @@ end
238311
# ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
239312
# ax.grid(true)
240313

241-
# Set up the colormap
314+
# Set up the colormap
242315
# cm = get(kwargs, :cmap, PyPlot.get_cmap("tab10"))
243316

244-
# Set up the axis limits
317+
# Set up the axis limits
245318
# mn = 1.2 * minimum(mca.C.G, dims = 1)
246319
# mx = 1.2 * maximum(mca.C.G, dims = 1)
247320
# xlim = get(kwargs, :xlim, [mn[x], mx[x]])

test/mca.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ using MultivariateStats, DataFrames, Test
88
:V3 => ["D", "D", "D", "C", "D", "C", "D", "C"],
99
)
1010

11-
m = MCA(da, 3; vnames = names(da))
11+
m = mca(da, 3; vnames = names(da))
1212
F = objectscores(m)
1313
G = variablescores(m.C)
1414

0 commit comments

Comments
 (0)