@@ -16,7 +16,7 @@ https://maths.cnam.fr/IMG/pdf/ClassMCA_cle825cfc.pdf
16
16
"""
17
17
Correspondence Analysis
18
18
"""
19
- struct CA{T<: Real } <: LinearDimensionalityReduction
19
+ mutable struct CA{T<: Real } <: LinearDimensionalityReduction
20
20
21
21
# The data matrix
22
22
Z:: Array{T}
@@ -41,7 +41,9 @@ struct CA{T<:Real} <: LinearDimensionalityReduction
41
41
I:: Vector{T}
42
42
end
43
43
44
- function CA (X, d:: Int )
44
+ # Constructor
45
+
46
+ function CA (X)
45
47
46
48
# Convert to proportions
47
49
X = X ./ sum (X)
@@ -58,14 +60,41 @@ function CA(X, d::Int)
58
60
Wc = Diagonal (sqrt .(c))
59
61
SR = Wr \ R / Wc
60
62
63
+ T = eltype (X)
64
+ return CA (X, R, r, c, SR, zeros (T, 0 , 0 ), zeros (T, 0 , 0 ), zeros (T, 0 ))
65
+ end
66
+
67
"""
    fit!(ca::CA, d::Int)

Compute the factor scores and eigenvalues of `ca` in place, keeping the
first `d` components, and return `ca`.

The singular value decomposition of `ca.SR` is taken.  The left and right
singular vectors, scaled by the first `d` singular values and divided by the
square roots of `ca.rm` and `ca.cm` respectively, are stored in `ca.F` (object
factor scores) and `ca.G` (variable factor scores).  The squared singular
values are stored in `ca.I`.
"""
function fit!(ca::CA, d::Int)

    # Get the object factor scores (F) and variable factor scores (G).
    P, D, Q = svd(ca.SR)
    Dq = Diagonal(D)[:, 1:d]

    Wr = Diagonal(sqrt.(ca.rm))
    Wc = Diagonal(sqrt.(ca.cm))
    ca.F = Wr \ P * Dq
    ca.G = Wc \ Q * Dq

    # Get the eigenvalues
    ca.I = D .^ 2

    # Return the fitted model instead of the value of the last assignment,
    # matching the `fit!` method for `MCA`.
    return ca
end
81
+
82
"""
    fit(CA, X::AbstractMatrix, d::Int=5)
    fit(CA; X::AbstractMatrix, d::Int=5)

Construct a `CA` from the data matrix `X`, fit it in place with `fit!`
retaining `d` components, and return the fitted `CA`.
"""
function fit(::Type{CA}, X::AbstractMatrix, d::Int=5)
    ca = CA(X)
    fit!(ca, d)
    return ca::CA
end

# Keyword-only form, kept so that existing `fit(CA; X=..., d=...)` calls
# continue to work.
fit(::Type{CA}; X::AbstractMatrix, d::Int=5) = fit(CA, X, d)
87
+
88
+ """
89
+ ca
90
+
91
+ Fit a correspondence analysis using the data array `X` whose rows are
92
+ the objects and columns are the variables. The first `d` components
93
+ are retained.
94
+ """
95
function ca(X, d)
    # `fit(::Type{CA}; X, d)` takes its inputs as keyword arguments, so they
    # are passed by name here; a positional `fit(CA, X, d)` call has no
    # matching method for that definition.
    return fit(CA; X=X, d=d)
end
70
99
71
100
"""
    objectscores(ca::CA)

Return the object factor scores held in the `F` field of `ca`.
"""
function objectscores(ca::CA)
    return ca.F
end
@@ -75,7 +104,7 @@ inertia(ca::CA) = ca.I
75
104
"""
76
105
Multiple Correspondence Analysis
77
106
"""
78
- struct MCA{T<: Real } <: LinearDimensionalityReduction
107
+ mutable struct MCA{T<: Real } <: LinearDimensionalityReduction
79
108
80
109
# The underlying correspondence analysis
81
110
C:: CA{T}
@@ -93,6 +122,12 @@ struct MCA{T<:Real} <: LinearDimensionalityReduction
93
122
# each variable.
94
123
Gv:: Vector{Matrix{T}}
95
124
125
+ # Number of nominal variables
126
+ K:: Int
127
+
128
+ # Total number of categories in all variables
129
+ J:: Int
130
+
96
131
# Eigenvalues
97
132
unadjusted_eigs:: Vector{Float64}
98
133
benzecri_eigs:: Vector{Float64}
@@ -134,7 +169,9 @@ function get_eigs(I, K, J)
134
169
return unadjusted, ben ./ sum (ben), ben ./ gt
135
170
end
136
171
137
- function MCA (Z, d:: Int ; vnames = [])
172
+ # constructor
173
+
174
+ function MCA (Z; vnames = [])
138
175
139
176
if length (vnames) == 0
140
177
vnames = [" v$(j) " for j = 1 : size (Z, 2 )]
@@ -143,14 +180,50 @@ function MCA(Z, d::Int; vnames = [])
143
180
# Get the indicator matrix
144
181
X, rd, dr = make_indicators (Z)
145
182
146
- C = CA (X, d)
183
+ # Create the underlying correspondence analysis value
184
+ C = CA (X)
185
+
186
+ # Number of nominal variables
187
+ K = size (Z, 2 )
188
+
189
+ # Total number of categories in all variables
190
+ J = size (X, 2 )
191
+
192
+ return MCA (C, vnames, rd, dr, Matrix{Float64}[], K, J, zeros (0 ), zeros (0 ), zeros (0 ))
193
+ end
194
+
195
+ """
196
+ mca
197
+
198
+ Fit a multiple correspondence analysis using the columns of `Z` as the
199
+ variables. The first `d` components are retained. If `Z` is a
200
+ dataframe then the column names are used as variable names, otherwise
201
+ variable names may be provided as `vnames`.
202
+ """
203
function mca(Z, d::Int; vnames=[])
    # Build the unfitted model first, then fit it in place with `d` components.
    model = MCA(Z; vnames=vnames)
    fit!(model, d)
    return model
end
208
+
209
# `fit` entry point for `MCA`; forwards all of its arguments to `mca`.
fit(::Type{MCA}, Z::AbstractMatrix, d::Int; vnames=[]) = mca(Z, d; vnames=vnames)
212
+
213
"""
    fit!(mca::MCA, d::Int)

Fit `mca` in place, keeping the first `d` components, and return `mca`.

The underlying correspondence analysis `mca.C` is fit first.  Its variable
scores `mca.C.G` are then split into one array per variable and stored in
`mca.Gv`, and the three sets of eigenvalues returned by `get_eigs` are stored
in `mca.unadjusted_eigs`, `mca.benzecri_eigs` and `mca.greenacre_eigs`.
"""
function fit!(mca::MCA, d::Int)

    fit!(mca.C, d)

    # Split the variable scores into separate arrays for each variable.
    mca.Gv = xsplit(mca.C.G, mca.rd)

    # `get_eigs(I, K, J)` expects the number of nominal variables `K` before
    # the total number of categories `J`.
    una, ben, gra = get_eigs(mca.C.I, mca.K, mca.J)

    mca.unadjusted_eigs = una
    mca.benzecri_eigs = ben
    mca.greenacre_eigs = gra

    return mca
end
155
228
156
229
# Create an indicator matrix corresponding to the distinct
@@ -238,10 +311,10 @@ end
238
311
# ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
239
312
# ax.grid(true)
240
313
241
- # Set up the colormap
314
+ # Set up the colormap
242
315
# cm = get(kwargs, :cmap, PyPlot.get_cmap("tab10"))
243
316
244
- # Set up the axis limits
317
+ # Set up the axis limits
245
318
# mn = 1.2 * minimum(mca.C.G, dims = 1)
246
319
# mx = 1.2 * maximum(mca.C.G, dims = 1)
247
320
# xlim = get(kwargs, :xlim, [mn[x], mx[x]])
0 commit comments