Skip to content

Commit f823c76

Browse files
authored
Merge pull request #32 from JuliaAI/adoints-not-transpose
Use adjoints not transposes
2 parents 99cf02c + e5a0450 commit f823c76

File tree

2 files changed

+70
-72
lines changed

2 files changed

+70
-72
lines changed

src/models/decomposition_models.jl

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,8 @@ function MMI.fit(model::PCA, verbosity::Int, X)
3434
Xarray = MMI.matrix(X)
3535
mindim = minimum(size(Xarray))
3636
maxoutdim = model.maxoutdim == 0 ? mindim : model.maxoutdim
37-
# NOTE: copy/transpose
3837
fitresult = MS.fit(
39-
MS.PCA, transpose(Xarray);
38+
MS.PCA, Xarray';
4039
method=model.method,
4140
pratio=model.pratio,
4241
maxoutdim=maxoutdim,
@@ -178,7 +177,7 @@ function MMI.fit(model::ICA, verbosity::Int, X)
178177
m = min(n, p)
179178
k = ifelse(model.k m, model.k, m)
180179
fitresult = MS.fit(
181-
MS.ICA, transpose(Xarray), k;
180+
MS.ICA, Xarray', k;
182181
alg=model.alg,
183182
fun=icagfun(model.fun, eltype(Xarray)),
184183
do_whiten=model.do_whiten,
@@ -238,9 +237,8 @@ function MMI.fit(model::PPCA, verbosity::Int, X)
238237
Xarray = MMI.matrix(X)
239238
def_dim = max(1, size(Xarray, 2) - 1)
240239
maxoutdim = model.maxoutdim == 0 ? def_dim : model.maxoutdim
241-
# NOTE: copy/transpose
242240
fitresult = MS.fit(
243-
MS.PPCA, transpose(Xarray);
241+
MS.PPCA, Xarray';
244242
method=model.method,
245243
tol=model.tol,
246244
maxiter=model.maxiter,
@@ -301,9 +299,8 @@ function MMI.fit(model::FactorAnalysis, verbosity::Int, X)
301299
Xarray = MMI.matrix(X)
302300
def_dim = max(1, size(Xarray, 2) - 1)
303301
maxoutdim = model.maxoutdim == 0 ? def_dim : model.maxoutdim
304-
# NOTE: copy/transpose
305302
fitresult = MS.fit(
306-
MS.FactorAnalysis, transpose(Xarray);
303+
MS.FactorAnalysis, Xarray';
307304
method=model.method,
308305
maxiter=model.maxiter,
309306
tol=model.tol,
@@ -349,17 +346,17 @@ for (M, MFitResultType) in model_types
349346
end
350347

351348
@eval function MMI.transform(::$M, fr::$MFitResultType, X)
352-
# X is n x d, need to transpose twice
349+
# X is n x d, need to take adjoint twice
353350
Xarray = MMI.matrix(X)
354-
Xnew = transpose(MS.predict(fr, transpose(Xarray)))
351+
Xnew = MS.predict(fr, Xarray')'
355352
return MMI.table(Xnew, prototype=X)
356353
end
357354

358355
if hasmethod(MS.reconstruct, Tuple{MFitResultType{Float64}, Matrix{Float64}})
359356
@eval function MMI.inverse_transform(::$M, fr::$MFitResultType, Y)
360-
# X is n x p, need to transpose twice
357+
# X is n x p, need to take adjoint twice
361358
Yarray = MMI.matrix(Y)
362-
Ynew = transpose(MS.reconstruct(fr, transpose(Yarray)))
359+
Ynew = MS.reconstruct(fr, Yarray')'
363360
return MMI.table(Ynew, prototype=Y)
364361
end
365362
end

src/models/discriminant_analysis.jl

Lines changed: 62 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -9,26 +9,26 @@ $LDA_DESCR
99
# Keyword Parameters
1010
1111
- `method::Symbol=:gevd`: choice of solver, one of `:gevd` or `:whiten` methods
12-
- `cov_w::CovarianceEstimator`=SimpleCovariance: an estimator for the within-class
12+
- `cov_w::CovarianceEstimator`=SimpleCovariance: an estimator for the within-class
1313
covariance (used in computing within-class scatter matrix, Sw), by default set
14-
to the standard `MultivariateStats.SimpleCovariance()` but
14+
to the standard `MultivariateStats.SimpleCovariance()` but
1515
could be set to any robust estimator from `CovarianceEstimation.jl`.
16-
- `cov_b::CovarianceEstimator`=SimpleCovariance: same as `cov_w` but for the between-class
16+
- `cov_b::CovarianceEstimator`=SimpleCovariance: same as `cov_w` but for the between-class
1717
covariance (used in computing between-class scatter matrix, Sb)
18-
- `out_dim::Int=0`: the output dimension, i.e dimension of the transformed space,
18+
- `out_dim::Int=0`: the output dimension, i.e dimension of the transformed space,
1919
automatically set if 0 is given (default).
20-
- `regcoef::Float64=1e-6`: regularization coefficient (default value 1e-6). A positive
21-
value `regcoef * eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added
22-
to the diagonal of Sw to improve numerical stability. This can be useful if using
20+
- `regcoef::Float64=1e-6`: regularization coefficient (default value 1e-6). A positive
21+
value `regcoef * eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added
22+
to the diagonal of Sw to improve numerical stability. This can be useful if using
2323
the standard covariance estimator.
2424
- `dist::SemiMetric=SqEuclidean`: the distance metric to use when performing classification
25-
(to compare the distance between a new point and centroids in the transformed space),
25+
(to compare the distance between a new point and centroids in the transformed space),
2626
an alternative choice can be the `CosineDist`.Defaults to `SqEuclidean`
2727
28-
See also the
28+
See also the
2929
[package documentation](https://multivariatestatsjl.readthedocs.io/en/latest/lda.html).
30-
For more information about the algorithm, see the paper by Li, Zhu and Ogihara,
31-
[Using Discriminant Analysis for Multi-class Classification:
30+
For more information about the algorithm, see the paper by Li, Zhu and Ogihara,
31+
[Using Discriminant Analysis for Multi-class Classification:
3232
An Experimental Investigation](http://citeseerx.ist.psu.edu/viewdoc/
3333
download?doi=10.1.1.89.7068&rep=rep1&type=pdf).
3434
"""
@@ -42,7 +42,7 @@ download?doi=10.1.1.89.7068&rep=rep1&type=pdf).
4242
end
4343

4444
function MMI.fit(model::LDA, ::Int, X, y)
45-
Xm_t, yplain, classes_seen, p, n, nc, nclasses, integers_seen, out_dim =
45+
Xm_t, yplain, classes_seen, p, n, nc, nclasses, integers_seen, out_dim =
4646
_check_lda_data(model, X, y)
4747
core_res = MS.fit(
4848
MS.MulticlassLDA, nc, Xm_t, Int.(yplain);
@@ -71,15 +71,14 @@ function _check_lda_data(model, X, y)
7171
class_list = MMI.classes(y[1]) # Class list containing entries in pool of y.
7272
nclasses = length(class_list)
7373
# Class list containing entries in seen in y.
74-
classes_seen = filter(in(y), class_list)
74+
classes_seen = filter(in(y), class_list)
7575
nc = length(classes_seen) # Number of classes in pool of y.
7676
integers_seen = MMI.int(classes_seen)
77-
# NOTE: copy/transpose.
7877
Xm_t = _matrix_transpose(model, X) # Now p x n matrix
7978
yplain = MMI.int(y) # Vector of n ints in {1,..., nclasses}.
8079
p, n = size(Xm_t)
8180
# Recode yplain to be in {1,..., nc}
82-
nc == nclasses || _replace!(yplain, integers_seen, 1:nc)
81+
nc == nclasses || _replace!(yplain, integers_seen, 1:nc)
8382
# Check to make sure we have more than one class in training sample.
8483
# This is to prevent Sb from being a zero matrix.
8584
if nc <= 1
@@ -112,7 +111,7 @@ function _check_lda_data(model, X, y)
112111
"where `p` is the number of features in `X`"
113112
)
114113
)
115-
end
114+
end
116115
return Xm_t, yplain, classes_seen, p, n, nc, nclasses, integers_seen, out_dim
117116
end
118117

@@ -147,7 +146,7 @@ metadata_model(LDA,
147146
descr=LDA_DESCR,
148147
path="$(PKG).LDA"
149148
)
150-
149+
151150

152151
####
153152
#### BayesianLDA
@@ -161,26 +160,26 @@ $BayesianLDA_DESCR
161160
# Keyword Parameters
162161
163162
- `method::Symbol=:gevd`: choice of solver, one of `:gevd` or `:whiten` methods
164-
- `cov_w::CovarianceEstimator=SimpleCovariance()`: an estimator for the within-class
165-
covariance (used in computing within-class scatter matrix, Sw), by default set to the
166-
standard `MultivariateStats.CovarianceEstimator` but could be set to any robust
163+
- `cov_w::CovarianceEstimator=SimpleCovariance()`: an estimator for the within-class
164+
covariance (used in computing within-class scatter matrix, Sw), by default set to the
165+
standard `MultivariateStats.CovarianceEstimator` but could be set to any robust
167166
estimator from `CovarianceEstimation.jl`.
168-
- `cov_b::CovarianceEstimator=SimpleCovariance()`: same as `cov_w` but for the
167+
- `cov_b::CovarianceEstimator=SimpleCovariance()`: same as `cov_w` but for the
169168
between-class covariance(used in computing between-class scatter matrix, Sb).
170-
- `out_dim::Int=0`: the output dimension, i.e dimension of the transformed space,
169+
- `out_dim::Int=0`: the output dimension, i.e dimension of the transformed space,
171170
automatically set if 0 is given (default).
172-
- `regcoef::Float64=1e-6`: regularization coefficient (default value 1e-6). A positive
173-
value `regcoef * eigmax(Sw)` where `Sw` is the within-class covariance estimator, is added
174-
to the diagonal of Sw to improve numerical stability. This can be useful if using the
171+
- `regcoef::Float64=1e-6`: regularization coefficient (default value 1e-6). A positive
172+
value `regcoef * eigmax(Sw)` where `Sw` is the within-class covariance estimator, is added
173+
to the diagonal of Sw to improve numerical stability. This can be useful if using the
175174
standard covariance estimator.
176-
- `priors::Union{Nothing, Vector{Float64}}=nothing`: For use in prediction with Baye's rule. If `priors = nothing` then
177-
`priors` are estimated from the class proportions in the training data. Otherwise it
178-
requires a `Vector` containing class probabilities with probabilities specified using
175+
- `priors::Union{Nothing, Vector{Float64}}=nothing`: For use in prediction with Baye's rule. If `priors = nothing` then
176+
`priors` are estimated from the class proportions in the training data. Otherwise it
177+
requires a `Vector` containing class probabilities with probabilities specified using
179178
the order given by `levels(y)` where y is the target vector.
180179
181180
See also the [package documentation](
182181
https://multivariatestatsjl.readthedocs.io/en/latest/lda.html).
183-
For more information about the algorithm, see the paper by Li, Zhu and Ogihara,
182+
For more information about the algorithm, see the paper by Li, Zhu and Ogihara,
184183
[Using Discriminant Analysis for Multi-class Classification: An Experimental Investigation](
185184
http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.89.7068&rep=rep1&type=pdf).
186185
"""
@@ -194,14 +193,14 @@ http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.89.7068&rep=rep1&type=p
194193
end
195194

196195
function MMI.fit(model::BayesianLDA, ::Int, X, y)
197-
Xm_t, yplain, classes_seen, p, n, nc, nclasses, integers_seen, out_dim =
198-
_check_lda_data(model, X, y)
196+
Xm_t, yplain, classes_seen, p, n, nc, nclasses, integers_seen, out_dim =
197+
_check_lda_data(model, X, y)
199198
## If piors are specified check if they makes sense.
200199
## This was put here to through errors much earlier
201200
if isa(model.priors, Vector)
202-
priors = _check_lda_priors(model.priors, nc, nclasses, integers_seen)
201+
priors = _check_lda_priors(model.priors, nc, nclasses, integers_seen)
203202
end
204-
203+
205204
core_res = MS.fit(
206205
MS.MulticlassLDA, nc, Xm_t, Int.(yplain);
207206
method=model.method,
@@ -234,22 +233,23 @@ function MMI.fit(model::BayesianLDA, ::Int, X, y)
234233
return fitresult, cache, report
235234
end
236235

237-
function _matrix_transpose(model::Union{LDA, BayesianLDA}, X)
238-
return MMI.matrix(X; transpose=true)
236+
function _matrix_transpose(::Union{LDA,BayesianLDA}, X)
237+
# MultivariateStats 9.0 is not supporting adjoints
238+
return MMI.matrix(X, transpose=true)
239239
end
240240

241241
@inline function _check_lda_priors(priors, nc, nclasses, integers_seen)
242242
if length(priors) != nclasses
243243
throw(ArgumentError("Invalid size of `priors`."))
244244
end
245-
245+
246246
# `priors` is esssentially always an instance of type `Vector{Float64}`.
247247
# The next two conditions implicitly checks that
248248
# ` 0 .<= priors .<= 1` and `sum(priors) ≈ 1` are true.
249249
if !isapprox(sum(priors), 1)
250250
throw(ArgumentError("probabilities specified in `priors` must sum to 1"))
251251
end
252-
if all(>=(0), priors)
252+
if all(>=(0), priors)
253253
throw(ArgumentError("probabilities specified in `priors` must non-negative"))
254254
end
255255
# Select priors for unique classes in `y` (For resampling purporses).
@@ -274,7 +274,7 @@ function MMI.predict(m::BayesianLDA, (core_res, classes_seen, priors, n), Xnew)
274274
XWt = MMI.matrix(Xnew) * core_res.proj
275275
# centroids in the transformed space, nc x o
276276
centroids = transpose(core_res.pmeans)
277-
277+
278278
# The discriminant matrix `Pr` is of dimension `nt x nc`
279279
# Pr[i,k] = -0.5*(xᵢ − µₖ)ᵀ(Σw⁻¹)(xᵢ − µₖ) + log(priorsₖ) where (Σw = Sw/n)
280280
# In the transformed space this becomes
@@ -308,7 +308,7 @@ metadata_model(
308308
descr=BayesianLDA_DESCR,
309309
path="$(PKG).BayesianLDA"
310310
)
311-
311+
312312
####
313313
#### SubspaceLDA
314314
####
@@ -320,12 +320,12 @@ $SubspaceLDA_DESCR
320320
321321
# Keyword Parameters
322322
323-
- `normalize=true`: Option to normalize the between class variance for the number of
323+
- `normalize=true`: Option to normalize the between class variance for the number of
324324
observations in each class, one of `true` or `false`.
325-
- `out_dim`: the dimension of the transformed space to be used by `predict` and
325+
- `out_dim`: the dimension of the transformed space to be used by `predict` and
326326
`transform` methods, automatically set if 0 is given (default).
327-
- `dist=SqEuclidean`: the distance metric to use when performing classification
328-
(to compare the distance between a new point and centroids in the transformed space),
327+
- `dist=SqEuclidean`: the distance metric to use when performing classification
328+
(to compare the distance between a new point and centroids in the transformed space),
329329
an alternative choice can be the `CosineDist`.
330330
331331
See also the [package documentation](
@@ -342,14 +342,14 @@ end
342342

343343
function MMI.fit(model::SubspaceLDA, ::Int, X, y)
344344
Xm_t, yplain, classes_seen, p, n, nc, nclasses, integers_seen, out_dim =
345-
_check_lda_data(model, X, y)
345+
_check_lda_data(model, X, y)
346346

347347
core_res = MS.fit(
348348
MS.SubspaceLDA, Xm_t, Int.(yplain), nc;
349349
normalize = model.normalize
350350
)
351351
# λ is a (nc -1) x 1 vector containing the eigen values sorted in descending order.
352-
λ = core_res.λ
352+
λ = core_res.λ
353353
explained_variance_ratio = λ ./ sum(λ) #proportions of variance
354354

355355
cache = nothing
@@ -409,15 +409,15 @@ $BayesianSubspaceLDA_DESCR
409409
410410
- `normalize::Bool=true`: Option to normalize the between class variance for the number of
411411
observations in each class, one of `true` or `false`.
412-
- `out_dim::Int=0`: the dimension of the transformed space to be used by `predict` and
412+
- `out_dim::Int=0`: the dimension of the transformed space to be used by `predict` and
413413
`transform` methods, automatically set if 0 is given (default).
414-
- `priors::Union{Nothing, Vector{Float64}}=nothing`: For use in prediction with Baye's
415-
rule. If `priors = nothing` then `priors` are estimated from the class proportions
416-
in the training data. Otherwise it requires a `Vector` containing class
417-
probabilities with probabilities specified using the order given by `levels(y)`
414+
- `priors::Union{Nothing, Vector{Float64}}=nothing`: For use in prediction with Baye's
415+
rule. If `priors = nothing` then `priors` are estimated from the class proportions
416+
in the training data. Otherwise it requires a `Vector` containing class
417+
probabilities with probabilities specified using the order given by `levels(y)`
418418
where y is the target vector.
419419
420-
For more information about the algorithm, see the paper by Howland & Park (2006),
420+
For more information about the algorithm, see the paper by Howland & Park (2006),
421421
"Generalizing discriminant analysis using the generalized singular value decomposition"
422422
,IEEE Trans. Patt. Anal. & Mach. Int., 26: 995-1006.
423423
"""
@@ -428,14 +428,14 @@ For more information about the algorithm, see the paper by Howland & Park (2006)
428428
end
429429

430430
function MMI.fit(model::BayesianSubspaceLDA, ::Int, X, y)
431-
Xm_t, yplain, classes_seen, p, n, nc, nclasses, integers_seen, out_dim =
432-
_check_lda_data(model, X, y)
431+
Xm_t, yplain, classes_seen, p, n, nc, nclasses, integers_seen, out_dim =
432+
_check_lda_data(model, X, y)
433433
## If piors are specified check if they makes sense.
434434
## This was put here to through errors much earlier
435435
if isa(model.priors, Vector)
436-
priors = _check_lda_priors(model.priors, nc, nclasses, integers_seen)
436+
priors = _check_lda_priors(model.priors, nc, nclasses, integers_seen)
437437
end
438-
438+
439439
core_res = MS.fit(
440440
MS.SubspaceLDA, Xm_t, Int.(yplain), nc;
441441
normalize = model.normalize
@@ -466,9 +466,9 @@ function MMI.fit(model::BayesianSubspaceLDA, ::Int, X, y)
466466
end
467467

468468
function _matrix_transpose(model::Union{SubspaceLDA, BayesianSubspaceLDA}, X)
469-
return transpose(MMI.matrix(X))
469+
return MMI.matrix(X)'
470470
end
471-
471+
472472
function MMI.fitted_params(::BayesianSubspaceLDA, (core_res, _, _, priors,_))
473473
return (
474474
projected_class_means=MS.classmeans(core_res),
@@ -487,7 +487,7 @@ function MMI.predict(
487487
#proj is the projection_matrix
488488
proj = core_res.projw * view(core_res.projLDA, :, 1:out_dim)
489489
XWt = MMI.matrix(Xnew) * proj
490-
490+
491491
# centroids in the transformed space, nc x o
492492
centroids = transpose(core_res.cmeans) * proj
493493
nc = length(classes_seen)
@@ -499,8 +499,8 @@ function MMI.predict(
499499
# Pr[i,k] = -0.5*(Pᵀxᵢ − Pᵀµₖ)ᵀ(PᵀΣw⁻¹P)(Pᵀxᵢ − Pᵀµₖ) + log(priorsₖ)
500500
# But PᵀSw⁻¹P = (1/mult)*I and PᵀΣw⁻¹P = (n-nc)/mult*I
501501
# Giving Pr[i,k] = -0.5*n*(Pᵀxᵢ − Pᵀµₖ)ᵀ(Pᵀxᵢ − Pᵀµₖ) + log(priorsₖ)
502-
# (Pᵀxᵢ − Pᵀµₖ)ᵀ(Pᵀxᵢ − Pᵀµₖ) is the SquaredEquclidean distance in the
503-
# transformed space
502+
# (Pᵀxᵢ − Pᵀµₖ)ᵀ(Pᵀxᵢ − Pᵀµₖ) is the SquaredEquclidean distance in the
503+
# transformed space
504504
Pr = pairwise(SqEuclidean(), XWt, centroids, dims=1)
505505
Pr .*= (-(n-nc)/2mult)
506506
Pr .+= log.(transpose(priors))
@@ -512,7 +512,8 @@ end
512512

513513
function MMI.transform(m::T, (core_res, out_dim, _), X) where T<:Union{SubspaceLDA, BayesianSubspaceLDA}
514514
# projection of X, XWt is nt x o where o = out dims
515-
proj = core_res.projw * view(core_res.projLDA, :, 1:out_dim) #proj is the projection_matrix
515+
proj = core_res.projw * view(core_res.projLDA, :, 1:out_dim)
516+
#proj is the projection_matrix
516517
XWt = MMI.matrix(X) * proj
517518
return MMI.table(XWt, prototype = X)
518519
end

0 commit comments

Comments
 (0)