Skip to content

Commit bd8f263

Browse files
authored
Merge pull request #35 from JuliaAI/dev
For a 0.3 release
2 parents 63c7d3d + c30fbb4 commit bd8f263

File tree

6 files changed

+124
-122
lines changed

6 files changed

+124
-122
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
fail-fast: false
1818
matrix:
1919
version:
20-
- '1.0'
20+
- '1.6'
2121
- '1'
2222
os:
2323
- ubuntu-latest

Project.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "MLJMultivariateStatsInterface"
22
uuid = "1b6a4a23-ba22-4f51-9698-8599985d3728"
33
authors = ["Anthony D. Blaom <[email protected]>", "Thibaut Lienart <[email protected]>", "Okon Samuel <[email protected]>"]
4-
version = "0.2.2"
4+
version = "0.3.0"
55

66
[deps]
77
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
@@ -13,9 +13,9 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
1313
[compat]
1414
Distances = "^0.9,^0.10"
1515
MLJModelInterface = "^0.3.5,^0.4, 1.0"
16-
MultivariateStats = "0.7, 0.8"
16+
MultivariateStats = "0.9"
1717
StatsBase = "0.32, 0.33"
18-
julia = "1"
18+
julia = "1.6"
1919

2020
[extras]
2121
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"

src/models/decomposition_models.jl

Lines changed: 52 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,14 @@ $PCA_DESCR
99
1010
# Keyword Parameters
1111
12-
- `maxoutdim::Int=0`: maximum number of output dimensions, uses the smallest dimension of
12+
- `maxoutdim::Int=0`: maximum number of output dimensions, uses the smallest dimension of
1313
training feature matrix if 0 (default).
14-
- `method::Symbol=:auto`: method to use to solve the problem, one of `:auto`,`:cov`
14+
- `method::Symbol=:auto`: method to use to solve the problem, one of `:auto`,`:cov`
1515
or `:svd`
1616
- `pratio::Float64=0.99`: ratio of variance preserved
17-
- `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: if set to nothing(default)
18-
centering will be computed and applied, if set to `0` no
19-
centering(assumed pre-centered), if a vector is passed, the centering is done with
17+
- `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: if set to `nothing` (default)
18+
centering will be computed and applied, if set to `0` no
19+
centering (assumed pre-centered); if a vector is passed, the centering is done with
2020
that vector.
2121
"""
2222
@mlj_model mutable struct PCA <: MMI.Unsupervised
@@ -34,21 +34,20 @@ function MMI.fit(model::PCA, verbosity::Int, X)
3434
Xarray = MMI.matrix(X)
3535
mindim = minimum(size(Xarray))
3636
maxoutdim = model.maxoutdim == 0 ? mindim : model.maxoutdim
37-
# NOTE: copy/transpose
3837
fitresult = MS.fit(
39-
MS.PCA, transpose(Xarray);
38+
MS.PCA, Xarray';
4039
method=model.method,
4140
pratio=model.pratio,
4241
maxoutdim=maxoutdim,
4342
mean=model.mean
4443
)
4544
cache = nothing
4645
report = (
47-
indim=MS.indim(fitresult),
48-
outdim=MS.outdim(fitresult),
46+
indim=MS.size(fitresult,1),
47+
outdim=MS.size(fitresult,2),
4948
tprincipalvar=MS.tprincipalvar(fitresult),
5049
tresidualvar=MS.tresidualvar(fitresult),
51-
tvar=MS.tvar(fitresult),
50+
tvar=MS.var(fitresult),
5251
mean=copy(MS.mean(fitresult)),
5352
principalvars=copy(MS.principalvars(fitresult))
5453
)
@@ -74,14 +73,14 @@ $KPCA_DESCR
7473
7574
# Keyword Parameters
7675
77-
- `maxoutdim::Int = 0`: maximum number of output dimensions, uses the smallest
76+
- `maxoutdim::Int = 0`: maximum number of output dimensions, uses the smallest
7877
dimension of training feature matrix if 0 (default).
79-
- `kernel::Function=(x,y)->x'y`: kernel function of 2 vector arguments x and y, returns a
78+
- `kernel::Function=(x,y)->x'y`: kernel function of 2 vector arguments x and y, returns a
8079
scalar value
81-
- `solver::Symbol=:auto`: solver to use for the eigenvalues, one of `:eig`(default),
80+
- `solver::Symbol=:eig`: solver to use for the eigenvalues, one of `:eig` (default),
8281
`:eigs`
83-
- `inverse::Bool=false`: perform calculation for inverse transform
84-
- `beta::Real=1.0`: strength of the ridge regression that learns the inverse transform
82+
- `inverse::Bool=true`: perform calculations needed for inverse transform
83+
- `beta::Real=1.0`: strength of the ridge regression that learns the inverse transform
8584
when inverse is true
8685
- `tol::Real=1e-6`: convergence tolerance for the `:eigs` solver
8786
- `maxiter::Int=300`: maximum number of iterations for eigs solver
@@ -90,7 +89,7 @@ $KPCA_DESCR
9089
maxoutdim::Int = 0::(_ ≥ 0)
9190
kernel::Union{Nothing, Function} = default_kernel
9291
solver::Symbol = :eig::(_ in (:eig, :eigs))
93-
inverse::Bool = false
92+
inverse::Bool = true
9493
beta::Real = 1.0::(_ ≥ 0.0)
9594
tol::Real = 1e-6::(_ ≥ 0.0)
9695
maxiter::Int = 300::(_ ≥ 1)
@@ -102,7 +101,7 @@ function MMI.fit(model::KernelPCA, verbosity::Int, X)
102101
# default max out dim if not given
103102
maxoutdim = model.maxoutdim == 0 ? mindim : model.maxoutdim
104103
fitresult = MS.fit(
105-
MS.KernelPCA,
104+
MS.KernelPCA,
106105
permutedims(Xarray);
107106
kernel=model.kernel,
108107
maxoutdim=maxoutdim,
@@ -113,9 +112,9 @@ function MMI.fit(model::KernelPCA, verbosity::Int, X)
113112
)
114113
cache = nothing
115114
report = (
116-
indim=MS.indim(fitresult),
117-
outdim=MS.outdim(fitresult),
118-
principalvars=copy(MS.principalvars(fitresult))
115+
indim=MS.size(fitresult,1),
116+
outdim=MS.size(fitresult,2),
117+
principalvars=copy(MS.eigvals(fitresult))
119118
)
120119
return fitresult, cache, report
121120
end
@@ -143,17 +142,16 @@ $ICA_DESCR
143142
144143
- `k::Int=0`: number of independent components to recover, set automatically if `0`
145144
- `alg::Symbol=:fastica`: algorithm to use (only `:fastica` is supported at the moment)
146-
- `fun::Symbol=:tanh`: approximate neg-entropy functor, via the function
147-
`MultivariateStats.icagfun`, one of `:tanh` and `:gaus`
145+
- `fun::Symbol=:tanh`: approximate neg-entropy function, one of `:tanh`, `:gaus`
148146
- `do_whiten::Bool=true`: whether to perform pre-whitening
149147
- `maxiter::Int=100`: maximum number of iterations
150148
- `tol::Real=1e-6`: convergence tolerance for change in matrix W
151-
- `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: mean to use, if nothing (default)
152-
centering is computed andapplied, if zero, no centering, a vector of means can
149+
- `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: mean to use, if nothing (default)
150+
centering is computed and applied; if zero, no centering is done; alternatively a vector of means can
153151
be passed
154-
- `winit::Union{Nothing,Matrix{<:Real}}=nothing`: initial guess for matrix `W` either
155-
an empty matrix (random initilization of `W`), a matrix of size `k × k` (if `do_whiten`
156-
is true), a matrix of size `m × k` otherwise. If unspecified i.e `nothing` an empty
152+
- `winit::Union{Nothing,Matrix{<:Real}}=nothing`: initial guess for matrix `W` either
153+
an empty matrix (random initialization of `W`), a matrix of size `k × k` (if `do_whiten`
154+
is true), a matrix of size `m × k` otherwise. If unspecified, i.e. `nothing`, an empty
157155
`Matrix{<:Real}` is used.
158156
"""
159157
@mlj_model mutable struct ICA <: MMI.Unsupervised
@@ -168,14 +166,19 @@ $ICA_DESCR
168166
end
169167

170168
function MMI.fit(model::ICA, verbosity::Int, X)
169+
icagfun(fname::Symbol, ::Type{T} = Float64) where T<:Real=
170+
fname == :tanh ? MS.Tanh{T}(1.0) :
171+
fname == :gaus ? MS.Gaus{T}() :
172+
error("Unknown gfun $(fname)")
173+
171174
Xarray = MMI.matrix(X)
172175
n, p = size(Xarray)
173176
m = min(n, p)
174177
k = ifelse(model.k ≤ m, model.k, m)
175178
fitresult = MS.fit(
176-
MS.ICA, transpose(Xarray), k;
179+
MS.ICA, Xarray', k;
177180
alg=model.alg,
178-
fun=MS.icagfun(model.fun, eltype(Xarray)),
181+
fun=icagfun(model.fun, eltype(Xarray)),
179182
do_whiten=model.do_whiten,
180183
maxiter=model.maxiter,
181184
tol=model.tol,
@@ -184,8 +187,8 @@ function MMI.fit(model::ICA, verbosity::Int, X)
184187
)
185188
cache = nothing
186189
report = (
187-
indim=MS.indim(fitresult),
188-
outdim=MS.outdim(fitresult),
190+
indim=MS.size(fitresult,1),
191+
outdim=MS.size(fitresult,2),
189192
mean=copy(MS.mean(fitresult))
190193
)
191194
return fitresult, cache, report
@@ -211,14 +214,14 @@ $PPCA_DESCR
211214
212215
# Keyword Parameters
213216
214-
- `maxoutdim::Int=0`: maximum number of output dimensions, uses max(no_of_features - 1, 1)
217+
- `maxoutdim::Int=0`: maximum number of output dimensions, uses max(no_of_features - 1, 1)
215218
if 0 (default).
216219
- `method::Symbol=:ml`: method to use to solve the problem, one of `:ml`, `:em`, `:bayes`.
217220
- `maxiter::Int=1000`: maximum number of iterations.
218221
- `tol::Real=1e-6`: convergence tolerance.
219-
- `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: if set to nothing(default)
220-
centering will be computed and applied, if set to `0` no
221-
centering(assumed pre-centered), if a vector is passed, the centering is done with
222+
- `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: if set to `nothing` (default)
223+
centering will be computed and applied, if set to `0` no
224+
centering (assumed pre-centered); if a vector is passed, the centering is done with
222225
that vector.
223226
"""
224227
@mlj_model mutable struct PPCA <: MMI.Unsupervised
@@ -233,9 +236,8 @@ function MMI.fit(model::PPCA, verbosity::Int, X)
233236
Xarray = MMI.matrix(X)
234237
def_dim = max(1, size(Xarray, 2) - 1)
235238
maxoutdim = model.maxoutdim == 0 ? def_dim : model.maxoutdim
236-
# NOTE: copy/transpose
237239
fitresult = MS.fit(
238-
MS.PPCA, transpose(Xarray);
240+
MS.PPCA, Xarray';
239241
method=model.method,
240242
tol=model.tol,
241243
maxiter=model.maxiter,
@@ -244,8 +246,8 @@ function MMI.fit(model::PPCA, verbosity::Int, X)
244246
)
245247
cache = nothing
246248
report = (
247-
indim=MS.indim(fitresult),
248-
outdim=MS.outdim(fitresult),
249+
indim=MS.size(fitresult,1),
250+
outdim=MS.size(fitresult,2),
249251
tvar=MS.var(fitresult),
250252
mean=copy(MS.mean(fitresult)),
251253
loadings=MS.loadings(fitresult)
@@ -273,14 +275,14 @@ $PPCA_DESCR
273275
# Keyword Parameters
274276
275277
- `method::Symbol=:cm`: Method to use to solve the problem, one of `:em`, `:cm`.
276-
- `maxoutdim::Int=0`: Maximum number of output dimensions, uses max(no_of_features - 1, 1)
278+
- `maxoutdim::Int=0`: Maximum number of output dimensions, uses max(no_of_features - 1, 1)
277279
if 0 (default).
278280
- `maxiter::Int=1000`: Maximum number of iterations.
279281
- `tol::Real=1e-6`: Convergence tolerance.
280282
- `eta::Real=tol`: Variance lower bound
281-
- `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If set to nothing(default)
282-
centering will be computed and applied, if set to `0` no
283-
centering(assumed pre-centered), if a vector is passed, the centering is done with
283+
- `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If set to `nothing` (default)
284+
centering will be computed and applied, if set to `0` no
285+
centering (assumed pre-centered); if a vector is passed, the centering is done with
284286
that vector.
285287
"""
286288
@mlj_model mutable struct FactorAnalysis <: MMI.Unsupervised
@@ -296,9 +298,8 @@ function MMI.fit(model::FactorAnalysis, verbosity::Int, X)
296298
Xarray = MMI.matrix(X)
297299
def_dim = max(1, size(Xarray, 2) - 1)
298300
maxoutdim = model.maxoutdim == 0 ? def_dim : model.maxoutdim
299-
# NOTE: copy/transpose
300301
fitresult = MS.fit(
301-
MS.FactorAnalysis, transpose(Xarray);
302+
MS.FactorAnalysis, Xarray';
302303
method=model.method,
303304
maxiter=model.maxiter,
304305
tol=model.tol,
@@ -308,8 +309,8 @@ function MMI.fit(model::FactorAnalysis, verbosity::Int, X)
308309
)
309310
cache = nothing
310311
report = (
311-
indim=MS.indim(fitresult),
312-
outdim=MS.outdim(fitresult),
312+
indim=MS.size(fitresult,1),
313+
outdim=MS.size(fitresult,2),
313314
variance=MS.var(fitresult),
314315
covariance_matrix=MS.cov(fitresult),
315316
mean=MS.mean(fitresult),
@@ -344,17 +345,17 @@ for (M, MFitResultType) in model_types
344345
end
345346

346347
@eval function MMI.transform(::$M, fr::$MFitResultType, X)
347-
# X is n x d, need to transpose twice
348+
# X is n x d, need to take adjoint twice
348349
Xarray = MMI.matrix(X)
349-
Xnew = transpose(MS.transform(fr, transpose(Xarray)))
350+
Xnew = MS.predict(fr, Xarray')'
350351
return MMI.table(Xnew, prototype=X)
351352
end
352353

353354
if hasmethod(MS.reconstruct, Tuple{MFitResultType{Float64}, Matrix{Float64}})
354355
@eval function MMI.inverse_transform(::$M, fr::$MFitResultType, Y)
355-
# X is n x p, need to transpose twice
356+
# Y is n x p, need to take adjoint twice
356357
Yarray = MMI.matrix(Y)
357-
Ynew = transpose(MS.reconstruct(fr, transpose(Yarray)))
358+
Ynew = MS.reconstruct(fr, Yarray')'
358359
return MMI.table(Ynew, prototype=Y)
359360
end
360361
end

0 commit comments

Comments
 (0)