Merge pull request #11 from OkonSamuel/bugfix

ablaom · web-flow · commit 8523ae08d98c · 2020-10-27T08:18:59.000+13:00
Fix bug, improve code efficiency and add more tests
diff --git a/src/models/discriminant_analysis.jl b/src/models/discriminant_analysis.jl
@@ -67,6 +67,24 @@ function MMI.fit(model::LDA, ::Int, X, y)
     return fitresult, cache, report
 end
 
+
+"""
+    _replace!(y::AbstractVector, z::AbstractVector, r::AbstractRange)
+
+Replace, in place, each entry of `y` equal to some `z[j]` by `r[j]` (first match only, as
+`Base.replace!(y, (z .=> r)...)` does) and return `y`; `z`, `r` must have equal length.
+"""
+function _replace!(y::AbstractVector, z::AbstractVector, r::AbstractRange)
+    length(r) == length(z) ||
+        throw(ArgumentError("`z` and `r` has to be of the same length"))
+    @inbounds for i in eachindex(y)
+        for (from, to) in zip(z, r)  # `break`: never re-map a value just written
+            isequal(from, y[i]) && (y[i] = to; break)
+        end
+    end
+    return y
+end
+
 function _check_lda_data(model, X, y)
     class_list = MMI.classes(y[1]) # Class list containing entries in pool of y.
     nclasses = length(class_list)
@@ -79,7 +97,7 @@ function _check_lda_data(model, X, y)
     yplain = MMI.int(y) # Vector of n ints in {1,..., nclasses}.
     p, n = size(Xm_t)
     # Recode yplain to be in {1,..., nc}
-    nc == nclasses || replace!(yplain, (integers_seen .=> 1:nc)...)
+    nc == nclasses || _replace!(yplain, integers_seen, 1:nc) 
     # Check to make sure we have more than one class in training sample.
     # This is to prevent Sb from being a zero matrix.
     if nc <= 1
@@ -90,7 +108,6 @@ function _check_lda_data(model, X, y)
             )
         )
     end
-
     # Check to make sure we have more samples than classes.
     # This is to prevent Sw from being the zero matrix.
     if n <= nc
@@ -134,7 +151,7 @@ function MMI.predict(m::LDA, (core_res, classes_seen), Xnew)
     # compute the distances in the transformed space between pairs of rows
     # the probability matrix Pr is `n x nc` and normalised accross rows
     Pr = pairwise(m.dist, XWt, centroids, dims=1)
-    Pr .= Pr .* -1
+    Pr .*= -1
     # apply a softmax transformation
     softmax!(Pr)
     return MMI.UnivariateFinite(classes_seen, Pr)
@@ -239,26 +256,33 @@ function _matrix_transpose(model::Union{LDA, BayesianLDA}, X)
     return MMI.matrix(X; transpose=true)
 end
 
-function _check_lda_priors(priors, nc, nclasses, integers_seen)
+@inline function _check_lda_priors(priors, nc, nclasses, integers_seen)
     if length(priors) != nclasses
         throw(ArgumentError("Invalid size of `priors`."))
-    end 
+    end
+
+    # `priors` must be a probability vector with one entry per class in the pool.
+    # Requiring `sum(priors) ≈ 1` together with non-negative entries implicitly
+    # also ensures that no entry exceeds 1 (up to the `isapprox` tolerance).
     if !isapprox(sum(priors), 1)
         throw(ArgumentError("probabilities specified in `priors` must sum to 1"))
     end
-    if any(model.priors .< 0)
-        throw(ArgumentError("probabilities specified in `priors` must non-negative"))
+    if !all(>=(0), priors)
+        throw(ArgumentError("probabilities specified in `priors` must be non-negative"))
     end
     # Select priors for unique classes in `y` (For resampling purporses).
-    priors_ = nc == nclasses ? model.priors : @view model.priors[integers_seen]
+    priors_ = nc == nclasses ? priors : @view priors[integers_seen]
     return priors_
 end
 
+_get_priors(priors::SubArray) = copy(priors)
+_get_priors(priors) = priors
+
 function MMI.fitted_params(::BayesianLDA, (core_res, classes_seen, priors, n))
    return (
        projected_class_means=MS.classmeans(core_res),
        projection_matrix=MS.projection(core_res),
-       priors=priors
+       priors=_get_priors(priors)
     )
 end
 
@@ -278,17 +302,17 @@ function MMI.predict(m::BayesianLDA, (core_res, classes_seen, priors, n), Xnew)
     # with (Pᵀxᵢ −  Pᵀµₖ)ᵀ(Pᵀxᵢ −  Pᵀµₖ) being the SquaredEquclidean distance between
     # pairs of rows in the transformed space
     Pr = pairwise(SqEuclidean(), XWt, centroids, dims=1)
-    Pr .*= (-0.5*n)
-    Pr .+= log.(priors)'
+    Pr .*= (-n/2)
+    Pr .+= log.(transpose(priors))
 
     # apply a softmax transformation to convert Pr to a probability matrix
     softmax!(Pr)
     return MMI.UnivariateFinite(classes_seen, Pr)
 end
 
-function MMI.transform(m::T, (core_res,), X) where T<:Union{LDA, BayesianLDA}
+function MMI.transform(m::T, (core_res, ), X) where T<:Union{LDA, BayesianLDA}
     # projection of X, XWt is nt x o  where o = out dims
-    proj = core_res.projw * core_res.projLDA #proj is the projection_matrix
+    proj = core_res.proj #proj is the projection_matrix
     XWt = MMI.matrix(X) * proj
     return MMI.table(XWt, prototype = X)
 end
@@ -374,7 +398,7 @@ function MMI.predict(m::SubspaceLDA, (core_res, out_dim, classes_seen), Xnew)
     # compute the distances in the transformed space between pairs of rows
     # the probability matrix is `nt x nc` and normalised accross rows
     Pr = pairwise(m.dist, XWt, centroids, dims=1)
-    Pr .= Pr .* -1
+    Pr .*= -1
     # apply a softmax transformation
     softmax!(Pr)
     return MMI.UnivariateFinite(classes_seen, Pr)
@@ -461,13 +485,13 @@ end
 
 function _matrix_transpose(model::Union{SubspaceLDA, BayesianSubspaceLDA}, X)
     return transpose(MMI.matrix(X))
-end 
-
+end
+ 
 function MMI.fitted_params(::BayesianSubspaceLDA, (core_res, _, _, priors,_))
     return (
         projected_class_means=MS.classmeans(core_res),
         projection_matrix=MS.projection(core_res),
-        priors=priors
+        priors=_get_priors(priors)
     )
 end
 
@@ -496,8 +520,8 @@ function MMI.predict(
     # (Pᵀxᵢ −  Pᵀµₖ)ᵀ(Pᵀxᵢ −  Pᵀµₖ) is the SquaredEquclidean distance in the 
     # transformed space  
     Pr = pairwise(SqEuclidean(), XWt, centroids, dims=1)
-    Pr .*= (-0.5 * (n-nc)/mult)
-    Pr .+= log.(priors)'
+    Pr .*= (-(n-nc)/2mult)
+    Pr .+= log.(transpose(priors))
 
     # apply a softmax transformation to convert Pr to a probability matrix
     softmax!(Pr)
diff --git a/test/models/discriminant_analysis.jl b/test/models/discriminant_analysis.jl
@@ -1,4 +1,5 @@
 @testset "MulticlassLDA" begin
+    ## Data
     Xfull, y = @load_smarket
     X = selectcols(Xfull, [:Lag1,:Lag2])
     train = selectcols(Xfull, :Year) .< Dates.Date(2005)
@@ -8,20 +9,33 @@
     Xtest = selectrows(X, test)
     ytest = selectrows(y, test)
 
-    LDA_model = LDA()
-    fitresult, = fit(LDA_model, 1, Xtrain, ytrain)
-    class_means, projection_matrix = fitted_params(LDA_model, fitresult)
-    preds = predict(LDA_model, fitresult, Xtest)
+    lda_model = LDA()
+    
+    ## Check model `fit`
+    fitresult, = fit(lda_model, 1, Xtrain, ytrain)
+    class_means, projection_matrix = fitted_params(lda_model, fitresult)
+    @test round.(class_means', sigdigits = 3) == [0.0428 0.0339; -0.0395 -0.0313]
+    ## Check model `predict`
+    preds = predict(lda_model, fitresult, Xtest)
     mce = cross_entropy(preds, ytest) |> mean
     @test 0.685 ≤ mce ≤ 0.695
-    @test round.(class_means', sigdigits = 3) == [0.0428 0.0339; -0.0395 -0.0313]
+    ## Check model `transform`
+    # MultivariateStats Linear Discriminant Analysis transform
+    proj = fitresult[1].proj
+    XWt = matrix(X) * proj
+    tlda_ms = table(XWt, prototype=X)
+    # MLJ Linear Discriminant Analysis transform
+    tlda_mlj = transform(lda_model, fitresult, X)
+    @test tlda_mlj == tlda_ms
+    ## Check model traits
     d = info_dict(LDA)
     @test d[:input_scitype] == Table(Continuous)
     @test d[:target_scitype] == AbstractVector{<:Finite}
     @test d[:name] == "LDA"
 end
 
 @testset "MLDA-2" begin
+    ## Data
     Random.seed!(1125)
     X1 = -2 .+ randn(100, 2)
     X2 = randn(100, 2)
@@ -41,14 +55,17 @@ end
     ytrain = selectrows(y, train)
     Xtest = selectrows(X, test)
     ytest = selectrows(y, test)
+    
     lda_model = LDA()
+    ## Check model `fit`/`predict`
     fitresult, = fit(lda_model, 1, Xtrain, ytrain)
     preds = predict_mode(lda_model, fitresult, Xtest)
     mcr = misclassification_rate(preds, ytest)
     @test mcr ≤ 0.15
 end
 
 @testset "BayesianMulticlassLDA" begin
+    ## Data
     Xfull, y = @load_smarket
     X = selectcols(Xfull, [:Lag1,:Lag2])
     train = selectcols(Xfull, :Year) .< Dates.Date(2005)
@@ -57,30 +74,32 @@ end
     ytrain = selectrows(y, train)
     Xtest = selectrows(X, test)
     ytest = selectrows(y, test)
+    
     BLDA_model = BayesianLDA()
+    ## Check model `fit`
     fitresult, = fit(BLDA_model, 1, Xtrain, ytrain)
     class_means, projection_matrix, priors = fitted_params(BLDA_model, fitresult)
+    @test round.(class_means', sigdigits = 3) == [0.0428 0.0339; -0.0395 -0.0313]
+    ## Check model `predict`
     preds = predict(BLDA_model, fitresult, Xtest)
     mce = cross_entropy(preds, ytest) |> mean
     @test 0.685 ≤ mce ≤ 0.695
-    @test round.(class_means', sigdigits = 3) == [0.0428 0.0339; -0.0395 -0.0313]
+    ## Check model traits
     d = info_dict(BayesianLDA)
     @test d[:input_scitype] == Table(Continuous)
     @test d[:target_scitype] == AbstractVector{<:Finite}
     @test d[:name] == "BayesianLDA"
 end
 
 @testset "BayesianSubspaceLDA" begin
+    ## Data 
     X, y = @load_iris
     LDA_model = BayesianSubspaceLDA()
+    ## Check model `fit`
     fitresult, _, report = fit(LDA_model, 1, X, y)
     class_means, projection_matrix, prior_probabilities = fitted_params(
         LDA_model, fitresult
     )
-    preds=predict(LDA_model, fitresult, X)
-    predicted_class = predict_mode(LDA_model, fitresult, X)
-    mcr = misclassification_rate(predicted_class, y)
-    mce = cross_entropy(preds, y) |> mean
     @test mean(
         abs.(
             class_means' - [
@@ -101,16 +120,24 @@ end
         )
     ) < 0.05
     @test round.(prior_probabilities, sigdigits=7) == [0.3333333, 0.3333333, 0.3333333]
-    @test round.(mcr, sigdigits=1) == 0.02
     @test round.(report.explained_variance_ratio, digits=4) == [0.9915, 0.0085]
+    
+    ## Check model `predict`
+    preds=predict(LDA_model, fitresult, X)
+    predicted_class = predict_mode(LDA_model, fitresult, X)
+    mcr = misclassification_rate(predicted_class, y)
+    mce = cross_entropy(preds, y) |> mean
+    @test round.(mcr, sigdigits=1) == 0.02   
     @test 0.04 ≤ mce ≤ 0.045
+    ## Check model traits
     d = info_dict(BayesianSubspaceLDA)
     @test d[:input_scitype] == Table(Continuous)
     @test d[:target_scitype] == AbstractVector{<:Finite}
     @test d[:name] == "BayesianSubspaceLDA"
 end
 
 @testset "SubspaceLDA" begin
+    ## Data
     Random.seed!(1125)
     X1 = -2 .+ randn(100, 2)
     X2 = randn(100, 2)
@@ -130,7 +157,9 @@ end
     ytrain = selectrows(y, train)
     Xtest = selectrows(X, test)
     ytest = selectrows(y, test)
+    
     lda_model = SubspaceLDA()
+    ## Check model `fit`/ `transform`
     fitresult, = fit(lda_model, 1, Xtrain, ytrain)
     preds = predict_mode(lda_model, fitresult, Xtest)
     mcr = misclassification_rate(preds, ytest)
@@ -144,8 +173,51 @@ end
     # MLJ Linear Discriminant Analysis transform
     tlda_mlj = transform(lda_model, fitresult, X)
     @test tlda_mlj == tlda_ms
+    ## Check model traits
     d = info_dict(SubspaceLDA)
     @test d[:input_scitype] == Table(Continuous)
     @test d[:target_scitype] == AbstractVector{<:Finite}
     @test d[:name] == "SubspaceLDA"
-end
+end
+
+@testset "discriminant models checks" begin
+## Data to be used for tests
+y = categorical(["apples", "oranges", "carrots", "mango"])
+X = (x1 =rand(4), x2 = collect(1:4))
+
+## Note: The following tests depend on the order in which they are written.
+## Hence do not change the ordering of the tests.
+ 
+## Check to make sure error is thrown if we only have a single
+## unique class during training.
+model = LDA()
+# categorical array with same pool as y but only containing "apples"
+y1 = y[[1,1,1,1]]
+@test_throws ArgumentError fit(model, 1, X, y1)
+
+## Check to make sure error is thrown if we don't have more samples
+## than unique classes during training.
+@test_throws ArgumentError fit(model, 1, X, y)
+
+## Check to make sure error is thrown if `out_dim` exceeds the number of features in 
+## sample matrix used in training.
+model = LDA(out_dim=3)
+# categorical array with same pool as y but only containing "apples" & "oranges"
+y2 = y[[1,2,1,2]] 
+@test_throws ArgumentError fit(model, 1, X, y2)
+
+## Check to make sure error is thrown if length(`priors`) !=  number of classes 
+## in common pool of target vector used in training.
+model = BayesianLDA(priors=[0.1, 0.5, 0.4])
+@test_throws ArgumentError fit(model, 1, X, y)
+
+## Check to make sure error is thrown if sum(`priors`) isn't approximately equal to 1.  
+model = BayesianLDA(priors=[0.1, 0.5, 0.4, 0.2])
+@test_throws ArgumentError fit(model, 1, X, y)
+
+## Check to make sure error is thrown if any entry of `priors` is negative or exceeds 1.
+model = BayesianLDA(priors=[-0.1, 0.0, 1.0, 0.1])
+@test_throws ArgumentError fit(model, 1, X, y)
+model = BayesianLDA(priors=[1.1, 0.0, 0.0, -0.1])
+@test_throws ArgumentError fit(model, 1, X, y)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -8,6 +8,8 @@ using MLJMultivariateStatsInterface
 using StableRNGs
 using Test
 
+const MS = MultivariateStats
+
 include("testutils.jl")
 println("\nutils"); include("utils.jl")
 println("\ncomponent_analysis"); include("models/decomposition_models.jl")