@@ -134,7 +134,7 @@ metadata_model(
     outdim::Int = 0::(_ ≥ 0)
     regcoef::Float64 = 1e-6::(_ ≥ 0)
     priors::Union{
-        Nothing,
+        Nothing,
         UnivariateFinite{<:Any, <:Any, <:Any, <:Real},
         Dict{<:Any, <:Real}
     } = nothing
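For orientation (not part of the commit): the `priors` field above accepts `nothing`, a `Dict`, or a `UnivariateFinite`. A minimal sketch of the `Dict` form, assuming this hunk belongs to `BayesianLDA` and using made-up class labels:

```julia
using MLJ
BayesianLDA = @load BayesianLDA pkg=MultivariateStats

# Keys must be levels of the training target; values should sum to one.
model = BayesianLDA(priors=Dict("setosa" => 0.2, "versicolor" => 0.3, "virginica" => 0.5))
```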
@@ -299,7 +299,7 @@ function subspace_outdim(core_res, outdim)
 end

 function explained_variance(core_res)
-    # λ is a `min(r, nc -1) x 1` vector containing the eigen values sorted in descending
+    # λ is a `min(r, nc - 1) x 1` vector containing the eigenvalues sorted in descending
     # order, where `r` is the rank of the within-class covariance matrix.
     λ = core_res.λ
     return λ ./ sum(λ) # proportions of variance
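A quick numeric illustration of what `explained_variance` computes, with hypothetical eigenvalues (not taken from the commit):

```julia
λ = [4.2, 1.1, 0.3]   # eigenvalues, sorted in descending order
λ ./ sum(λ)           # ≈ [0.75, 0.196, 0.054]; proportions of total variance, summing to 1
```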
@@ -325,14 +325,14 @@ function MMI.fit(model::SubspaceLDA, ::Int, X, y)
         class_weights=MS.classweights(core_res),
         explained_variance_ratio=explained_variance(core_res),
     )
-
+
     fitresult = (core_res, outdim, classes_seen, pool)
     return fitresult, cache, report
 end

 function MMI.fitted_params(::SubspaceLDA, (core_res, outdim, classes_seen, _))
     return (
-        classes=classes_seen,
+        classes=classes_seen,
         projection_matrix=core_res.projw * view(core_res.projLDA, :, 1:outdim)
     )
 end
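The `projection_matrix` returned above composes the whitening projection `projw` with the first `outdim` discriminant directions in `projLDA`. A hedged usage sketch; `mach` and `Xnew` are assumed names, not part of the commit:

```julia
W = fitted_params(mach).projection_matrix  # indim × outdim
Z = MLJ.matrix(Xnew) * W                   # rows of Xnew mapped into the discriminant subspace
# `transform(mach, Xnew)` applies the same projection through the MLJ interface.
```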
@@ -374,7 +374,7 @@ metadata_model(
     normalize::Bool = false
     outdim::Int = 0::(_ ≥ 0)
     priors::Union{
-        Nothing,
+        Nothing,
         UnivariateFinite{<:Any, <:Any, <:Any, <:Real},
         Dict{<:Any, <:Real}
     } = nothing
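As with the previous model, `priors` may also be given as a `UnivariateFinite`. A sketch with hypothetical labels, assuming this hunk belongs to `BayesianSubspaceLDA` and using the `UnivariateFinite` constructor exported by MLJ (via CategoricalDistributions.jl):

```julia
using MLJ
BayesianSubspaceLDA = @load BayesianSubspaceLDA pkg=MultivariateStats

priors = UnivariateFinite(["a", "b", "c"], [0.2, 0.3, 0.5], pool=missing)
model = BayesianSubspaceLDA(priors=priors)
```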
@@ -415,7 +415,7 @@ function MMI.fit(model::BayesianSubspaceLDA, ::Int, X, y)
         class_weights=MS.classweights(core_res),
         explained_variance_ratio=explained_variance(core_res),
     )
-
+
     fitresult = (core_res, outdim, classes_seen, pool, priors, n, mult)
     return fitresult, cache, report
 end
@@ -501,19 +501,19 @@ metadata_model(
 $(MMI.doc_header(LDA))

 [Multiclass linear discriminant
-analysis](https://en.wikipedia.org/wiki/Linear_discriminant_analysis) learns a projection
-in a space of features to a lower dimensional space, in a way that attempts to preserve
+analysis](https://en.wikipedia.org/wiki/Linear_discriminant_analysis) learns a projection
+in a space of features to a lower dimensional space, in a way that attempts to preserve
 as much as possible the degree to which the classes of a discrete target variable can be
 discriminated. This can be used either for dimension reduction of the features (see
-`transform` below) or for probabilistic classification of the target
+`transform` below) or for probabilistic classification of the target
 (see `predict` below).

 In the case of prediction, the class probability for a new observation reflects the
 proximity of that observation to training observations associated with that class, and how
 far away the observation is from observations associated with other classes. Specifically,
-the distances, in the transformed (projected) space, of a new observation, from the
-centroid of each target class, is computed; the resulting vector of distances, multiplied
-by minus one, is passed to a softmax function to obtain a class probability prediction.
+the distances, in the transformed (projected) space, of a new observation, from the
+centroid of each target class, are computed; the resulting vector of distances, multiplied
+by minus one, is passed to a softmax function to obtain a class probability prediction.
 Here "distance" is computed using a user-specified distance function.

 # Training data
@@ -524,10 +524,10 @@ In MLJ or MLJBase, bind an instance `model` to data with

 Here:

-- `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype
+- `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype
   `Continuous`; check column scitypes with `schema(X)`.

-- `y` is the target, which can be any `AbstractVector` whose element scitype is
+- `y` is the target, which can be any `AbstractVector` whose element scitype is
   `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.

 Train the machine using `fit!(mach, rows=...)`.
@@ -537,7 +537,7 @@ Train the machine using `fit!(mach, rows=...)`.
 - `method::Symbol=:gevd`: The solver, one of `:gevd` or `:whiten` methods.

 - `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class
-  covariance (used in computing the within-class scatter matrix, `Sw`). Any robust
+  covariance (used in computing the within-class scatter matrix, `Sw`). Any robust
   estimator from `CovarianceEstimation.jl` can be used.

 - `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the
@@ -548,12 +548,12 @@ Train the machine using `fit!(mach, rows=...)`.

 - `regcoef::Float64=1e-6`: The regularization coefficient. A positive value
   `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the
-  diagonal of `Sw` to improve numerical stability. This can be useful if using the
+  diagonal of `Sw` to improve numerical stability. This can be useful if using the
   standard covariance estimator.

 - `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification
-  (to compare the distance between a new point and centroids in the transformed space);
-  must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g.,
+  (to compare the distance between a new point and centroids in the transformed space);
+  must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g.,
   `Distances.CosineDist`.

 # Operations
@@ -575,7 +575,7 @@ The fields of `fitted_params(mach)` are:
 - `classes`: The classes seen during model fitting.

 - `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where
-  `indim` and `outdim` are the input and output dimensions respectively (See Report
+  `indim` and `outdim` are the input and output dimensions respectively (See Report
   section below).

 # Report
@@ -591,12 +591,12 @@ The fields of `report(mach)` are:
 - `nclasses`: The number of classes directly observed in the training data (which can be
   less than the total number of classes in the class pool).

-- `class_means`: The class-specific means of the training data. A matrix of size
-  `(indim, nclasses)` with the ith column being the class-mean of the ith class in
+- `class_means`: The class-specific means of the training data. A matrix of size
+  `(indim, nclasses)` with the ith column being the class-mean of the ith class in
   `classes` (See fitted params section above).

-- `class_weights`: The weights (class counts) of each class. A vector of length
-  `nclasses` with the ith element being the class weight of the ith class in
+- `class_weights`: The weights (class counts) of each class. A vector of length
+  `nclasses` with the ith element being the class weight of the ith class in
   `classes`. (See fitted params section above.)

 - `Sb`: The between-class scatter matrix.
@@ -663,7 +663,7 @@ Train the machine using `fit!(mach, rows=...)`.
 - `method::Symbol=:gevd`: choice of solver, one of `:gevd` or `:whiten` methods.

 - `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class
-  covariance (used in computing the within-class scatter matrix, `Sw`). Any robust
+  covariance (used in computing the within-class scatter matrix, `Sw`). Any robust
   estimator from `CovarianceEstimation.jl` can be used.

 - `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the
@@ -674,13 +674,13 @@ Train the machine using `fit!(mach, rows=...)`.

 - `regcoef::Float64=1e-6`: The regularization coefficient. A positive value
   `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the
-  diagonal of `Sw` to improve numerical stability. This can be useful if using the
+  diagonal of `Sw` to improve numerical stability. This can be useful if using the
   standard covariance estimator.

-- `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real},
-  Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If
-  `priors = nothing` then `priors` are estimated from the class proportions in the
-  training data. Otherwise it requires a `Dict` or `UnivariateFinite` object specifying
+- `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real},
+  Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If
+  `priors = nothing` then `priors` are estimated from the class proportions in the
+  training data. Otherwise it requires a `Dict` or `UnivariateFinite` object specifying
   the classes with non-zero probabilities in the training target.

 # Operations
@@ -689,7 +689,7 @@ Train the machine using `fit!(mach, rows=...)`.
   should have the same scitype as `X` above.

 - `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which
-  should have the same scitype as `X` above. Predictions are probabilistic but
+  should have the same scitype as `X` above. Predictions are probabilistic but
   uncalibrated.

 - `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned
@@ -703,7 +703,7 @@ The fields of `fitted_params(mach)` are:
 - `classes`: The classes seen during model fitting.

 - `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where
-  `indim` and `outdim` are the input and output dimensions respectively (See Report
+  `indim` and `outdim` are the input and output dimensions respectively (See Report
   section below).

 - `priors`: The class priors for classification. As inferred from training target `y`, if
@@ -713,7 +713,7 @@ The fields of `fitted_params(mach)` are:

 The fields of `report(mach)` are:

-- `indim`: The dimension of the input space i.e the number of features of training matrix.
+- `indim`: The dimension of the input space, i.e., the number of training features.

 - `outdim`: The dimension of the transformed space the model is projected to.

@@ -722,12 +722,12 @@ The fields of `report(mach)` are:
 - `nclasses`: The number of classes directly observed in the training data (which can be
   less than the total number of classes in the class pool).

-- `class_means`: The class-specific means of the training data. A matrix of size
-  `(indim, nclasses)` with the ith column being the class-mean of the ith class in
+- `class_means`: The class-specific means of the training data. A matrix of size
+  `(indim, nclasses)` with the ith column being the class-mean of the ith class in
   `classes` (See fitted params section above).

-- `class_weights`: The weights (class counts) of each class. A vector of length
-  `nclasses` with the ith element being the class weight of the ith class in
+- `class_weights`: The weights (class counts) of each class. A vector of length
+  `nclasses` with the ith element being the class weight of the ith class in
   `classes`. (See fitted params section above.)

 - `Sb`: The between-class scatter matrix.
@@ -761,14 +761,14 @@ BayesianLDA
 $(MMI.doc_header(SubspaceLDA))

 Multiclass subspace linear discriminant analysis (LDA) is a variation on ordinary
-[`LDA`](@ref) suitable for high dimensional data, as it avoids storing scatter matrices.
+[`LDA`](@ref) suitable for high-dimensional data, as it avoids storing scatter matrices.
 For details, refer to the [MultivariateStats.jl
 documentation](https://juliastats.org/MultivariateStats.jl/stable/).

 In addition to dimension reduction (using `transform`) probabilistic classification is
 provided (using `predict`). In the case of classification, the class probability for a new
 observation reflects the proximity of that observation to training observations associated
-with that class, and how far away the observation is from observations associated with
+with that class, and how far away the observation is from observations associated with
 other classes. Specifically, the distances, in the transformed (projected) space, of a new
 observation, from the centroid of each target class, are computed; the resulting vector of
 distances, multiplied by minus one, is passed to a softmax function to obtain a class
@@ -783,26 +783,26 @@ In MLJ or MLJBase, bind an instance `model` to data with

 Here:

-- `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype
+- `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype
   `Continuous`; check column scitypes with `schema(X)`.

-- `y` is the target, which can be any `AbstractVector` whose element scitype is
+- `y` is the target, which can be any `AbstractVector` whose element scitype is
   `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.

 Train the machine using `fit!(mach, rows=...)`.

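The docstring's own "# Examples" section lies outside this diff; as a stopgap, here is a hedged end-to-end sketch of the workflow just described (the iris data is purely illustrative):

```julia
using MLJ
SubspaceLDA = @load SubspaceLDA pkg=MultivariateStats

X, y = @load_iris
mach = machine(SubspaceLDA(), X, y)
fit!(mach, rows=1:120)              # train on the first 120 rows
Z    = transform(mach, X)           # dimension-reduced features
yhat = predict(mach, rows=121:150)  # probabilistic predictions for held-out rows
```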
 # Hyper-parameters

-- `normalize=true`: Option to normalize the between class variance for the number of
+- `normalize=true`: Option to normalize the between-class variance for the number of
   observations in each class, one of `true` or `false`.

-- `outdim`: the ouput dimension, automatically set to `min(indim, nclasses-1)` if equal
-  to `0`. If a non-zero `outdim` is passed, then the actual output dimension used is
+- `outdim`: the output dimension, automatically set to `min(indim, nclasses-1)` if equal
+  to `0`. If a non-zero `outdim` is passed, then the actual output dimension used is
   `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.

 - `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification
-  (to compare the distance between a new point and centroids in the transformed space);
-  must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g.,
+  (to compare the distance between a new point and centroids in the transformed space);
+  must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g.,
   `Distances.CosineDist`.


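To make the `outdim` bullet above concrete, with made-up dimensions:

```julia
# With indim = 10 features and nclasses = 3 classes, the default outdim = 0
# resolves to min(10, 3 - 1) = 2. A non-zero request is capped by the rank of
# the within-class covariance matrix, i.e. min(rank, outdim):
model = SubspaceLDA(outdim=5)  # actual output dimension is min(rank, 5)
```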
@@ -825,14 +825,14 @@ The fields of `fitted_params(mach)` are:
 - `classes`: The classes seen during model fitting.

 - `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where
-  `indim` and `outdim` are the input and output dimensions respectively (See Report
+  `indim` and `outdim` are the input and output dimensions respectively (See Report
   section below).

 # Report

 The fields of `report(mach)` are:

-- `indim`: The dimension of the input space i.e the number of features of training matrix.
+- `indim`: The dimension of the input space, i.e., the number of training features.

 - `outdim`: The dimension of the transformed space the model is projected to.

@@ -841,15 +841,15 @@ The fields of `report(mach)` are:
 - `nclasses`: The number of classes directly observed in the training data (which can be
   less than the total number of classes in the class pool).

-`class_means`: The class-specific means of the training data. A matrix of size
-`(indim, nclasses)` with the ith column being the class-mean of the ith class in
+- `class_means`: The class-specific means of the training data. A matrix of size
+  `(indim, nclasses)` with the ith column being the class-mean of the ith class in
   `classes` (See fitted params section above).

-- `class_weights`: The weights (class counts) of each class. A vector of length
-  `nclasses` with the ith element being the class weight of the ith class in
+- `class_weights`: The weights (class counts) of each class. A vector of length
+  `nclasses` with the ith element being the class weight of the ith class in
   `classes`. (See fitted params section above.)

-- `explained_variance_ratio`: The ratio of explained variance to total variance. Each
+- `explained_variance_ratio`: The ratio of explained variance to total variance. Each
   dimension corresponds to an eigenvalue.

 # Examples
@@ -903,14 +903,14 @@ Train the machine using `fit!(mach, rows=...)`.
 - `normalize=true`: Option to normalize the between-class variance for the number of
   observations in each class, one of `true` or `false`.

-`outdim`: the ouput dimension, automatically set to `min(indim, nclasses-1)` if equal
-to `0`. If a non-zero `outdim` is passed, then the actual output dimension used is
+- `outdim`: the output dimension, automatically set to `min(indim, nclasses-1)` if equal
+  to `0`. If a non-zero `outdim` is passed, then the actual output dimension used is
   `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.

-- `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real},
-  Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If
-  `priors = nothing` then `priors` are estimated from the class proportions in the
-  training data. Otherwise it requires a `Dict` or `UnivariateFinite` object specifying
+- `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real},
+  Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If
+  `priors = nothing` then `priors` are estimated from the class proportions in the
+  training data. Otherwise it requires a `Dict` or `UnivariateFinite` object specifying
   the classes with non-zero probabilities in the training target.


@@ -933,7 +933,7 @@ The fields of `fitted_params(mach)` are:
 - `classes`: The classes seen during model fitting.

 - `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where
-  `indim` and `outdim` are the input and output dimensions respectively (See Report
+  `indim` and `outdim` are the input and output dimensions respectively (See Report
   section below).

 - `priors`: The class priors for classification. As inferred from training target `y`, if
@@ -943,7 +943,7 @@ The fields of `fitted_params(mach)` are:

 The fields of `report(mach)` are:

-- `indim`: The dimension of the input space i.e the number of features of training matrix.
+- `indim`: The dimension of the input space, i.e., the number of training features.

 - `outdim`: The dimension of the transformed space the model is projected to.

@@ -952,15 +952,15 @@ The fields of `report(mach)` are:
 - `nclasses`: The number of classes directly observed in the training data (which can be
   less than the total number of classes in the class pool).

-`class_means`: The class-specific means of the training data. A matrix of size
-`(indim, nclasses)` with the ith column being the class-mean of the ith class in
+- `class_means`: The class-specific means of the training data. A matrix of size
+  `(indim, nclasses)` with the ith column being the class-mean of the ith class in
   `classes` (See fitted params section above).

-- `class_weights`: The weights (class counts) of each class. A vector of length `nclasses`
-  with the ith element being the class weight of the ith class in `classes`. (See
+- `class_weights`: The weights (class counts) of each class. A vector of length `nclasses`
+  with the ith element being the class weight of the ith class in `classes`. (See
   fitted params section above.)

-- `explained_variance_ratio`: The ratio of explained variance to total variance. Each
+- `explained_variance_ratio`: The ratio of explained variance to total variance. Each
   dimension corresponds to an eigenvalue.

 # Examples