diff --git a/Project.toml b/Project.toml index af20ac6..7294d79 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJModels" uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7" authors = ["Anthony D. Blaom "] -version = "0.17.9" +version = "0.18.0" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" @@ -38,6 +38,7 @@ Distributions = "0.25" InteractiveUtils = "1" LinearAlgebra = "1" MLJModelInterface = "1.10" +MLJTransforms = "0.1.1" Markdown = "1" OrderedCollections = "1.1" Parameters = "0.12" @@ -58,10 +59,11 @@ Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" MLJDecisionTreeInterface = "c6f25543-311c-4c74-83dc-3ea6d1015661" MLJMultivariateStatsInterface = "1b6a4a23-ba22-4f51-9698-8599985d3728" +MLJTransforms = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Distributed", "MLJBase", "MLJDecisionTreeInterface", "MLJMultivariateStatsInterface", "Pkg", "StableRNGs", "Suppressor", "Test"] +test = ["Distributed", "MLJBase", "MLJDecisionTreeInterface", "MLJMultivariateStatsInterface", "MLJTransforms", "Pkg", "StableRNGs", "Suppressor", "Test"] diff --git a/README.md b/README.md index a8f4ebe..2fb1892 100644 --- a/README.md +++ b/README.md @@ -3,77 +3,74 @@ [![Build Status](https://github.com/JuliaAI/MLJModels.jl/workflows/CI/badge.svg)](https://github.com/JuliaAI/MLJModels.jl/actions) [![codecov](https://codecov.io/gh/JuliaAI/MLJModels.jl/graph/badge.svg?token=KgarnnCc0K)](https://codecov.io/gh/JuliaAI/MLJModels.jl) -Repository of the "built-in" models available for use in the -[MLJ](https://github.com/JuliaAI/MLJ.jl) MLJ machine -learning framework; and the home of the MLJ model registry. 
+Home of the [MLJ](https://juliaml.ai) Model Registry and tools for model search and model code loading. -For instructions on integrating a new model with MLJ visit -[here](https://JuliaAI.github.io/MLJ.jl/dev/adding_models_for_general_use/) +For instructions on integrating a new model into MLJ visit +[here](https://juliaai.github.io/MLJModelInterface.jl/stable/). ### Contents - [Who is this repo for?](#who-is-this-repo-for) + - [How to register new models](#how-to-register-new-models) - [What is provided here?](#what-is-provided-here) - - [Instructions for updating the MLJ model registry](#instructions-for-updating-the-mlj-model-registry) ## Who is this repo for? -General users of the MLJ machine learning platform should refer to -[MLJ home page](https://JuliaAI.github.io/MLJ.jl/dev/) -for usage and installation instructions. MLJModels is a dependency of -MLJ that the general user can ignore. +Newcomers to MLJ should refer to [this page](https://juliaml.ai) for usage and +installation instructions. MLJModels.jl is a dependency of MLJ that the general user can +ignore. -This repository is for developers wishing to -[register](#instructions-for-updating-the-mlj-model-registry) new MLJ -model interfaces, whether they be: +This repository is for developers maintaining: -- implemented **natively** in a - package providing the core machine learning algorithm, as in +- The [MLJ Model Registry](/src/registry), a database of packages implementing the MLJ + interface for machine learning models, together with metadata about those models. + +- MLJ tools for searching the database (`models(...)` and `matching(...)`) and for loading + model code (`@load`, `@iload`). + +## How to register new models + +The model registry lives at "/src/registry" but +is maintained using +[MLJModelRegistryTools.jl](https://juliaai.github.io/MLJModelRegistryTools.jl/dev/). 
+ +New MLJ model interfaces can be implemented either: + +- **natively** in a package providing the core machine learning algorithm, as in [`EvoTrees.jl`](https://github.com/Evovest/EvoTrees.jl/blob/master/src/MLJ.jl); or -- implemented in a separate **interface package**, such as +- in a separate **interface package**, such as [MLJDecisionTreeInterface.jl](https://github.com/JuliaAI/MLJDecisionTreeInterface.jl). -It also a place for developers to add models (mostly transformers) -such as `OneHotEncoder`, that are exported for "built-in" use in -MLJ. (In the future these models may live in a separate package.) - -To list *all* model interfaces currently registered, do `using MLJ` or -`using MLJModels` and run: +In either case, the package providing the implementation needs to be added to the MLJ +Model Registry to make it discoverable by MLJ users, and to make the model metadata +searchable. To register a package, prepare a pull request to MLJModels.jl by following [these instructions](https://juliaai.github.io/MLJModelRegistryTools.jl/dev/registry_management_tools/#Registry-management-tools). -- `localmodels()` to list built-in models (updated when external models are loaded with `@load`) +Currently, after registering the model, one must also make a PR to MLJ updating [this +dictionary of model +descriptors](https://github.com/JuliaAI/MLJ.jl/blob/dev/docs/ModelDescriptors.toml) to +ensure the new models appear in the right places in MLJ's [Model +Browser](https://JuliaAI.github.io/MLJ.jl/dev/model_browser/#Model-Browser). -- `models()` to list all registered models, or see [this list](/src/registry/Models.toml). +To list *all* model interfaces currently registered, do `using MLJ` or `using MLJModels` +and run `models()`.
Recall that an interface is loaded from within MLJ, together with the package providing the underlying algorithm, using the syntax `@load RidgeRegressor pkg=GLM`, where the `pkg` keyword is only necessary in ambiguous cases. - ## What is provided here? -MLJModels contains: - -- transformers to be pre-loaded into MLJ, located at - [/src/builtins](/src/builtins), such as `OneHotEncoder` - and `ConstantClassifier`. - -- the MLJ [model registry](src/registry/Metadata.toml), listing all - models that can be called from MLJ using `@load`. Package developers - can register new models by implementing the MLJ interface in their - package and following [these - instructions](https://JuliaAI.github.io/MLJ.jl/dev/adding_models_for_general_use/). - - -## Instructions for updating the MLJ model registry +The actual MLJ Model Registry consists of the TOML files in [this +directory](/src/registry). A few models available for immediate use in MLJ (without +loading model code using `@load`) are also provided by this package, under "/src/builtins", +but these may be moved out in the future. -Generally model registration is performed by administrators. If you -have an interface you would like registered, open an issue -[here](https://github.com/JuliaAI/MLJ.jl/issues). +### Historical note -**Administrator instructions.** These are given in the -`MLJModels.@update` document string. After registering the model, make a PR to MLJ -updating [this dictionary of model descriptors](https://github.com/JuliaAI/MLJ.jl/blob/dev/docs/ModelDescriptors.toml) -to ensure the new models appear in the right places in MLJ's [Model Browser](https://JuliaAI.github.io/MLJ.jl/dev/model_browser/#Model-Browser) +Older versions of MLJModels.jl contained some of the models now residing at +[MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl/tree/dev). Even older +versions provided all the non-native implementations of the +MLJ interface.
diff --git a/src/MLJModels.jl b/src/MLJModels.jl index ffaae0c..3f85e81 100755 --- a/src/MLJModels.jl +++ b/src/MLJModels.jl @@ -60,7 +60,6 @@ include("utilities.jl") # load built-in models: include("builtins/Constant.jl") -include("builtins/Transformers.jl") include("builtins/ThresholdPredictors.jl") # declare paths to the metadata and associated project file: diff --git a/src/builtins/Transformers.jl b/src/builtins/Transformers.jl deleted file mode 100644 index fd00d43..0000000 --- a/src/builtins/Transformers.jl +++ /dev/null @@ -1,2017 +0,0 @@ -# Note that doc-strings appear at the end - - -# # IMPUTER - -round_median(v::AbstractVector) = v -> round(eltype(v), median(v)) - -_median(e) = skipmissing(e) |> median -_round_median(e) = skipmissing(e) |> (f -> round(eltype(f), median(f))) -_mode(e) = skipmissing(e) |> mode - -@with_kw_noshow mutable struct UnivariateFillImputer <: Unsupervised - continuous_fill::Function = _median - count_fill::Function = _round_median - finite_fill::Function = _mode -end - -function MMI.fit(transformer::UnivariateFillImputer, - verbosity::Integer, - v) - - filler(v, ::Type) = throw(ArgumentError( - "Imputation is not supported for vectors "* - "of elscitype $(elscitype(v)).")) - filler(v, ::Type{<:Union{Continuous,Missing}}) = - transformer.continuous_fill(v) - filler(v, ::Type{<:Union{Count,Missing}}) = - transformer.count_fill(v) - filler(v, ::Type{<:Union{Finite,Missing}}) = - transformer.finite_fill(v) - - fitresult = (filler=filler(v, elscitype(v)),) - cache = nothing - report = NamedTuple() - - return fitresult, cache, report - -end - -function replace_missing(::Type{<:Finite}, vnew, filler) - all(in(levels(filler)), levels(vnew)) || - error(ArgumentError("The `column::AbstractVector{<:Finite}`"* - " to be transformed must contain the same levels"* - " as the categorical value to be imputed")) - replace(vnew, missing => filler) - -end - -function replace_missing(::Type, vnew, filler) - T = promote_type(nonmissing(eltype(vnew)), 
typeof(filler)) - w_tight = similar(vnew, T) - @inbounds for i in eachindex(vnew) - if ismissing(vnew[i]) - w_tight[i] = filler - else - w_tight[i] = vnew[i] - end - end - return w_tight -end - -function MMI.transform(transformer::UnivariateFillImputer, - fitresult, - vnew) - - filler = fitresult.filler - - scitype(filler) <: elscitype(vnew) || - error("Attempting to impute a value of scitype $(scitype(filler)) "* - "into a vector of incompatible elscitype, namely $(elscitype(vnew)). ") - - if elscitype(vnew) >: Missing - w_tight = replace_missing(nonmissing(elscitype(vnew)), vnew, filler) - else - w_tight = vnew - end - - return w_tight -end - -MMI.fitted_params(::UnivariateFillImputer, fitresult) = fitresult - -@with_kw_noshow mutable struct FillImputer <: Unsupervised - features::Vector{Symbol} = Symbol[] - continuous_fill::Function = _median - count_fill::Function = _round_median - finite_fill::Function = _mode -end - -function MMI.fit(transformer::FillImputer, verbosity::Int, X) - - s = schema(X) - features_seen = s.names |> collect # "seen" = "seen in fit" - scitypes_seen = s.scitypes |> collect - - features = isempty(transformer.features) ? features_seen : - transformer.features - - issubset(features, features_seen) || throw(ArgumentError( - "Some features specified do not exist in the supplied table. ")) - - # get corresponding scitypes: - mask = map(features_seen) do ftr - ftr in features - end - features = @view features_seen[mask] # `features` re-ordered - scitypes = @view scitypes_seen[mask] - features_and_scitypes = zip(features, scitypes) #|> collect - - # now keep those features that are imputable: - function isimputable(ftr, T::Type) - if verbosity > 0 && !isempty(transformer.features) - @info "Feature $ftr will not be imputed "* - "(imputation for $T not supported). 
" - end - return false - end - isimputable(ftr, ::Type{<:Union{Continuous,Missing}}) = true - isimputable(ftr, ::Type{<:Union{Count,Missing}}) = true - isimputable(ftr, ::Type{<:Union{Finite,Missing}}) = true - - mask = map(features_and_scitypes) do tup - isimputable(tup...) - end - features_to_be_imputed = @view features[mask] - - univariate_transformer = - UnivariateFillImputer(continuous_fill=transformer.continuous_fill, - count_fill=transformer.count_fill, - finite_fill=transformer.finite_fill) - univariate_fitresult(ftr) = MMI.fit(univariate_transformer, - verbosity - 1, - selectcols(X, ftr))[1] - - fitresult_given_feature = - Dict(ftr=> univariate_fitresult(ftr) for ftr in features_to_be_imputed) - - fitresult = (features_seen=features_seen, - univariate_transformer=univariate_transformer, - fitresult_given_feature=fitresult_given_feature) - report = NamedTuple() - cache = nothing - - return fitresult, cache, report -end - -function MMI.transform(transformer::FillImputer, fitresult, X) - - features_seen = fitresult.features_seen # seen in fit - univariate_transformer = fitresult.univariate_transformer - fitresult_given_feature = fitresult.fitresult_given_feature - - all_features = Tables.schema(X).names - - # check that no new features have appeared: - all(e -> e in features_seen, all_features) || throw(ArgumentError( - "Attempting to transform table with "* - "feature labels not seen in fit.\n"* - "Features seen in fit = $features_seen.\n"* - "Current features = $([all_features...]). 
")) - - features = keys(fitresult_given_feature) - - cols = map(all_features) do ftr - col = MMI.selectcols(X, ftr) - if ftr in features - fr = fitresult_given_feature[ftr] - return transform(univariate_transformer, fr, col) - end - return col - end - - named_cols = NamedTuple{all_features}(tuple(cols...)) - return MMI.table(named_cols, prototype=X) - -end - -function MMI.fitted_params(::FillImputer, fr) - dict = fr.fitresult_given_feature - filler_given_feature = Dict(ftr=>dict[ftr].filler for ftr in keys(dict)) - return (features_seen_in_fit=fr.features_seen, - univariate_transformer=fr.univariate_transformer, - filler_given_feature=filler_given_feature) -end - -## UNIVARIATE DISCRETIZER - -# helper function: -reftype(::CategoricalArray{<:Any,<:Any,R}) where R = R - -@with_kw_noshow mutable struct UnivariateDiscretizer <:Unsupervised - n_classes::Int = 512 -end - -struct UnivariateDiscretizerResult{C} - odd_quantiles::Vector{Float64} - even_quantiles::Vector{Float64} - element::C -end - -function MMI.fit(transformer::UnivariateDiscretizer, verbosity::Int, X) - n_classes = transformer.n_classes - quantiles = quantile(X, Array(range(0, stop=1, length=2*n_classes+1))) - clipped_quantiles = quantiles[2:2*n_classes] # drop 0% and 100% quantiles - - # odd_quantiles for transforming, even_quantiles used for - # inverse_transforming: - odd_quantiles = clipped_quantiles[2:2:(2*n_classes-2)] - even_quantiles = clipped_quantiles[1:2:(2*n_classes-1)] - - # determine optimal reference type for encoding as categorical: - R = reftype(categorical(1:n_classes, compress=true)) - output_prototype = categorical(R(1):R(n_classes), compress=true, ordered=true) - element = output_prototype[1] - - cache = nothing - report = NamedTuple() - - res = UnivariateDiscretizerResult(odd_quantiles, even_quantiles, element) - return res, cache, report -end - -# acts on scalars: -function transform_to_int( - result::UnivariateDiscretizerResult{<:CategoricalValue{R}}, - r::Real) where R - k = oneR = 
R(1) - @inbounds for q in result.odd_quantiles - if r > q - k += oneR - end - end - return k -end - -# transforming scalars: -MMI.transform(::UnivariateDiscretizer, result, r::Real) = - transform(result.element, transform_to_int(result, r)) - -# transforming vectors: -function MMI.transform(::UnivariateDiscretizer, result, v) - w = [transform_to_int(result, r) for r in v] - return transform(result.element, w) -end - -# inverse_transforming raw scalars: -function MMI.inverse_transform( - transformer::UnivariateDiscretizer, result , k::Integer) - k <= transformer.n_classes && k > 0 || - error("Cannot transform an integer outside the range "* - "`[1, n_classes]`, where `n_classes = $(transformer.n_classes)`") - return result.even_quantiles[k] -end - -# inverse transforming a categorical value: -function MMI.inverse_transform( - transformer::UnivariateDiscretizer, result, e::CategoricalValue) - k = CategoricalArrays.DataAPI.unwrap(e) - return inverse_transform(transformer, result, k) -end - -# inverse transforming raw vectors: -MMI.inverse_transform(transformer::UnivariateDiscretizer, result, - w::AbstractVector{<:Integer}) = - [inverse_transform(transformer, result, k) for k in w] - -# inverse transforming vectors of categorical elements: -function MMI.inverse_transform(transformer::UnivariateDiscretizer, result, - wcat::AbstractVector{<:CategoricalValue}) - w = MMI.int(wcat) - return [inverse_transform(transformer, result, k) for k in w] -end - -MMI.fitted_params(::UnivariateDiscretizer, fitresult) = ( - odd_quantiles=fitresult.odd_quantiles, - even_quantiles=fitresult.even_quantiles -) - - -# # CONTINUOUS TRANSFORM OF TIME TYPE FEATURES - -mutable struct UnivariateTimeTypeToContinuous <: Unsupervised - zero_time::Union{Nothing, TimeType} - step::Period -end - -function UnivariateTimeTypeToContinuous(; - zero_time=nothing, step=Dates.Hour(24)) - model = UnivariateTimeTypeToContinuous(zero_time, step) - message = MMI.clean!(model) - isempty(message) || @warn message - 
return model -end - -function MMI.clean!(model::UnivariateTimeTypeToContinuous) - # Step must be able to be added to zero_time if provided. - msg = "" - if model.zero_time !== nothing - try - tmp = model.zero_time + model.step - catch err - if err isa MethodError - model.zero_time, model.step, status, msg = _fix_zero_time_step( - model.zero_time, model.step) - if status === :error - # Unable to resolve, rethrow original error. - throw(err) - end - else - throw(err) - end - end - end - return msg -end - -function _fix_zero_time_step(zero_time, step) - # Cannot add time parts to dates nor date parts to times. - # If a mismatch is encountered. Conversion from date parts to time parts - # is possible, but not from time parts to date parts because we cannot - # represent fractional date parts. - msg = "" - if zero_time isa Dates.Date && step isa Dates.TimePeriod - # Convert zero_time to a DateTime to resolve conflict. - if step % Hour(24) === Hour(0) - # We can convert step to Day safely - msg = "Cannot add `TimePeriod` `step` to `Date` `zero_time`. Converting `step` to `Day`." - step = convert(Day, step) - else - # We need datetime to be compatible with the step. - msg = "Cannot add `TimePeriod` `step` to `Date` `zero_time`. Converting `zero_time` to `DateTime`." - zero_time = convert(DateTime, zero_time) - end - return zero_time, step, :success, msg - elseif zero_time isa Dates.Time && step isa Dates.DatePeriod - # Convert step to Hour if possible. This will fail for - # isa(step, Month) - msg = "Cannot add `DatePeriod` `step` to `Time` `zero_time`. Converting `step` to `Hour`." 
- step = convert(Hour, step) - return zero_time, step, :success, msg - else - return zero_time, step, :error, msg - end -end - -function MMI.fit(model::UnivariateTimeTypeToContinuous, verbosity::Int, X) - if model.zero_time !== nothing - min_dt = model.zero_time - step = model.step - # Check zero_time is compatible with X - example = first(X) - try - X - min_dt - catch err - if err isa MethodError - @warn "`$(typeof(min_dt))` `zero_time` is not compatible with `$(eltype(X))` vector. Attempting to convert `zero_time`." - min_dt = convert(eltype(X), min_dt) - else - throw(err) - end - end - else - min_dt = minimum(X) - step = model.step - message = "" - try - min_dt + step - catch err - if err isa MethodError - min_dt, step, status, message = _fix_zero_time_step(min_dt, step) - if status === :error - # Unable to resolve, rethrow original error. - throw(err) - end - else - throw(err) - end - end - isempty(message) || @warn message - end - cache = nothing - report = NamedTuple() - fitresult = (min_dt, step) - return fitresult, cache, report -end - -function MMI.transform(model::UnivariateTimeTypeToContinuous, fitresult, X) - min_dt, step = fitresult - if typeof(min_dt) ≠ eltype(X) - # Cannot run if eltype in transform differs from zero_time from fit. - throw(ArgumentError("Different `TimeType` encountered during `transform` than expected from `fit`. Found `$(eltype(X))`, expected `$(typeof(min_dt))`")) - end - # Set the size of a single step. - next_time = min_dt + step - if next_time == min_dt - # Time type loops if step is a multiple of Hour(24), so calculate the - # number of multiples, then re-scale to Hour(12) and adjust delta to match original. - m = step / Dates.Hour(12) - delta = m * ( - Float64(Dates.value(min_dt + Dates.Hour(12)) - Dates.value(min_dt))) - else - delta = Float64(Dates.value(min_dt + step) - Dates.value(min_dt)) - end - return @. 
Float64(Dates.value(X - min_dt)) / delta -end - - -# # UNIVARIATE STANDARDIZATION - -""" - UnivariateStandardizer() - -Transformer type for standardizing (whitening) single variable data. - -This model may be deprecated in the future. Consider using -[`Standardizer`](@ref), which handles both tabular *and* univariate data. - -""" -mutable struct UnivariateStandardizer <: Unsupervised end - -function MMI.fit(transformer::UnivariateStandardizer, verbosity::Int, - v::AbstractVector{T}) where T<:Real - stdv = std(v) - stdv > eps(typeof(stdv)) || - @warn "Extremely small standard deviation encountered in standardization." - fitresult = (mean(v), stdv) - cache = nothing - report = NamedTuple() - return fitresult, cache, report -end - -MMI.fitted_params(::UnivariateStandardizer, fitresult) = - (mean=fitresult[1], std=fitresult[2]) - - -# for transforming single value: -function MMI.transform(transformer::UnivariateStandardizer, fitresult, x::Real) - mu, sigma = fitresult - return (x - mu)/sigma -end - -# for transforming vector: -MMI.transform(transformer::UnivariateStandardizer, fitresult, v) = - [transform(transformer, fitresult, x) for x in v] - -# for single values: -function MMI.inverse_transform(transformer::UnivariateStandardizer, fitresult, y::Real) - mu, sigma = fitresult - return mu + y*sigma -end - -# for vectors: -MMI.inverse_transform(transformer::UnivariateStandardizer, fitresult, w) = - [inverse_transform(transformer, fitresult, y) for y in w] - - -# # STANDARDIZATION OF ORDINAL FEATURES OF TABULAR DATA - -mutable struct Standardizer <: Unsupervised - # features to be standardized; empty means all - features::Union{AbstractVector{Symbol}, Function} - ignore::Bool # features to be ignored - ordered_factor::Bool - count::Bool -end - -# keyword constructor -function Standardizer( - ; - features::Union{AbstractVector{Symbol}, Function}=Symbol[], - ignore::Bool=false, - ordered_factor::Bool=false, - count::Bool=false -) - transformer = Standardizer(features, 
ignore, ordered_factor, count) - message = MMI.clean!(transformer) - isempty(message) || throw(ArgumentError(message)) - return transformer -end - -function MMI.clean!(transformer::Standardizer) - err = "" - if ( - typeof(transformer.features) <: AbstractVector{Symbol} && - isempty(transformer.features) && - transformer.ignore - ) - err *= "Features to be ignored must be specified in features field." - end - return err -end - -function MMI.fit(transformer::Standardizer, verbosity::Int, X) - - # if not a table, it must be an abstract vector, eltpye AbstractFloat: - is_univariate = !Tables.istable(X) - - # are we attempting to standardize Count or OrderedFactor? - is_invertible = !transformer.count && !transformer.ordered_factor - - # initialize fitresult: - fitresult_given_feature = LittleDict{Symbol,Tuple{AbstractFloat,AbstractFloat}}() - - # special univariate case: - if is_univariate - fitresult_given_feature[:unnamed] = - MMI.fit(UnivariateStandardizer(), verbosity - 1, X)[1] - return (is_univariate=true, - is_invertible=true, - fitresult_given_feature=fitresult_given_feature), - nothing, nothing - end - - all_features = Tables.schema(X).names - feature_scitypes = - collect(elscitype(selectcols(X, c)) for c in all_features) - scitypes = Vector{Type}([Continuous]) - transformer.ordered_factor && push!(scitypes, OrderedFactor) - transformer.count && push!(scitypes, Count) - AllowedScitype = Union{scitypes...} - - # determine indices of all_features to be transformed - if transformer.features isa AbstractVector{Symbol} - if isempty(transformer.features) - cols_to_fit = filter!(eachindex(all_features) |> collect) do j - feature_scitypes[j] <: AllowedScitype - end - else - !issubset(transformer.features, all_features) && verbosity > -1 && - @warn "Some specified features not present in table to be fit. 
" - cols_to_fit = filter!(eachindex(all_features) |> collect) do j - ifelse( - transformer.ignore, - !(all_features[j] in transformer.features) && - feature_scitypes[j] <: AllowedScitype, - (all_features[j] in transformer.features) && - feature_scitypes[j] <: AllowedScitype - ) - end - end - else - cols_to_fit = filter!(eachindex(all_features) |> collect) do j - ifelse( - transformer.ignore, - !(transformer.features(all_features[j])) && - feature_scitypes[j] <: AllowedScitype, - (transformer.features(all_features[j])) && - feature_scitypes[j] <: AllowedScitype - ) - end - end - - isempty(cols_to_fit) && verbosity > -1 && - @warn "No features to standarize." - - # fit each feature and add result to above dict - verbosity > 1 && @info "Features standarized: " - for j in cols_to_fit - col_data = if (feature_scitypes[j] <: OrderedFactor) - coerce(selectcols(X, j), Continuous) - else - selectcols(X, j) - end - col_fitresult, _, _ = - MMI.fit(UnivariateStandardizer(), verbosity - 1, col_data) - fitresult_given_feature[all_features[j]] = col_fitresult - verbosity > 1 && - @info " :$(all_features[j]) mu=$(col_fitresult[1]) "* - "sigma=$(col_fitresult[2])" - end - - fitresult = (is_univariate=false, is_invertible=is_invertible, - fitresult_given_feature=fitresult_given_feature) - cache = nothing - report = (features_fit=keys(fitresult_given_feature),) - - return fitresult, cache, report -end - -function MMI.fitted_params(::Standardizer, fitresult) - is_univariate, _, dic = fitresult - is_univariate && - return fitted_params(UnivariateStandardizer(), dic[:unnamed]) - features_fit = keys(dic) |> collect - zipped = map(ftr->dic[ftr], features_fit) - means, stds = zip(zipped...) 
|> collect - return (; features_fit, means, stds) -end - -MMI.transform(::Standardizer, fitresult, X) = - _standardize(transform, fitresult, X) - -function MMI.inverse_transform(::Standardizer, fitresult, X) - fitresult.is_invertible || - error("Inverse standardization is not supported when `count=true` "* - "or `ordered_factor=true` during fit. ") - return _standardize(inverse_transform, fitresult, X) -end - -function _standardize(operation, fitresult, X) - - # `fitresult` is dict of column fitresults, keyed on feature names - is_univariate, _, fitresult_given_feature = fitresult - - if is_univariate - univariate_fitresult = fitresult_given_feature[:unnamed] - return operation(UnivariateStandardizer(), univariate_fitresult, X) - end - - features_to_be_transformed = keys(fitresult_given_feature) - - all_features = Tables.schema(X).names - - all(e -> e in all_features, features_to_be_transformed) || - error("Attempting to transform data with incompatible feature labels.") - - col_transformer = UnivariateStandardizer() - - cols = map(all_features) do ftr - ftr_data = selectcols(X, ftr) - if ftr in features_to_be_transformed - col_to_transform = coerce(ftr_data, Continuous) - operation(col_transformer, - fitresult_given_feature[ftr], - col_to_transform) - else - ftr_data - end - end - - named_cols = NamedTuple{all_features}(tuple(cols...)) - - return MMI.table(named_cols, prototype=X) -end - - -# # UNIVARIATE BOX-COX TRANSFORMATIONS - -function standardize(v) - map(v) do x - (x - mean(v))/std(v) - end -end - -function midpoints(v::AbstractVector{T}) where T <: Real - return [0.5*(v[i] + v[i + 1]) for i in 1:(length(v) -1)] -end - -function normality(v) - n = length(v) - v = standardize(convert(Vector{Float64}, v)) - # sort and replace with midpoints - v = midpoints(sort!(v)) - # find the (approximate) expected value of the size (n-1)-ordered statistics for - # standard normal: - d = Distributions.Normal(0,1) - w = map(collect(1:(n-1))/n) do x - quantile(d, x) - end - 
return cor(v, w) -end - -function boxcox(lambda, c, x::Real) - c + x >= 0 || throw(DomainError) - if lambda == 0.0 - c + x > 0 || throw(DomainError) - return log(c + x) - end - return ((c + x)^lambda - 1)/lambda -end - -boxcox(lambda, c, v::AbstractVector{T}) where T <: Real = - [boxcox(lambda, c, x) for x in v] - -@with_kw_noshow mutable struct UnivariateBoxCoxTransformer <: Unsupervised - n::Int = 171 # nbr values tried in optimizing exponent lambda - shift::Bool = false # whether to shift data away from zero -end - -function MMI.fit(transformer::UnivariateBoxCoxTransformer, verbosity::Int, - v::AbstractVector{T}) where T <: Real - - m = minimum(v) - m >= 0 || error("Cannot perform a Box-Cox transformation on negative data.") - - c = 0.0 # default - if transformer.shift - if m == 0 - c = 0.2*mean(v) - end - else - m != 0 || error("Zero value encountered in data being Box-Cox transformed.\n"* - "Consider calling `fit!` with `shift=true`.") - end - - lambdas = range(-0.4, stop=3, length=transformer.n) - scores = Float64[normality(boxcox(l, c, v)) for l in lambdas] - lambda = lambdas[argmax(scores)] - - return (lambda, c), nothing, NamedTuple() -end - -MMI.fitted_params(::UnivariateBoxCoxTransformer, fitresult) = - (λ=fitresult[1], c=fitresult[2]) - -# for X scalar or vector: -MMI.transform(transformer::UnivariateBoxCoxTransformer, fitresult, X) = - boxcox(fitresult..., X) - -# scalar case: -function MMI.inverse_transform(transformer::UnivariateBoxCoxTransformer, - fitresult, x::Real) - lambda, c = fitresult - if lambda == 0 - return exp(x) - c - else - return (lambda*x + 1)^(1/lambda) - c - end -end - -# vector case: -function MMI.inverse_transform(transformer::UnivariateBoxCoxTransformer, - fitresult, w::AbstractVector{T}) where T <: Real - return [inverse_transform(transformer, fitresult, y) for y in w] -end - - -# # ONE HOT ENCODING - -@with_kw_noshow mutable struct OneHotEncoder <: Unsupervised - features::Vector{Symbol} = Symbol[] - drop_last::Bool = false - 
ordered_factor::Bool = true - ignore::Bool = false -end - -# we store the categorical refs for each feature to be encoded and the -# corresponing feature labels generated (called -# "names"). `all_features` is stored to ensure no new features appear -# in new input data, causing potential name clashes. -struct OneHotEncoderResult <: MMI.MLJType - all_features::Vector{Symbol} # all feature labels - ref_name_pairs_given_feature::Dict{Symbol,Vector{Union{Pair{<:Unsigned,Symbol}, Pair{Missing, Symbol}}}} - fitted_levels_given_feature::Dict{Symbol, CategoricalArray} -end - -# join feature and level into new label without clashing with anything -# in all_features: -function compound_label(all_features, feature, level) - label = Symbol(string(feature, "__", level)) - # in the (rare) case subft is not a new feature label: - while label in all_features - label = Symbol(string(label,"_")) - end - return label -end - -function MMI.fit(transformer::OneHotEncoder, verbosity::Int, X) - - all_features = Tables.schema(X).names # a tuple not vector - - if isempty(transformer.features) - specified_features = collect(all_features) - else - if transformer.ignore - specified_features = filter(all_features |> collect) do ftr - !(ftr in transformer.features) - end - else - specified_features = transformer.features - end - end - - - ref_name_pairs_given_feature = Dict{Symbol,Vector{Pair{<:Unsigned,Symbol}}}() - - allowed_scitypes = ifelse( - transformer.ordered_factor, - Union{Missing, Finite}, - Union{Missing, Multiclass} - ) - fitted_levels_given_feature = Dict{Symbol, CategoricalArray}() - col_scitypes = schema(X).scitypes - # apply on each feature - for j in eachindex(all_features) - ftr = all_features[j] - col = MMI.selectcols(X,j) - T = col_scitypes[j] - if T <: allowed_scitypes && ftr in specified_features - ref_name_pairs_given_feature[ftr] = Pair{<:Unsigned,Symbol}[] - shift = transformer.drop_last ? 
1 : 0 - levels = classes(col) - fitted_levels_given_feature[ftr] = levels - if verbosity > 0 - @info "Spawning $(length(levels)-shift) sub-features "* - "to one-hot encode feature :$ftr." - end - for level in levels[1:end-shift] - ref = MMI.int(level) - name = compound_label(all_features, ftr, level) - push!(ref_name_pairs_given_feature[ftr], ref => name) - end - end - end - - fitresult = OneHotEncoderResult(collect(all_features), - ref_name_pairs_given_feature, - fitted_levels_given_feature) - - # get new feature names - d = ref_name_pairs_given_feature - new_features = Symbol[] - features_to_be_transformed = keys(d) - for ftr in all_features - if ftr in features_to_be_transformed - append!(new_features, last.(d[ftr])) - else - push!(new_features, ftr) - end - end - - report = (features_to_be_encoded= - collect(keys(ref_name_pairs_given_feature)), - new_features=new_features) - cache = nothing - - return fitresult, cache, report -end - -MMI.fitted_params(::OneHotEncoder, fitresult) = ( - all_features = fitresult.all_features, - fitted_levels_given_feature = fitresult.fitted_levels_given_feature, - ref_name_pairs_given_feature = fitresult.ref_name_pairs_given_feature, -) - -# If v=categorical('a', 'a', 'b', 'a', 'c') and MMI.int(v[1]) = ref -# then `_hot(v, ref) = [true, true, false, true, false]` -hot(v::AbstractVector{<:CategoricalValue}, ref) = map(v) do c - MMI.int(c) == ref -end - -function hot(col::AbstractVector{<:Union{Missing, CategoricalValue}}, ref) map(col) do c - if ismissing(ref) - missing - else - MMI.int(c) == ref - end -end -end - -function MMI.transform(transformer::OneHotEncoder, fitresult, X) - features = Tables.schema(X).names # tuple not vector - - d = fitresult.ref_name_pairs_given_feature - - # check the features match the fit result - all(e -> e in fitresult.all_features, features) || - error("Attempting to transform table with feature "* - "names not seen in fit. ") - new_features = Symbol[] - new_cols = [] # not Vector[] !! 
- features_to_be_transformed = keys(d) - for ftr in features - col = MMI.selectcols(X, ftr) - if ftr in features_to_be_transformed - Set(fitresult.fitted_levels_given_feature[ftr]) == - Set(classes(col)) || - error("Found category level mismatch in feature `$(ftr)`. "* - "Consider using `levels!` to ensure fitted and transforming "* - "features have the same category levels.") - append!(new_features, last.(d[ftr])) - pairs = d[ftr] - refs = first.(pairs) - names = last.(pairs) - cols_to_add = map(refs) do ref - if ismissing(ref) missing - else float.(hot(col, ref)) - end - end - append!(new_cols, cols_to_add) - else - push!(new_features, ftr) - push!(new_cols, col) - end - end - named_cols = NamedTuple{tuple(new_features...)}(tuple(new_cols)...) - return MMI.table(named_cols, prototype=X) -end - - -# # CONTINUOUS_ENCODING - -@with_kw_noshow mutable struct ContinuousEncoder <: Unsupervised - drop_last::Bool = false - one_hot_ordered_factors::Bool = false -end - -function MMI.fit(transformer::ContinuousEncoder, verbosity::Int, X) - - # what features can be converted and therefore kept? - s = schema(X) - features = s.names - scitypes = s.scitypes - Convertible = Union{Continuous, Finite, Count} - feature_scitype_tuples = zip(features, scitypes) |> collect - features_to_keep = - first.(filter(t -> last(t) <: Convertible, feature_scitype_tuples)) - features_to_be_dropped = setdiff(collect(features), features_to_keep) - - if verbosity > 0 - if !isempty(features_to_be_dropped) - @info "Some features cannot be replaced with "* - "`Continuous` features and will be dropped: "* - "$features_to_be_dropped. 
" - end - end - - # fit the one-hot encoder: - hot_encoder = - OneHotEncoder(ordered_factor=transformer.one_hot_ordered_factors, - drop_last=transformer.drop_last) - hot_fitresult, _, hot_report = MMI.fit(hot_encoder, verbosity - 1, X) - - new_features = setdiff(hot_report.new_features, features_to_be_dropped) - - fitresult = (features_to_keep=features_to_keep, - one_hot_encoder=hot_encoder, - one_hot_encoder_fitresult=hot_fitresult) - - # generate the report: - report = (features_to_keep=features_to_keep, - new_features=new_features) - - cache = nothing - - return fitresult, cache, report - -end - -MMI.fitted_params(::ContinuousEncoder, fitresult) = fitresult - -function MMI.transform(transformer::ContinuousEncoder, fitresult, X) - - features_to_keep, hot_encoder, hot_fitresult = values(fitresult) - - # dump unseen or untransformable features: - if !issubset(features_to_keep, MMI.schema(X).names) - throw( - ArgumentError( - "Supplied frame does not admit previously selected features." - ) - ) - end - X0 = MMI.selectcols(X, features_to_keep) - - # one-hot encode: - X1 = transform(hot_encoder, hot_fitresult, X0) - - # convert remaining to continuous: - return coerce(X1, Count=>Continuous, OrderedFactor=>Continuous) - -end - - -# # INTERACTION TRANSFORMER - -@mlj_model mutable struct InteractionTransformer <: Static - order::Int = 2::(_ > 1) - features::Union{Nothing, Vector{Symbol}} = nothing::(_ !== nothing ? 
length(_) > 1 : true) -end - -infinite_scitype(col) = eltype(scitype(col)) <: Infinite - -actualfeatures(features::Nothing, table) = - filter(feature -> infinite_scitype(Tables.getcolumn(table, feature)), Tables.columnnames(table)) - -function actualfeatures(features::Vector{Symbol}, table) - diff = setdiff(features, Tables.columnnames(table)) - diff != [] && throw(ArgumentError(string("Column(s) ", join([x for x in diff], ", "), " are not in the dataset."))) - - for feature in features - infinite_scitype(Tables.getcolumn(table, feature)) || throw(ArgumentError("Column $feature's scitype is not Infinite.")) - end - return Tuple(features) -end - -interactions(columns, order::Int) = - collect(Iterators.flatten(combinations(columns, i) for i in 2:order)) - -interactions(columns, variables...) = - .*((Tables.getcolumn(columns, var) for var in variables)...) - -function MMI.transform(model::InteractionTransformer, _, X) - features = actualfeatures(model.features, X) - interactions_ = interactions(features, model.order) - interaction_features = Tuple(Symbol(join(inter, "_")) for inter in interactions_) - columns = Tables.Columns(X) - interaction_table = NamedTuple{interaction_features}([interactions(columns, inter...) 
for inter in interactions_]) - return merge(Tables.columntable(X), interaction_table) -end - -# # METADATA FOR ALL BUILT-IN TRANSFORMERS - -metadata_pkg.( - ( - UnivariateStandardizer, - UnivariateDiscretizer, - Standardizer, - UnivariateBoxCoxTransformer, - UnivariateFillImputer, - OneHotEncoder, - FillImputer, - ContinuousEncoder, - UnivariateTimeTypeToContinuous, - InteractionTransformer - ), - package_name = "MLJModels", - package_uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7", - package_url = "https://github.com/JuliaAI/MLJModels.jl", - is_pure_julia = true, - package_license = "MIT") - -metadata_model(UnivariateFillImputer, - input_scitype = Union{AbstractVector{<:Union{Continuous,Missing}}, - AbstractVector{<:Union{Count,Missing}}, - AbstractVector{<:Union{Finite,Missing}}}, - output_scitype= Union{AbstractVector{<:Continuous}, - AbstractVector{<:Count}, - AbstractVector{<:Finite}}, - human_name = "single variable fill imputer", - load_path = "MLJModels.UnivariateFillImputer") - -metadata_model(FillImputer, - input_scitype = Table, - output_scitype = Table, - load_path = "MLJModels.FillImputer") - -metadata_model(UnivariateDiscretizer, - input_scitype = AbstractVector{<:Continuous}, - output_scitype = AbstractVector{<:OrderedFactor}, - human_name = "single variable discretizer", - load_path = "MLJModels.UnivariateDiscretizer") - -metadata_model(UnivariateStandardizer, - input_scitype = AbstractVector{<:Infinite}, - output_scitype = AbstractVector{Continuous}, - human_name = "single variable discretizer", - load_path = "MLJModels.UnivariateStandardizer") - -metadata_model(Standardizer, - input_scitype = Union{Table, AbstractVector{<:Continuous}}, - output_scitype = Union{Table, AbstractVector{<:Continuous}}, - load_path = "MLJModels.Standardizer") - -metadata_model(UnivariateBoxCoxTransformer, - input_scitype = AbstractVector{Continuous}, - output_scitype = AbstractVector{Continuous}, - human_name = "single variable Box-Cox transformer", - load_path = 
"MLJModels.UnivariateBoxCoxTransformer") - -metadata_model(OneHotEncoder, - input_scitype = Table, - output_scitype = Table, - human_name = "one-hot encoder", - load_path = "MLJModels.OneHotEncoder") - -metadata_model(ContinuousEncoder, - input_scitype = Table, - output_scitype = Table(Continuous), - load_path = "MLJModels.ContinuousEncoder") - -metadata_model(UnivariateTimeTypeToContinuous, - input_scitype = AbstractVector{<:ScientificTimeType}, - output_scitype = AbstractVector{Continuous}, - human_name ="single variable transformer that creates "* - "continuous representations of temporally typed data", - load_path = "MLJModels.UnivariateTimeTypeToContinuous") - -metadata_model(InteractionTransformer, - input_scitype = Tuple{Table}, - output_scitype = Table, - human_name = "interaction transformer", - load_path = "MLJModels.InteractionTransformer") - -# # DOC STRINGS - -# The following document strings comply with the MLJ standard. - -""" -$(MLJModelInterface.doc_header(UnivariateFillImputer)) - -Use this model to imputing `missing` values in a vector with a fixed -value learned from the non-missing values of training vector. - -For imputing missing values in tabular data, use [`FillImputer`](@ref) -instead. - - -# Training data - -In MLJ or MLJBase, bind an instance `model` to data with - - mach = machine(model, x) - -where - -- `x`: any abstract vector with element scitype `Union{Missing, T}` - where `T` is a subtype of `Continuous`, `Multiclass`, - `OrderedFactor` or `Count`; check scitype using `scitype(x)` - -Train the machine using `fit!(mach, rows=...)`. 
- - -# Hyper-parameters - -- `continuous_fill`: function or other callable to determine value to - be imputed in the case of `Continuous` (abstract float) data; - default is to apply `median` after skipping `missing` values - -- `count_fill`: function or other callable to determine value to be - imputed in the case of `Count` (integer) data; default is to apply - rounded `median` after skipping `missing` values - -- `finite_fill`: function or other callable to determine value to be - imputed in the case of `Multiclass` or `OrderedFactor` data - (categorical vectors); default is to apply `mode` after skipping - `missing` values - - -# Operations - -- `transform(mach, xnew)`: return `xnew` with missing values imputed - with the fill values learned when fitting `mach` - - -# Fitted parameters - -The fields of `fitted_params(mach)` are: - -- `filler`: the fill value to be imputed in all new data - - -# Examples - -``` -using MLJ -imputer = UnivariateFillImputer() - -x_continuous = [1.0, 2.0, missing, 3.0] -x_multiclass = coerce(["y", "n", "y", missing, "y"], Multiclass) -x_count = [1, 1, 1, 2, missing, 3, 3] - -mach = machine(imputer, x_continuous) -fit!(mach) - -julia> fitted_params(mach) -(filler = 2.0,) - -julia> transform(mach, [missing, missing, 101.0]) -3-element Vector{Float64}: - 2.0 - 2.0 - 101.0 - -mach2 = machine(imputer, x_multiclass) |> fit! - -julia> transform(mach2, x_multiclass) -5-element CategoricalArray{String,1,UInt32}: - "y" - "n" - "y" - "y" - "y" - -mach3 = machine(imputer, x_count) |> fit! - -julia> transform(mach3, [missing, missing, 5]) -3-element Vector{Int64}: - 2 - 2 - 5 -``` - -For imputing tabular data, use [`FillImputer`](@ref). - -""" -UnivariateFillImputer - -""" -$(MLJModelInterface.doc_header(FillImputer)) - -Use this model to impute `missing` values in tabular data. A fixed -"filler" value is learned from the training data, one for each column -of the table. 
- -For imputing missing values in a vector, use -[`UnivariateFillImputer`](@ref) instead. - - -# Training data - -In MLJ or MLJBase, bind an instance `model` to data with - - mach = machine(model, X) - -where - -- `X`: any table of input features (eg, a `DataFrame`) whose columns - each have element scitypes `Union{Missing, T}`, where `T` is a - subtype of `Continuous`, `Multiclass`, `OrderedFactor` or - `Count`. Check scitypes with `schema(X)`. - -Train the machine using `fit!(mach, rows=...)`. - - -# Hyper-parameters - -- `features`: a vector of names of features (symbols) for which - imputation is to be attempted; default is empty, which is - interpreted as "impute all". - -- `continuous_fill`: function or other callable to determine value to - be imputed in the case of `Continuous` (abstract float) data; default is to apply - `median` after skipping `missing` values - -- `count_fill`: function or other callable to determine value to - be imputed in the case of `Count` (integer) data; default is to apply - rounded `median` after skipping `missing` values - -- `finite_fill`: function or other callable to determine value to be - imputed in the case of `Multiclass` or `OrderedFactor` data - (categorical vectors); default is to apply `mode` after skipping `missing` values - - -# Operations - -- `transform(mach, Xnew)`: return `Xnew` with missing values imputed with - the fill values learned when fitting `mach` - - -# Fitted parameters - -The fields of `fitted_params(mach)` are: - -- `features_seen_in_fit`: the names of features (columns) encountered - during training - -- `univariate_transformer`: the univariate model applied to determine - the fillers (it's fields contain the functions defining the filler computations) - -- `filler_given_feature`: dictionary of filler values, keyed on - feature (column) names - - -# Examples - -``` -using MLJ -imputer = FillImputer() - -X = (a = [1.0, 2.0, missing, 3.0, missing], - b = coerce(["y", "n", "y", missing, "y"], 
Multiclass), - c = [1, 1, 2, missing, 3]) - -schema(X) -julia> schema(X) -┌───────┬───────────────────────────────┐ -│ names │ scitypes │ -├───────┼───────────────────────────────┤ -│ a │ Union{Missing, Continuous} │ -│ b │ Union{Missing, Multiclass{2}} │ -│ c │ Union{Missing, Count} │ -└───────┴───────────────────────────────┘ - -mach = machine(imputer, X) -fit!(mach) - -julia> fitted_params(mach).filler_given_feature -(filler = 2.0,) - -julia> fitted_params(mach).filler_given_feature -Dict{Symbol, Any} with 3 entries: - :a => 2.0 - :b => "y" - :c => 2 - -julia> transform(mach, X) -(a = [1.0, 2.0, 2.0, 3.0, 2.0], - b = CategoricalValue{String, UInt32}["y", "n", "y", "y", "y"], - c = [1, 1, 2, 2, 3],) -``` - -See also [`UnivariateFillImputer`](@ref). - -""" -FillImputer - -""" -$(MLJModelInterface.doc_header(Standardizer)) - -Use this model to standardize (whiten) a `Continuous` vector, or -relevant columns of a table. The rescalings applied by this -transformer to new data are always those learned during the training -phase, which are generally different from what would actually -standardize the new data. - - -# Training data - -In MLJ or MLJBase, bind an instance `model` to data with - - mach = machine(model, X) - -where - -- `X`: any Tables.jl compatible table or any abstract vector with - `Continuous` element scitype (any abstract float vector). Only - features in a table with `Continuous` scitype can be standardized; - check column scitypes with `schema(X)`. - -Train the machine using `fit!(mach, rows=...)`. - - -# Hyper-parameters - -- `features`: one of the following, with the behavior indicated below: - - - `[]` (empty, the default): standardize all features (columns) - having `Continuous` element scitype - - - non-empty vector of feature names (symbols): standardize only the - `Continuous` features in the vector (if `ignore=false`) or - `Continuous` features *not* named in the vector (`ignore=true`). 
- - - function or other callable: standardize a feature if the callable - returns `true` on its name. For example, `Standardizer(features = - name -> name in [:x1, :x3], ignore = true, count=true)` has the - same effect as `Standardizer(features = [:x1, :x3], ignore = true, - count=true)`, namely to standardize all `Continuous` and `Count` - features, with the exception of `:x1` and `:x3`. - - Note this behavior is further modified if the `ordered_factor` or `count` flags - are set to `true`; see below - -- `ignore=false`: whether to ignore or standardize specified `features`, as - explained above - -- `ordered_factor=false`: if `true`, standardize any `OrderedFactor` - feature wherever a `Continuous` feature would be standardized, as - described above - -- `count=false`: if `true`, standardize any `Count` feature wherever a - `Continuous` feature would be standardized, as described above - - -# Operations - -- `transform(mach, Xnew)`: return `Xnew` with relevant features - standardized according to the rescalings learned during fitting of - `mach`. - -- `inverse_transform(mach, Z)`: apply the inverse transformation to - `Z`, so that `inverse_transform(mach, transform(mach, Xnew))` is - approximately the same as `Xnew`; unavailable if `ordered_factor` or - `count` flags were set to `true`. 
- - -# Fitted parameters - -The fields of `fitted_params(mach)` are: - -- `features_fit` - the names of features that will be standardized - -- `means` - the corresponding untransformed mean values - -- `stds` - the corresponding untransformed standard deviations - - -# Report - -The fields of `report(mach)` are: - -- `features_fit`: the names of features that will be standardized - - -# Examples - -``` -using MLJ - -X = (ordinal1 = [1, 2, 3], - ordinal2 = coerce([:x, :y, :x], OrderedFactor), - ordinal3 = [10.0, 20.0, 30.0], - ordinal4 = [-20.0, -30.0, -40.0], - nominal = coerce(["Your father", "he", "is"], Multiclass)); - -julia> schema(X) -┌──────────┬──────────────────┐ -│ names │ scitypes │ -├──────────┼──────────────────┤ -│ ordinal1 │ Count │ -│ ordinal2 │ OrderedFactor{2} │ -│ ordinal3 │ Continuous │ -│ ordinal4 │ Continuous │ -│ nominal │ Multiclass{3} │ -└──────────┴──────────────────┘ - -stand1 = Standardizer(); - -julia> transform(fit!(machine(stand1, X)), X) -(ordinal1 = [1, 2, 3], - ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x], - ordinal3 = [-1.0, 0.0, 1.0], - ordinal4 = [1.0, 0.0, -1.0], - nominal = CategoricalValue{String,UInt32}["Your father", "he", "is"],) - -stand2 = Standardizer(features=[:ordinal3, ], ignore=true, count=true); - -julia> transform(fit!(machine(stand2, X)), X) -(ordinal1 = [-1.0, 0.0, 1.0], - ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x], - ordinal3 = [10.0, 20.0, 30.0], - ordinal4 = [1.0, 0.0, -1.0], - nominal = CategoricalValue{String,UInt32}["Your father", "he", "is"],) -``` - -See also [`OneHotEncoder`](@ref), [`ContinuousEncoder`](@ref). -""" -Standardizer - - -""" -$(MLJModelInterface.doc_header(UnivariateDiscretizer)) - -Discretization converts a `Continuous` vector into an `OrderedFactor` -vector. In particular, the output is a `CategoricalVector` (whose -reference type is optimized). 
- -The transformation is chosen so that the vector on which the -transformer is fit has, in transformed form, an approximately uniform -distribution of values. Specifically, if `n_classes` is the level of -discretization, then `2*n_classes - 1` ordered quantiles are computed, -the odd quantiles being used for transforming (discretization) and the -even quantiles for inverse transforming. - - -# Training data - -In MLJ or MLJBase, bind an instance `model` to data with - - mach = machine(model, x) - -where - -- `x`: any abstract vector with `Continuous` element scitype; check - scitype with `scitype(x)`. - -Train the machine using `fit!(mach, rows=...)`. - - -# Hyper-parameters - -- `n_classes`: number of discrete classes in the output - - -# Operations - -- `transform(mach, xnew)`: discretize `xnew` according to the - discretization learned when fitting `mach` - -- `inverse_transform(mach, z)`: attempt to reconstruct from `z` a - vector that transforms to give `z` - - -# Fitted parameters - -The fields of `fitted_params(mach).fitesult` include: - -- `odd_quantiles`: quantiles used for transforming (length is `n_classes - 1`) - -- `even_quantiles`: quantiles used for inverse transforming (length is `n_classes`) - - -# Example - -``` -using MLJ -using Random -Random.seed!(123) - -discretizer = UnivariateDiscretizer(n_classes=100) -mach = machine(discretizer, randn(1000)) -fit!(mach) - -julia> x = rand(5) -5-element Vector{Float64}: - 0.8585244609846809 - 0.37541692370451396 - 0.6767070590395461 - 0.9208844241267105 - 0.7064611415680901 - -julia> z = transform(mach, x) -5-element CategoricalArrays.CategoricalArray{UInt8,1,UInt8}: - 0x52 - 0x42 - 0x4d - 0x54 - 0x4e - -x_approx = inverse_transform(mach, z) -julia> x - x_approx -5-element Vector{Float64}: - 0.008224506144777322 - 0.012731354778359405 - 0.0056265330571125816 - 0.005738175684445124 - 0.006835652575801987 -``` - -""" -UnivariateDiscretizer - - -""" 
-$(MLJModelInterface.doc_header(UnivariateBoxCoxTransformer)) - -Box-Cox transformations attempt to make data look more normally -distributed. This can improve performance and assist in the -interpretation of models which suppose that data is -generated by a normal distribution. - -A Box-Cox transformation (with shift) is of the form - - x -> ((x + c)^λ - 1)/λ - -for some constant `c` and real `λ`, unless `λ = 0`, in which -case the above is replaced with - - x -> log(x + c) - -Given user-specified hyper-parameters `n::Integer` and `shift::Bool`, -the present implementation learns the parameters `c` and `λ` from the -training data as follows: If `shift=true` and zeros are encountered in -the data, then `c` is set to `0.2` times the data mean. If there are -no zeros, then no shift is applied. Finally, `n` different values of `λ` -between `-0.4` and `3` are considered, with `λ` fixed to the value -maximizing normality of the transformed data. - -*Reference:* [Wikipedia entry for power - transform](https://en.wikipedia.org/wiki/Power_transform). - - -# Training data - -In MLJ or MLJBase, bind an instance `model` to data with - - mach = machine(model, x) - -where - -- `x`: any abstract vector with element scitype `Continuous`; check - the scitype with `scitype(x)` - -Train the machine using `fit!(mach, rows=...)`. 
- - -# Hyper-parameters - -- `n=171`: number of values of the exponent `λ` to try - -- `shift=false`: whether to include a preliminary constant translation - in transformations, in the presence of zeros - - -# Operations - -- `transform(mach, xnew)`: apply the Box-Cox transformation learned when fitting `mach` - -- `inverse_transform(mach, z)`: reconstruct the vector `z` whose - transformation learned by `mach` is `z` - - -# Fitted parameters - -The fields of `fitted_params(mach)` are: - -- `λ`: the learned Box-Cox exponent - -- `c`: the learned shift - - -# Examples - -``` -using MLJ -using UnicodePlots -using Random -Random.seed!(123) - -transf = UnivariateBoxCoxTransformer() - -x = randn(1000).^2 - -mach = machine(transf, x) -fit!(mach) - -z = transform(mach, x) - -julia> histogram(x) - ┌ ┐ - [ 0.0, 2.0) ┤███████████████████████████████████ 848 - [ 2.0, 4.0) ┤████▌ 109 - [ 4.0, 6.0) ┤█▍ 33 - [ 6.0, 8.0) ┤▍ 7 - [ 8.0, 10.0) ┤▏ 2 - [10.0, 12.0) ┤ 0 - [12.0, 14.0) ┤▏ 1 - └ ┘ - Frequency - -julia> histogram(z) - ┌ ┐ - [-5.0, -4.0) ┤█▎ 8 - [-4.0, -3.0) ┤████████▊ 64 - [-3.0, -2.0) ┤█████████████████████▊ 159 - [-2.0, -1.0) ┤█████████████████████████████▊ 216 - [-1.0, 0.0) ┤███████████████████████████████████ 254 - [ 0.0, 1.0) ┤█████████████████████████▊ 188 - [ 1.0, 2.0) ┤████████████▍ 90 - [ 2.0, 3.0) ┤██▊ 20 - [ 3.0, 4.0) ┤▎ 1 - └ ┘ - Frequency - -``` - -""" -UnivariateBoxCoxTransformer - - -""" -$(MLJModelInterface.doc_header(OneHotEncoder)) - -Use this model to one-hot encode the `Multiclass` and `OrderedFactor` -features (columns) of some table, leaving other columns unchanged. - -New data to be transformed may lack features present in the fit data, -but no *new* features can be present. - -**Warning:** This transformer assumes that `levels(col)` for any -`Multiclass` or `OrderedFactor` column, `col`, is the same for -training data and new data to be transformed. 
- -To ensure *all* features are transformed into `Continuous` features, or -dropped, use [`ContinuousEncoder`](@ref) instead. - - -# Training data - -In MLJ or MLJBase, bind an instance `model` to data with - - mach = machine(model, X) - -where - -- `X`: any Tables.jl compatible table. Columns can be of mixed type - but only those with element scitype `Multiclass` or `OrderedFactor` - can be encoded. Check column scitypes with `schema(X)`. - -Train the machine using `fit!(mach, rows=...)`. - - -# Hyper-parameters - -- `features`: a vector of symbols (column names). If empty (default) - then all `Multiclass` and `OrderedFactor` features are - encoded. Otherwise, encoding is further restricted to the specified - features (`ignore=false`) or the unspecified features - (`ignore=true`). This default behavior can be modified by the - `ordered_factor` flag. - -- `ordered_factor=false`: when `true`, `OrderedFactor` features are - universally excluded - -- `drop_last=true`: whether to drop the column corresponding to the - final class of encoded features. For example, a three-class feature - is spawned into three new features if `drop_last=false`, but just - two features otherwise. 
- - -# Fitted parameters - -The fields of `fitted_params(mach)` are: - -- `all_features`: names of all features encountered in training - -- `fitted_levels_given_feature`: dictionary of the levels associated - with each feature encoded, keyed on the feature name - -- `ref_name_pairs_given_feature`: dictionary of pairs `r => ftr` (such - as `0x00000001 => :grad__A`) where `r` is a CategoricalArrays.jl - reference integer representing a level, and `ftr` the corresponding - new feature name; the dictionary is keyed on the names of features that - are encoded - - -# Report - -The fields of `report(mach)` are: - -- `features_to_be_encoded`: names of input features to be encoded - -- `new_features`: names of all output features - - -# Example - -``` -using MLJ - -X = (name=categorical(["Danesh", "Lee", "Mary", "John"]), - grade=categorical(["A", "B", "A", "C"], ordered=true), - height=[1.85, 1.67, 1.5, 1.67], - n_devices=[3, 2, 4, 3]) - -julia> schema(X) -┌───────────┬──────────────────┐ -│ names │ scitypes │ -├───────────┼──────────────────┤ -│ name │ Multiclass{4} │ -│ grade │ OrderedFactor{3} │ -│ height │ Continuous │ -│ n_devices │ Count │ -└───────────┴──────────────────┘ - -hot = OneHotEncoder(drop_last=true) -mach = fit!(machine(hot, X)) -W = transform(mach, X) - -julia> schema(W) -┌──────────────┬────────────┐ -│ names │ scitypes │ -├──────────────┼────────────┤ -│ name__Danesh │ Continuous │ -│ name__John │ Continuous │ -│ name__Lee │ Continuous │ -│ grade__A │ Continuous │ -│ grade__B │ Continuous │ -│ height │ Continuous │ -│ n_devices │ Count │ -└──────────────┴────────────┘ -``` - -See also [`ContinuousEncoder`](@ref). - -""" -OneHotEncoder - - -""" -$(MLJModelInterface.doc_header(ContinuousEncoder)) - -Use this model to arrange all features (columns) of a table to have -`Continuous` element scitype, by applying the following protocol to -each feature `ftr`: - -- If `ftr` is already `Continuous` retain it. - -- If `ftr` is `Multiclass`, one-hot encode it. 
- -- If `ftr` is `OrderedFactor`, replace it with `coerce(ftr, - Continuous)` (vector of floating point integers), unless - `ordered_factors=false` is specified, in which case one-hot encode - it. - -- If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`. - -- If `ftr` has some other element scitype, or was not observed in - fitting the encoder, drop it from the table. - -**Warning:** This transformer assumes that `levels(col)` for any -`Multiclass` or `OrderedFactor` column, `col`, is the same for -training data and new data to be transformed. - -To selectively one-hot-encode categorical features (without dropping -columns) use [`OneHotEncoder`](@ref) instead. - - -# Training data - -In MLJ or MLJBase, bind an instance `model` to data with - - mach = machine(model, X) - -where - -- `X`: any Tables.jl compatible table. Columns can be of mixed type - but only those with element scitype `Multiclass` or `OrderedFactor` - can be encoded. Check column scitypes with `schema(X)`. - -Train the machine using `fit!(mach, rows=...)`. - - -# Hyper-parameters - -- `drop_last=true`: whether to drop the column corresponding to the - final class of one-hot encoded features. For example, a three-class - feature is spawned into three new features if `drop_last=false`, but - two just features otherwise. 
- -- `one_hot_ordered_factors=false`: whether to one-hot any feature - with `OrderedFactor` element scitype, or to instead coerce it - directly to a (single) `Continuous` feature using the order - - -# Fitted parameters - -The fields of `fitted_params(mach)` are: - -- `features_to_keep`: names of features that will not be dropped from - the table - -- `one_hot_encoder`: the `OneHotEncoder` model instance for handling - the one-hot encoding - -- `one_hot_encoder_fitresult`: the fitted parameters of the - `OneHotEncoder` model - - -# Report - -- `features_to_keep`: names of input features that will not be dropped - from the table - -- `new_features`: names of all output features - - -# Example - -```julia -X = (name=categorical(["Danesh", "Lee", "Mary", "John"]), - grade=categorical(["A", "B", "A", "C"], ordered=true), - height=[1.85, 1.67, 1.5, 1.67], - n_devices=[3, 2, 4, 3], - comments=["the force", "be", "with you", "too"]) - -julia> schema(X) -┌───────────┬──────────────────┐ -│ names │ scitypes │ -├───────────┼──────────────────┤ -│ name │ Multiclass{4} │ -│ grade │ OrderedFactor{3} │ -│ height │ Continuous │ -│ n_devices │ Count │ -│ comments │ Textual │ -└───────────┴──────────────────┘ - -encoder = ContinuousEncoder(drop_last=true) -mach = fit!(machine(encoder, X)) -W = transform(mach, X) - -julia> schema(W) -┌──────────────┬────────────┐ -│ names │ scitypes │ -├──────────────┼────────────┤ -│ name__Danesh │ Continuous │ -│ name__John │ Continuous │ -│ name__Lee │ Continuous │ -│ grade │ Continuous │ -│ height │ Continuous │ -│ n_devices │ Continuous │ -└──────────────┴────────────┘ - -julia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped features -1-element Vector{Symbol}: - :comments - -``` - -See also [`OneHotEncoder`](@ref) -""" -ContinuousEncoder - - -""" -$(MLJModelInterface.doc_header(UnivariateTimeTypeToContinuous)) - -Use this model to convert vectors with a `TimeType` element type to -vectors of `Float64` type (`Continuous` 
element scitype). - - -# Training data - -In MLJ or MLJBase, bind an instance `model` to data with - - mach = machine(model, x) - -where - -- `x`: any abstract vector whose element type is a subtype of - `Dates.TimeType` - -Train the machine using `fit!(mach, rows=...)`. - - -# Hyper-parameters - -- `zero_time`: the time that is to correspond to 0.0 under - transformations, with the type coinciding with the training data - element type. If unspecified, the earliest time encountered in - training is used. - -- `step::Period=Hour(24)`: time interval to correspond to one unit - under transformation - - -# Operations - -- `transform(mach, xnew)`: apply the encoding inferred when `mach` was fit - - -# Fitted parameters - -`fitted_params(mach).fitresult` is the tuple `(zero_time, step)` -actually used in transformations, which may differ from the -user-specified hyper-parameters. - - -# Example - -``` -using MLJ -using Dates - -x = [Date(2001, 1, 1) + Day(i) for i in 0:4] - -encoder = UnivariateTimeTypeToContinuous(zero_time=Date(2000, 1, 1), - step=Week(1)) - -mach = machine(encoder, x) -fit!(mach) -julia> transform(mach, x) -5-element Vector{Float64}: - 52.285714285714285 - 52.42857142857143 - 52.57142857142857 - 52.714285714285715 - 52.857142 -``` - -""" -UnivariateTimeTypeToContinuous - -""" -$(MLJModelInterface.doc_header(InteractionTransformer)) - -Generates all polynomial interaction terms up to the given order for the subset of chosen -columns. Any column that contains elements with scitype `<:Infinite` is a valid basis to -generate interactions. If `features` is not specified, all such columns with scitype -`<:Infinite` in the table are used as a basis. - -In MLJ or MLJBase, you can transform features `X` with the single call - - transform(machine(model), X) - -See also the example below. - - -# Hyper-parameters - -- `order`: Maximum order of interactions to be generated. 
-- `features`: Restricts interations generation to those columns - -# Operations - -- `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` - using the hyper-parameters specified in `model`. - -# Example - -``` -using MLJ - -X = ( - A = [1, 2, 3], - B = [4, 5, 6], - C = [7, 8, 9], - D = ["x₁", "x₂", "x₃"] -) -it = InteractionTransformer(order=3) -mach = machine(it) - -julia> transform(mach, X) -(A = [1, 2, 3], - B = [4, 5, 6], - C = [7, 8, 9], - D = ["x₁", "x₂", "x₃"], - A_B = [4, 10, 18], - A_C = [7, 16, 27], - B_C = [28, 40, 54], - A_B_C = [28, 80, 162],) - -it = InteractionTransformer(order=2, features=[:A, :B]) -mach = machine(it) - -julia> transform(mach, X) -(A = [1, 2, 3], - B = [4, 5, 6], - C = [7, 8, 9], - D = ["x₁", "x₂", "x₃"], - A_B = [4, 10, 18],) - -``` - -""" -InteractionTransformer diff --git a/src/registry/Metadata.toml b/src/registry/Metadata.toml index 12e4272..05d3433 100644 --- a/src/registry/Metadata.toml +++ b/src/registry/Metadata.toml @@ -1,6 +1,6 @@ [BetaML.RandomForestRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Float64\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -34,10 +34,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.GaussianMixtureImputer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Vector{Float64}\", \"Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}\", \"Float64\", \"Float64\", 
\"Float64\", \"String\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -71,10 +71,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.RandomForestClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Float64\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -108,10 +108,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.RandomForestImputer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Vector{Int64}\", \"Union{Nothing, Function}\", \"Int64\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -145,10 +145,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, 
ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Known}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.PerceptronClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, Matrix{Float64}}\", \"Union{Nothing, Vector{Float64}}\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -182,10 +182,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.AutoEncoder] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Float64, Int64}\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"BetaML.Api.AutoTuneMethod\", \"String\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -219,10 +219,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" ":transform_scitype" = 
"`AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.DecisionTreeRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -256,10 +256,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.PegasosClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, Matrix{Float64}}\", \"Union{Nothing, Vector{Float64}}\", \"Function\", \"Float64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -293,47 +293,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[BetaML.NeuralNetworkRegressor] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Random.AbstractRNG\")`" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" 
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "BetaML.Bmlj.NeuralNetworkRegressor" -":hyperparameters" = "`(:layers, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :descr, :cb, :rng)`" -":is_pure_julia" = "`true`" -":human_name" = "neural network regressor" -":is_supervised" = "`true`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nmutable struct NeuralNetworkRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of a single dimensional target.\n\n# Parameters:\n\n * `layers`: Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n * `loss`: Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices, even if the regression task is 1-D\n\n !!! warning\n If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff.\n * `epochs`: Number of epochs, i.e. 
passages trough the whole training sample [def: `200`]\n * `batch_size`: Size of each individual batch [def: `16`]\n * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `descr`: An optional title and/or description for this model\n * `cb`: A call back function to provide information during training [def: `fitting_info`]\n * `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * the label should be be a *n-records* vector.\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> modelType = @load NeuralNetworkRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Nn.NeuralNetworkRegressor\n\njulia> layers = [BetaML.DenseLayer(12,20,f=BetaML.relu),BetaML.DenseLayer(20,20,f=BetaML.relu),BetaML.DenseLayer(20,1,f=BetaML.relu)];\n\njulia> model = modelType(layers=layers,opt_alg=BetaML.ADAM());\nNeuralNetworkRegressor(\n layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.23249759178069676 -0.4125090172711131 … 0.41401934928739 -0.33017881111237535; -0.27912169279319965 0.270551221249931 … 0.19258414323473344 0.1703002982374256; … ; 0.31186742456482447 0.14776438287394805 … 0.3624993442655036 0.1438885872964824; 0.24363744610286758 -0.3221033024934767 … 0.14886090419299408 0.038411663101909355], [-0.42360286004241765, -0.34355377040029594, 0.11510963232946697, 0.29078650404397893, -0.04940236502546075, 0.05142849152316714, -0.177685375947775, 0.3857630523957018, -0.25454667127064756, -0.1726731848206195, 0.29832456225553444, -0.21138505291162835, -0.15763643112604903, -0.08477044513587562, -0.38436681165349196, 0.20538016429104916, -0.25008157754468335, 0.268681800562054, 0.10600581996650865, 0.4262194464325672], BetaML.Utils.relu, BetaML.Utils.drelu), 
BetaML.Nn.DenseLayer([-0.08534180387478185 0.19659398307677617 … -0.3413633217504578 -0.0484925247381256; 0.0024419192794883915 -0.14614102508129 … -0.21912059923003044 0.2680725396694708; … ; 0.25151545823147886 -0.27532269951606037 … 0.20739970895058063 0.2891938885916349; -0.1699020711688904 -0.1350423717084296 … 0.16947589410758873 0.3629006047373296], [0.2158116357688406, -0.3255582642532289, -0.057314442103850394, 0.29029696770539953, 0.24994080694366455, 0.3624239027782297, -0.30674318230919984, -0.3854738338935017, 0.10809721838554087, 0.16073511121016176, -0.005923262068960489, 0.3157147976348795, -0.10938918304264739, -0.24521229198853187, -0.307167732178712, 0.0808907777008302, -0.014577497150872254, -0.0011287181458157214, 0.07522282588658086, 0.043366500526073104], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.021367697115938555 -0.28326652172347155 … 0.05346175368370165 -0.26037328415871647], [-0.2313659199724562], BetaML.Utils.relu, BetaML.Utils.drelu)], \n loss = BetaML.Utils.squared_cost, \n dloss = BetaML.Utils.dsquared_cost, \n epochs = 100, \n batch_size = 32, \n opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#90#93\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n shuffle = true, \n descr = \"\", \n cb = BetaML.Nn.fitting_info, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> ŷ = predict(mach, X);\n\njulia> hcat(y,ŷ)\n506×2 Matrix{Float64}:\n 24.0 30.7726\n 21.6 28.0811\n 34.7 31.3194\n ⋮ \n 23.9 30.9032\n 22.0 29.49\n 11.9 27.2438\n```\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":package_name" = "BetaML" -":name" = "NeuralNetworkRegressor" -":target_in_fit" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":fit", ":predict"] -":deep_properties" = "`()`" -":predict_scitype" = 
"`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" -":target_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" [BetaML.KMeansClusterer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Function\", \"String\", \"Union{Nothing, Matrix{Float64}}\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -367,10 +330,47 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":constructor" = "`nothing`" + +[BetaML.NeuralNetworkRegressor] ":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, 
ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "BetaML.Bmlj.NeuralNetworkRegressor" +":hyperparameters" = "`(:layers, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :descr, :cb, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "neural network regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct NeuralNetworkRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of a single dimensional target.\n\n# Parameters:\n\n * `layers`: Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n * `loss`: Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices, even if the regression task is 1-D\n\n !!! warning\n If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff.\n * `epochs`: Number of epochs, i.e. passages trough the whole training sample [def: `200`]\n * `batch_size`: Size of each individual batch [def: `16`]\n * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. 
See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `descr`: An optional title and/or description for this model\n * `cb`: A call back function to provide information during training [def: `fitting_info`]\n * `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * the label should be be a *n-records* vector.\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> modelType = @load NeuralNetworkRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Nn.NeuralNetworkRegressor\n\njulia> layers = [BetaML.DenseLayer(12,20,f=BetaML.relu),BetaML.DenseLayer(20,20,f=BetaML.relu),BetaML.DenseLayer(20,1,f=BetaML.relu)];\n\njulia> model = modelType(layers=layers,opt_alg=BetaML.ADAM());\nNeuralNetworkRegressor(\n layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.23249759178069676 -0.4125090172711131 … 0.41401934928739 -0.33017881111237535; -0.27912169279319965 0.270551221249931 … 0.19258414323473344 0.1703002982374256; … ; 0.31186742456482447 0.14776438287394805 … 0.3624993442655036 0.1438885872964824; 0.24363744610286758 -0.3221033024934767 … 0.14886090419299408 0.038411663101909355], [-0.42360286004241765, -0.34355377040029594, 0.11510963232946697, 0.29078650404397893, -0.04940236502546075, 0.05142849152316714, -0.177685375947775, 0.3857630523957018, -0.25454667127064756, -0.1726731848206195, 0.29832456225553444, -0.21138505291162835, -0.15763643112604903, -0.08477044513587562, -0.38436681165349196, 0.20538016429104916, -0.25008157754468335, 0.268681800562054, 0.10600581996650865, 0.4262194464325672], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.08534180387478185 0.19659398307677617 … -0.3413633217504578 -0.0484925247381256; 0.0024419192794883915 -0.14614102508129 … -0.21912059923003044 0.2680725396694708; … ; 0.25151545823147886 
-0.27532269951606037 … 0.20739970895058063 0.2891938885916349; -0.1699020711688904 -0.1350423717084296 … 0.16947589410758873 0.3629006047373296], [0.2158116357688406, -0.3255582642532289, -0.057314442103850394, 0.29029696770539953, 0.24994080694366455, 0.3624239027782297, -0.30674318230919984, -0.3854738338935017, 0.10809721838554087, 0.16073511121016176, -0.005923262068960489, 0.3157147976348795, -0.10938918304264739, -0.24521229198853187, -0.307167732178712, 0.0808907777008302, -0.014577497150872254, -0.0011287181458157214, 0.07522282588658086, 0.043366500526073104], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.021367697115938555 -0.28326652172347155 … 0.05346175368370165 -0.26037328415871647], [-0.2313659199724562], BetaML.Utils.relu, BetaML.Utils.drelu)], \n loss = BetaML.Utils.squared_cost, \n dloss = BetaML.Utils.dsquared_cost, \n epochs = 100, \n batch_size = 32, \n opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#90#93\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n shuffle = true, \n descr = \"\", \n cb = BetaML.Nn.fitting_info, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> ŷ = predict(mach, X);\n\njulia> hcat(y,ŷ)\n506×2 Matrix{Float64}:\n 24.0 30.7726\n 21.6 28.0811\n 34.7 31.3194\n ⋮ \n 23.9 30.9032\n 22.0 29.49\n 11.9 27.2438\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/sylvaticus/BetaML.jl" +":package_name" = "BetaML" +":name" = "NeuralNetworkRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" +":target_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" +":supports_training_losses" = "`false`" 
+":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" [BetaML.MultitargetGaussianMixtureRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Vector{Float64}\", \"Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Int64\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -404,10 +404,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Infinite}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.GaussianMixtureRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Vector{Float64}\", \"Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Int64\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -441,10 +441,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Infinite}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" 
-":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.MultitargetNeuralNetworkRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -478,10 +478,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.DecisionTreeClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -515,10 +515,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.GeneralImputer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{String, Vector{Int64}}\", \"Any\", \"Union{Bool, Vector{Bool}}\", \"Union{Function, Vector{Function}}\", \"Union{Function, Vector{Function}}\", \"Int64\", 
\"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -552,10 +552,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Known}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.NeuralNetworkClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Union{Nothing, Vector}\", \"String\", \"Any\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -589,10 +589,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.SimpleImputer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Function\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -626,10 +626,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = 
"`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.GaussianMixtureClusterer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"AbstractVector{Float64}\", \"Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Int64\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -663,10 +663,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`AbstractArray{<:ScientificTypesBase.Multiclass}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.KernelPerceptronClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Function\", \"Int64\", \"Union{Nothing, Vector{Vector{Int64}}}\", \"Bool\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -700,10 +700,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.KMedoidsClusterer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = 
"`(\"Int64\", \"Function\", \"String\", \"Union{Nothing, Matrix{Float64}}\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -737,11 +737,48 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" + +[MLJEnsembles.EnsembleModel] +":constructor" = "`EnsembleModel`" +":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Vector{Float64}\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Int64\", \"ComputationalResources.AbstractResource\", \"Any\")`" +":package_uuid" = "50ed68f4-41fd-4504-931a-ed422449fee0" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "unknown" +":prediction_type" = ":probabilistic" +":load_path" = "MLJEnsembles.EnsembleModel" +":hyperparameters" = "`(:model, :atomic_weights, :bagging_fraction, :rng, :n, :acceleration, :out_of_bag_measure)`" +":is_pure_julia" = "`false`" +":human_name" = "probabilistic ensemble model" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nEnsembleModel(model,\n atomic_weights=Float64[],\n bagging_fraction=0.8,\n n=100,\n rng=GLOBAL_RNG,\n acceleration=CPU1(),\n out_of_bag_measure=[])\n```\n\nCreate a model for training an ensemble of `n` clones of `model`, with optional bagging. 
Ensembling is useful if `fit!(machine(atom, data...))` does not create identical models on repeated calls (ie, is a stochastic model, such as a decision tree with randomized node selection criteria), or if `bagging_fraction` is set to a value less than 1.0, or both.\n\nHere the atomic `model` must support targets with scitype `AbstractVector{<:Finite}` (single-target classifiers) or `AbstractVector{<:Continuous}` (single-target regressors).\n\nIf `rng` is an integer, then `MersenneTwister(rng)` is the random number generator used for bagging. Otherwise some `AbstractRNG` object is expected.\n\nThe atomic predictions are optionally weighted according to the vector `atomic_weights` (to allow for external optimization) except in the case that `model` is a `Deterministic` classifier, in which case `atomic_weights` are ignored.\n\nThe ensemble model is `Deterministic` or `Probabilistic`, according to the corresponding supertype of `atom`. In the case of deterministic classifiers (`target_scitype(atom) <: Abstract{<:Finite}`), the predictions are majority votes, and for regressors (`target_scitype(atom)<: AbstractVector{<:Continuous}`) they are ordinary averages. 
Probabilistic predictions are obtained by averaging the atomic probability distribution/mass functions; in particular, for regressors, the ensemble prediction on each input pattern has the type `MixtureModel{VF,VS,D}` from the Distributions.jl package, where `D` is the type of predicted distribution for `atom`.\n\nSpecify `acceleration=CPUProcesses()` for distributed computing, or `CPUThreads()` for multithreading.\n\nIf a single measure or non-empty vector of measures is specified by `out_of_bag_measure`, then out-of-bag estimates of performance are written to the training report (call `report` on the trained machine wrapping the ensemble model).\n\n*Important:* If per-observation or class weights `w` (not to be confused with atomic weights) are specified when constructing a machine for the ensemble model, as in `mach = machine(ensemble_model, X, y, w)`, then `w` is used by any measures specified in `out_of_bag_measure` that support them.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJEnsembles.jl" +":package_name" = "MLJEnsembles" +":name" = "EnsembleModel" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`true`" [CatBoost.CatBoostRegressor] -":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Int64\", 
\"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Union{Nothing, PythonCall.Core.Py, String}\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"String\", \"String\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Core.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Core.Py}\")`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Int64\", \"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Union{Nothing, String, PythonCall.Py}\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", 
\"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"String\", \"String\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Py}\")`" ":package_uuid" = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" @@ -774,11 +811,11 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [CatBoost.CatBoostClassifier] -":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Int64\", \"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, 
String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Int64}\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"String\", \"String\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Core.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Core.Py}\")`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Int64\", \"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Int64}\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", 
\"String\", \"String\", \"String\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Py}\")`" ":package_uuid" = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" @@ -811,7 +848,7 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [NearestNeighborModels.KNNClassifier] ":is_wrapper" = "`false`" @@ -1073,7 +1110,7 @@ ":constructor" = "`nothing`" [MLJScikitLearnInterface.ProbabilisticSGDClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Int64\", \"Union{Nothing, Float64}\", \"Bool\", \"Int64\", \"Float64\", \"Union{Nothing, Int64}\", \"Any\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", 
\"Any\", \"Bool\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1107,10 +1144,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RidgeCVClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"AbstractArray{Float64}\", \"Bool\", \"Any\", \"Int64\", \"Any\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1144,10 +1181,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LogisticClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Bool\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Any\", \"Any\", \"String\", \"Int64\", \"String\", \"Int64\", \"Bool\", \"Union{Nothing, Int64}\", \"Union{Nothing, Float64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1181,10 +1218,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = 
"`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RandomForestRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1218,10 +1255,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ElasticNetCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Float64, Vector{Float64}}\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Any\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Bool\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1255,10 +1292,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" 
-":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.PerceptronClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, String}\", \"Float64\", \"Bool\", \"Int64\", \"Union{Nothing, Float64}\", \"Bool\", \"Int64\", \"Float64\", \"Union{Nothing, Int64}\", \"Any\", \"Bool\", \"Float64\", \"Int64\", \"Any\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1292,10 +1329,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MultiTaskLassoRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1329,10 +1366,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LinearRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -1366,10 +1403,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.HDBSCAN] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Union{Nothing, Int64}\", \"String\", \"Float64\", \"String\", \"Int64\", \"String\", \"Bool\", \"Union{Nothing, String}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1403,10 +1440,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.DBSCAN] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"String\", \"String\", \"Int64\", \"Union{Nothing, Float64}\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1440,10 +1477,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RidgeRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Float64, Vector{Float64}}\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"String\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, 
nothing)`" @@ -1477,10 +1514,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoLarsICRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1514,10 +1551,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ARDRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Bool\", \"Bool\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1551,10 +1588,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMNuRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Int64\")`" 
":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1588,10 +1625,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RidgeClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Float64\", \"Any\", \"String\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1625,10 +1662,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SGDRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Union{Bool, Int64}\", \"Float64\", \"Any\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Bool\", \"Union{Bool, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1662,10 +1699,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" 
-":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ComplementNBClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Nothing, AbstractVector}\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -1699,10 +1736,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.HuberRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1736,10 +1773,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMNuClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"String\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1773,10 +1810,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" 
= "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GradientBoostingClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Float64\", \"Int64\", \"Float64\", \"String\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"Any\", \"Union{Nothing, Float64, Int64, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1810,10 +1847,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GaussianProcessRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Union{Float64, AbstractArray}\", \"Any\", \"Int64\", \"Bool\", \"Bool\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1847,10 +1884,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMLinearRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Float64\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Any\", \"Int64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" 
":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1884,10 +1921,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LarsRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1921,10 +1958,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MeanShift] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, Float64}\", \"Union{Nothing, AbstractArray}\", \"Bool\", \"Int64\", \"Bool\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1958,10 +1995,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.HistGradientBoostingClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", 
\"Float64\", \"Int64\", \"Union{Nothing, Vector}\", \"Union{Nothing, Dict, Vector}\", \"Any\", \"Bool\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Any\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1995,10 +2032,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.AdaBoostRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Float64\", \"String\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -2032,10 +2069,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.AffinityPropagation] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Int64\", \"Bool\", \"Any\", \"String\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2069,10 +2106,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" 
+":constructor" = "`nothing`" [MLJScikitLearnInterface.MultiTaskLassoCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Any\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2106,10 +2143,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.OrthogonalMatchingPursuitRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, Int64}\", \"Union{Nothing, Float64}\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -2143,10 +2180,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BernoulliNBClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Union{Nothing, Float64}\", \"Bool\", \"Union{Nothing, AbstractVector}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -2180,10 +2217,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" ":transform_scitype" = 
"`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.PassiveAggressiveClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Any\", \"Bool\", \"Any\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2217,10 +2254,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RidgeCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, String}\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2254,10 +2291,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Any\", \"Int64\", \"Int64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" 
@@ -2291,10 +2328,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GaussianNBClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, AbstractVector{Float64}}\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -2328,10 +2365,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ExtraTreesClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Any\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2365,10 +2402,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.KMeans] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" 
":hyperparameter_types" = "`(\"Int64\", \"Union{Int64, String}\", \"Int64\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"String\", \"Union{String, AbstractArray}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2402,10 +2439,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MultiTaskElasticNetCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Float64, Vector{Float64}}\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"Int64\", \"Float64\", \"Any\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2439,10 +2476,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoLarsCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Int64\", \"Union{Bool, String, AbstractMatrix}\", \"Any\", \"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2476,10 +2513,10 @@ 
":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.OrthogonalMatchingPursuitCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Union{Nothing, Int64}\", \"Union{Bool, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2513,10 +2550,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.AdaBoostClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Float64\", \"String\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -2550,10 +2587,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.PassiveAggressiveRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Bool\", \"Union{Bool, Int64}\", \"String\", \"Float64\", \"Any\", \"Bool\", \"Union{Bool, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2587,10 +2624,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BayesianRidgeRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2624,10 +2661,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GaussianProcessClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Any\", \"Int64\", \"Bool\", \"Any\", \"Int64\", \"Bool\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2661,10 +2698,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BaggingClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", 
\"Any\", \"Int64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2698,10 +2735,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.OPTICS] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Float64, Int64}\", \"Float64\", \"String\", \"Int64\", \"String\", \"Union{Nothing, Float64}\", \"Float64\", \"Bool\", \"Union{Nothing, Float64, Int64}\", \"String\", \"Int64\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2735,10 +2772,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RANSACRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Union{Float64, Int64}\", \"Union{Nothing, Float64}\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Function, String}\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2772,10 +2809,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = 
"`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.KNeighborsRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Union{Function, String}\", \"String\", \"Int64\", \"Int64\", \"Any\", \"Any\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2809,10 +2846,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.HistGradientBoostingRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Union{Nothing, Float64}\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Vector}\", \"Union{Nothing, Dict, Vector}\", \"Any\", \"Bool\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2846,10 +2883,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MiniBatchKMeans] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", 
\"Int64\", \"Int64\", \"Bool\", \"Any\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Union{Int64, String}\", \"Union{String, AbstractArray}\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2883,10 +2920,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Any\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Bool\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2920,10 +2957,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.DummyRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Any\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -2957,10 +2994,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = 
"`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BisectingKMeans] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"String\", \"Union{String, AbstractArray}\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2994,10 +3031,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoLarsRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3031,10 +3068,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LarsCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Int64\", \"Union{Bool, String, AbstractMatrix}\", \"Any\", \"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing)`" @@ -3068,10 +3105,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.KNeighborsClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Union{Function, String}\", \"String\", \"Int64\", \"Int64\", \"Any\", \"Any\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3105,10 +3142,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMLinearClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"String\", \"Bool\", \"Float64\", \"Float64\", \"String\", \"Bool\", \"Float64\", \"Any\", \"Int64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3142,10 +3179,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.FeatureAgglomeration] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Any\", \"Any\", \"Any\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64}\")`" ":package_uuid" = 
"3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3179,10 +3216,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.DummyClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Any\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -3216,10 +3253,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BaggingRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3253,10 +3290,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BayesianQDA] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, AbstractVector}\", \"Float64\", \"Bool\", \"Float64\")`" ":package_uuid" = 
"3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -3290,10 +3327,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BayesianLDA] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Union{Nothing, Float64, String}\", \"Union{Nothing, AbstractVector}\", \"Union{Nothing, Int64}\", \"Bool\", \"Float64\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3327,10 +3364,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SGDClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Int64\", \"Union{Nothing, Float64}\", \"Bool\", \"Int64\", \"Float64\", \"Union{Nothing, Int64}\", \"Any\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3364,10 +3401,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" 
-":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.TheilSenRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Float64\", \"Any\", \"Union{Nothing, Int64}\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3401,10 +3438,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SpectralClustering] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, String}\", \"Any\", \"Int64\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"String\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3438,10 +3475,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.Birch] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -3475,10 +3512,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" 
":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.AgglomerativeClustering] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"String\", \"Any\", \"Any\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3512,10 +3549,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ElasticNetRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Bool\", \"Union{Bool, AbstractMatrix}\", \"Int64\", \"Bool\", \"Float64\", \"Bool\", \"Bool\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3549,10 +3586,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RandomForestClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", 
\"Any\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3586,10 +3623,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LogisticCVClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Int64, AbstractVector{Float64}}\", \"Bool\", \"Any\", \"Bool\", \"String\", \"Any\", \"String\", \"Float64\", \"Int64\", \"Any\", \"Union{Nothing, Int64}\", \"Int64\", \"Bool\", \"Float64\", \"String\", \"Any\", \"Union{Nothing, AbstractVector{Float64}}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3623,10 +3660,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MultiTaskElasticNetRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Union{Float64, Vector{Float64}}\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing)`" @@ -3660,10 +3697,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ExtraTreesRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3697,10 +3734,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Bool, AbstractMatrix}\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3734,10 +3771,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = 
"`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MultinomialNBClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Nothing, AbstractVector}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -3771,10 +3808,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GradientBoostingRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Float64\", \"Int64\", \"Float64\", \"String\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"Any\", \"Union{Nothing, Float64, Int64, String}\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3808,10 +3845,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"String\", \"Any\")`" ":package_uuid" = 
"3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3845,7 +3882,7 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionNeighbors.ABODDetector] ":is_wrapper" = "`false`" @@ -4181,7 +4218,7 @@ ":constructor" = "`nothing`" [MLJIteration.IteratedModel] -":constructor" = "`IteratedModel`" +":is_wrapper" = "`true`" ":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", \"Union{Nothing, Dict{Any, <:Real}}\", \"Any\", \"Bool\", \"Bool\", \"Union{Nothing, Expr, Symbol}\", \"Bool\")`" ":package_uuid" = "614be32b-d00c-4edb-bd02-1eb411ab5e55" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4215,10 +4252,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`true`" +":constructor" = "`IteratedModel`" [MLJTSVDInterface.TSVDTransformer] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Union{Int64, Random.AbstractRNG}\")`" ":package_uuid" = "9449cd9e-2762-5aa3-a617-5413e99d722e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -4252,7 +4289,7 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractMatrix{ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [PartitionedLS.PartLS] ":constructor" = "`nothing`" @@ -4292,7 +4329,7 @@ ":is_wrapper" = "`false`" [MLJLinearModels.QuantileRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Real\", \"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4326,10 +4363,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJLinearModels.LogisticClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4363,10 +4400,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJLinearModels.MultinomialClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4400,10 
+4437,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJLinearModels.LADRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4437,10 +4474,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJLinearModels.RidgeRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Real\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -4474,10 +4511,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJLinearModels.RobustRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"MLJLinearModels.RobustRho\", \"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ 
-4511,10 +4548,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJLinearModels.ElasticNetRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Real\", \"Real\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4548,10 +4585,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJLinearModels.LinearRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -4585,10 +4622,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJLinearModels.LassoRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Real\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -4622,10 +4659,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" 
":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJLinearModels.HuberRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Real\", \"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4659,7 +4696,7 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [Maxnet.MaxnetBinaryClassifier] ":constructor" = "`nothing`" @@ -4736,7 +4773,7 @@ ":constructor" = "`nothing`" [MLJNaiveBayesInterface.GaussianNBClassifier] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`()`" ":package_uuid" = "9bbee03b-0db5-5f46-924f-b5c9c21b8c60" ":hyperparameter_ranges" = "`()`" @@ -4770,10 +4807,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJNaiveBayesInterface.MultinomialNBClassifier] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Int64\",)`" ":package_uuid" = "9bbee03b-0db5-5f46-924f-b5c9c21b8c60" ":hyperparameter_ranges" = "`(nothing,)`" @@ -4807,10 +4844,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}, AbstractMatrix{<:ScientificTypesBase.Count}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" 
-":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJDecisionTreeInterface.AdaBoostStumpClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" ":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -4844,10 +4881,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJDecisionTreeInterface.DecisionTreeRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Bool\", \"Float64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" ":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4881,10 +4918,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJDecisionTreeInterface.DecisionTreeClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Bool\", \"Float64\", \"Int64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" ":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing)`" @@ -4918,10 +4955,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJDecisionTreeInterface.RandomForestRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" ":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4955,10 +4992,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJDecisionTreeInterface.RandomForestClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" ":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4992,10 +5029,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" ":transform_scitype" = 
"`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJBase.Pipeline] -":constructor" = "`Pipeline`" +":is_wrapper" = "`true`" ":hyperparameter_types" = "`(\"NamedTuple\", \"Bool\")`" ":package_uuid" = "unknown" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -5029,10 +5066,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`true`" +":constructor" = "`Pipeline`" [MLJBase.Resampler] -":constructor" = "`MLJBase.Resampler`" +":is_wrapper" = "`true`" ":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", \"Union{Nothing, AbstractDict{<:Any, <:Real}}\", \"Any\", \"ComputationalResources.AbstractResource\", \"Bool\", \"Int64\", \"Bool\", \"Bool\", \"Any\", \"Bool\")`" ":package_uuid" = "unknown" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5066,10 +5103,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`true`" +":constructor" = "`MLJBase.Resampler`" [MLJBase.Stack] -":constructor" = "`MLJBase.Stack`" +":is_wrapper" = "`true`" ":hyperparameter_types" = "`(\"Vector{MLJModelInterface.Supervised}\", \"MLJModelInterface.Probabilistic\", \"Any\", \"Union{Nothing, AbstractVector}\", \"Bool\", \"ComputationalResources.AbstractResource\")`" ":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5103,10 +5140,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`true`" +":constructor" = "`MLJBase.Stack`" [MLJBase.TransformedTargetModel] 
-":constructor" = "`TransformedTargetModel`" +":is_wrapper" = "`true`" ":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Any\", \"Any\", \"Any\")`" ":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -5140,10 +5177,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`true`" +":constructor" = "`TransformedTargetModel`" [MLJClusteringInterface.HierarchicalClustering] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Symbol\", \"Distances.SemiMetric\", \"Symbol\", \"Union{Nothing, Float64}\", \"Int64\")`" ":package_uuid" = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -5177,10 +5214,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJClusteringInterface.DBSCAN] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Real\", \"Int64\", \"Int64\", \"Int64\")`" ":package_uuid" = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -5214,10 +5251,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJClusteringInterface.KMeans] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Int64\", \"Distances.SemiMetric\", \"Any\")`" ":package_uuid" = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" 
":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -5251,10 +5288,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJClusteringInterface.AffinityPropagation] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Float64\", \"Union{Nothing, Float64}\", \"Distances.SemiMetric\")`" ":package_uuid" = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -5288,10 +5325,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJClusteringInterface.KMedoids] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Int64\", \"Distances.SemiMetric\", \"Any\")`" ":package_uuid" = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -5325,10 +5362,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJBalancing.BalancedBaggingClassifier] -":is_wrapper" = "`true`" +":constructor" = "`MLJBalancing.BalancedBaggingClassifier`" ":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Integer\", \"Union{Integer, Random.AbstractRNG}\")`" ":package_uuid" = "45f359ea-796d-4f51-95a5-deb1a414c586" ":hyperparameter_ranges" = "`(nothing, 
nothing, nothing)`" @@ -5362,10 +5399,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`MLJBalancing.BalancedBaggingClassifier`" +":is_wrapper" = "`true`" [MLJBalancing.BalancedModel] -":is_wrapper" = "`true`" +":constructor" = "`BalancedModel`" ":hyperparameter_types" = "`(\"Any\", \"MLJModelInterface.Probabilistic\")`" ":package_uuid" = "45f359ea-796d-4f51-95a5-deb1a414c586" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -5399,10 +5436,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`BalancedModel`" +":is_wrapper" = "`true`" [Imbalance.RandomOversampler] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -5436,10 +5473,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" ":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [Imbalance.SMOTENC] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"Any\", \"AbstractString\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -5473,10 +5510,10 @@ ":reports_feature_importances" = 
"`false`" ":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" ":transform_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [Imbalance.TomekUndersampler] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -5510,10 +5547,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [Imbalance.ClusterUndersampler] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"AbstractString\", \"Any\", \"Integer\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -5547,10 +5584,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":constructor" = "`nothing`" 
+":is_wrapper" = "`false`" [Imbalance.SMOTE] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -5584,47 +5621,47 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":constructor" = "`nothing`" - -[Imbalance.SMOTEN] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Integer\", \"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" + +[Imbalance.RandomUndersampler] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{}`" -":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" +":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "unknown" ":prediction_type" = ":unknown" -":load_path" = "Imbalance.MLJ.SMOTEN" -":hyperparameters" = "`(:k, :ratios, :rng, :try_preserve_type)`" +":load_path" = "Imbalance.MLJ.RandomUndersampler" 
+":hyperparameters" = "`(:ratios, :rng, :try_preserve_type)`" ":is_pure_julia" = "`true`" -":human_name" = "smoten" +":human_name" = "random undersampler" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """Initiate a SMOTEN model with the given hyper-parameters.\n\n```\nSMOTEN\n```\n\nA model type for constructing a smoten, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSMOTEN = @load SMOTEN pkg=Imbalance\n```\n\nDo `model = SMOTEN()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SMOTEN(k=...)`.\n\n`SMOTEN` implements the SMOTEN algorithm to correct for class imbalance as in N. V. Chawla, K. W. Bowyer, L. O.Hall, W. P. Kegelmeyer, “SMOTEN: synthetic minority over-sampling technique,” Journal of artificial intelligence research, 321-357, 2002.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, model can be constructed by\n\n```\nmodel = SMOTEN()\n```\n\n# Hyperparameters\n\n * `k=5`: Number of nearest neighbors to consider in the SMOTEN algorithm. Should be within the range `[1, n - 1]`, where `n` is the number of observations; otherwise set to the nearest of these two values.\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. 
By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix of integers or a table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Finite`. That is, for table inputs each column should have either `OrderedFactor` or `Multiclass` as the element [scitype](https://juliaai.github.io/ScientificTypes.jl/).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling. depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using SMOTEN, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nusing ScientificTypes\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows = 100\nnum_continuous_feats = 0\n# want two categorical features with three and two possible values respectively\nnum_vals_per_category = [3, 2]\n\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, num_vals_per_category, rng=42) \njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\njulia> ScientificTypes.schema(X).scitypes\n(Count, Count)\n\n# coerce to a finite scitype (multiclass or ordered factor)\nX = coerce(X, 
autotype(X, :few_to_finite))\n\n# load SMOTEN\nSMOTEN = @load SMOTEN pkg=Imbalance\n\n# wrap the model in a machine\noversampler = SMOTEN(k=5, ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n```\n""" -":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" +":docstring" = """Initiate a random undersampling model with the given hyper-parameters.\n\n```\nRandomUndersampler\n```\n\nA model type for constructing a random undersampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomUndersampler = @load RandomUndersampler pkg=Imbalance\n```\n\nDo `model = RandomUndersampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomUndersampler(ratios=...)`.\n\n`RandomUndersampler` implements naive undersampling by randomly removing existing observations. \n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by mach = machine(model)\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. 
\n\nFor default values of the hyper-parameters, model can be constructed by model = RandomUndersampler()\n\n# Hyperparameters\n\n * `ratios=1.0`: A parameter that controls the amount of undersampling to be done for each class\n\n * Can be a float and in this case each class will be undersampled to the size of the minority class times the float. By default, all classes are undersampled to the size of the minority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix of real numbers or a table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Union{Finite, Infinite}`. Elements in nominal columns should subtype `Finite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `OrderedFactor` or `Multiclass`) and elements in continuous columns should subtype `Infinite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `Count` or `Continuous`).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `X_under`: A matrix or table that includes the data after undersampling depending on whether the input `X` is a matrix or table respectively\n * `y_under`: An abstract vector of labels corresponding to `X_under`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using RandomUndersampler, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, 
rng=42) \n\njulia> Imbalance.checkbalance(y; ref=\"minority\")\n 1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n 2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (173.7%) \n 0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (252.6%) \n\n# load RandomUndersampler\nRandomUndersampler = @load RandomUndersampler pkg=Imbalance\n\n# wrap the model in a machine\nundersampler = RandomUndersampler(ratios=Dict(0=>1.0, 1=> 1.0, 2=>1.0), \n rng=42)\nmach = machine(undersampler)\n\n# provide the data to transform (there is nothing to fit)\nX_under, y_under = transform(mach, X, y)\n \njulia> Imbalance.checkbalance(y_under; ref=\"minority\")\n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n```\n""" +":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" ":package_url" = "https://github.com/JuliaAI/Imbalance.jl" ":package_name" = "Imbalance" -":name" = "SMOTEN" +":name" = "RandomUndersampler" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":transform_scitype", ":clean!", ":transform"] +":implemented_methods" = [":transform_scitype", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" -":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, 
AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" -":constructor" = "`nothing`" +":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" +":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" +":is_wrapper" = "`false`" [Imbalance.ROSE] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"AbstractFloat\", \"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -5658,47 +5695,47 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":constructor" = "`nothing`" - -[Imbalance.RandomUndersampler] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" + +[Imbalance.SMOTEN] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{}`" -":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" +":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "unknown" ":prediction_type" = ":unknown" -":load_path" = "Imbalance.MLJ.RandomUndersampler" -":hyperparameters" = "`(:ratios, :rng, :try_preserve_type)`" +":load_path" = "Imbalance.MLJ.SMOTEN" +":hyperparameters" = "`(:k, :ratios, :rng, :try_preserve_type)`" ":is_pure_julia" = "`true`" -":human_name" = "random undersampler" +":human_name" = "smoten" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """Initiate a random undersampling model with the given hyper-parameters.\n\n```\nRandomUndersampler\n```\n\nA model type for constructing a random undersampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomUndersampler = @load RandomUndersampler pkg=Imbalance\n```\n\nDo `model = RandomUndersampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomUndersampler(ratios=...)`.\n\n`RandomUndersampler` implements naive undersampling by randomly removing existing observations. \n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by mach = machine(model)\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. 
\n\nFor default values of the hyper-parameters, model can be constructed by model = RandomUndersampler()\n\n# Hyperparameters\n\n * `ratios=1.0`: A parameter that controls the amount of undersampling to be done for each class\n\n * Can be a float and in this case each class will be undersampled to the size of the minority class times the float. By default, all classes are undersampled to the size of the minority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix of real numbers or a table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Union{Finite, Infinite}`. Elements in nominal columns should subtype `Finite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `OrderedFactor` or `Multiclass`) and elements in continuous columns should subtype `Infinite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `Count` or `Continuous`).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `X_under`: A matrix or table that includes the data after undersampling depending on whether the input `X` is a matrix or table respectively\n * `y_under`: An abstract vector of labels corresponding to `X_under`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using RandomUndersampler, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, 
rng=42) \n\njulia> Imbalance.checkbalance(y; ref=\"minority\")\n 1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n 2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (173.7%) \n 0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (252.6%) \n\n# load RandomUndersampler\nRandomUndersampler = @load RandomUndersampler pkg=Imbalance\n\n# wrap the model in a machine\nundersampler = RandomUndersampler(ratios=Dict(0=>1.0, 1=> 1.0, 2=>1.0), \n rng=42)\nmach = machine(undersampler)\n\n# provide the data to transform (there is nothing to fit)\nX_under, y_under = transform(mach, X, y)\n \njulia> Imbalance.checkbalance(y_under; ref=\"minority\")\n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n```\n""" -":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" +":docstring" = """Initiate a SMOTEN model with the given hyper-parameters.\n\n```\nSMOTEN\n```\n\nA model type for constructing a smoten, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSMOTEN = @load SMOTEN pkg=Imbalance\n```\n\nDo `model = SMOTEN()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SMOTEN(k=...)`.\n\n`SMOTEN` implements the SMOTEN algorithm to correct for class imbalance as in N. V. Chawla, K. W. Bowyer, L. O.Hall, W. P. 
Kegelmeyer, “SMOTEN: synthetic minority over-sampling technique,” Journal of artificial intelligence research, 321-357, 2002.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, model can be constructed by\n\n```\nmodel = SMOTEN()\n```\n\n# Hyperparameters\n\n * `k=5`: Number of nearest neighbors to consider in the SMOTEN algorithm. Should be within the range `[1, n - 1]`, where `n` is the number of observations; otherwise set to the nearest of these two values.\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix of integers or a table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Finite`. That is, for table inputs each column should have either `OrderedFactor` or `Multiclass` as the element [scitype](https://juliaai.github.io/ScientificTypes.jl/).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling. 
depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using SMOTEN, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nusing ScientificTypes\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows = 100\nnum_continuous_feats = 0\n# want two categorical features with three and two possible values respectively\nnum_vals_per_category = [3, 2]\n\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, num_vals_per_category, rng=42) \njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\njulia> ScientificTypes.schema(X).scitypes\n(Count, Count)\n\n# coerce to a finite scitype (multiclass or ordered factor)\nX = coerce(X, autotype(X, :few_to_finite))\n\n# load SMOTEN\nSMOTEN = @load SMOTEN pkg=Imbalance\n\n# wrap the model in a machine\noversampler = SMOTEN(k=5, ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n```\n""" +":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" ":package_url" = "https://github.com/JuliaAI/Imbalance.jl" ":package_name" = "Imbalance" -":name" = "RandomUndersampler" +":name" = "SMOTEN" ":target_in_fit" = "`false`" 
":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":transform_scitype", ":transform"] +":implemented_methods" = [":transform_scitype", ":clean!", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" -":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":constructor" = "`nothing`" +":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" +":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" +":is_wrapper" = "`false`" [Imbalance.ENNUndersampler] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"AbstractString\", \"Any\", \"Bool\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5732,10 +5769,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [Imbalance.BorderlineSMOTE1] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"Integer\", \"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"Integer\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5769,10 +5806,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [Imbalance.RandomWalkOversampler] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -5806,7 +5843,7 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" ":transform_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJTuning.TunedModel] ":constructor" = "`TunedModel`" @@ -5846,7 +5883,7 @@ ":is_wrapper" = "`true`" [FeatureSelection.FeatureSelector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Function, Vector{Symbol}}\", \"Bool\")`" ":package_uuid" = 
"33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -5880,10 +5917,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table`" ":transform_scitype" = "`ScientificTypesBase.Table`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [FeatureSelection.RecursiveFeatureElimination] -":constructor" = "`RecursiveFeatureElimination`" +":is_wrapper" = "`true`" ":hyperparameter_types" = "`(\"MLJModelInterface.Supervised\", \"Float64\", \"Float64\")`" ":package_uuid" = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -5917,10 +5954,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`true`" +":constructor" = "`RecursiveFeatureElimination`" [EvoLinear.EvoSplineRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Int64\", \"Symbol\", \"Any\", \"Any\", \"Union{Nothing, Dict}\", \"Any\", \"Symbol\")`" ":package_uuid" = "ab853011-1780-437f-b4b5-5de6f4777246" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5954,10 +5991,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [EvoLinear.EvoLinearRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Symbol\", \"Int64\", \"Any\", \"Any\", \"Any\", \"Any\", \"Symbol\")`" ":package_uuid" = "ab853011-1780-437f-b4b5-5de6f4777246" 
":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5991,10 +6028,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJText.TfidfTransformer] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Bool\")`" ":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -6028,10 +6065,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJText.CountTransformer] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Float64\", \"Float64\")`" ":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -6065,10 +6102,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":constructor" = "`nothing`" 
+":is_wrapper" = "`false`" [MLJText.BM25Transformer] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\")`" ":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -6102,10 +6139,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [LightGBM.LGBMClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"String\", \"Int64\", \"Float64\", \"Int64\", \"String\", \"Int64\", \"String\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Vector{Int64}\", \"String\", \"Float64\", \"Vector{Float64}\", \"String\", \"Float64\", \"Float64\", \"Float64\", \"Vector{Float64}\", \"Vector{Float64}\", \"Float64\", \"Vector{Vector{Int64}}\", \"Int64\", \"Bool\", \"Int64\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"String\", \"String\", \"String\", \"Vector{Int64}\", \"String\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", 
\"Int64\", \"Float64\", \"Bool\", \"Any\", \"Float64\", \"Bool\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Vector{Float64}\", \"Int64\", \"Int64\", \"Int64\", \"String\", \"String\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Bool\")`" ":package_uuid" = "7acf609c-83a4-11e9-1ffb-b912bcd3b04a" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6139,10 +6176,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [LightGBM.LGBMRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"String\", \"Int64\", \"Float64\", \"Int64\", \"String\", \"Int64\", \"String\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Bool\", 
\"Int64\", \"Int64\", \"Bool\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Vector{Int64}\", \"String\", \"Float64\", \"Vector{Float64}\", \"String\", \"Float64\", \"Float64\", \"Float64\", \"Vector{Float64}\", \"Vector{Float64}\", \"Float64\", \"Vector{Vector{Int64}}\", \"Int64\", \"Bool\", \"Int64\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"String\", \"String\", \"String\", \"Vector{Int64}\", \"String\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"String\", \"String\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Bool\")`" ":package_uuid" = "7acf609c-83a4-11e9-1ffb-b912bcd3b04a" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6176,10 +6213,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [LaplaceRedux.LaplaceClassifier] -":is_wrapper" = "`true`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Union{Nothing, Flux.Chain}\", \"Any\", \"Any\", \"Integer\", \"Integer\", \"Symbol\", \"Any\", \"Union{String, Symbol, LaplaceRedux.HessianStructure}\", \"Symbol\", \"Float64\", \"Float64\", \"Union{Nothing, LinearAlgebra.UniformScaling, AbstractMatrix}\", \"Int64\", \"Symbol\")`" ":package_uuid" = "c52c1a26-f7c5-402b-80be-ba1e638ad478" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6213,10 +6250,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`true`" [LaplaceRedux.LaplaceRegressor] -":is_wrapper" = "`true`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Union{Nothing, Flux.Chain}\", \"Any\", \"Any\", \"Integer\", \"Integer\", \"Symbol\", \"Any\", \"Union{String, Symbol, LaplaceRedux.HessianStructure}\", \"Symbol\", \"Float64\", \"Float64\", \"Union{Nothing, LinearAlgebra.UniformScaling, AbstractMatrix}\", \"Int64\")`" ":package_uuid" = "c52c1a26-f7c5-402b-80be-ba1e638ad478" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6250,7 +6287,7 @@ 
":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`true`" [SymbolicRegression.MultitargetSRRegressor] ":is_wrapper" = "`false`" @@ -6327,7 +6364,7 @@ ":constructor" = "`nothing`" [EvoTrees.EvoTreeClassifier] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" ":package_uuid" = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6361,10 +6398,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [EvoTrees.EvoTreeGaussian] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" ":package_uuid" = 
"f6006082-12f8-11e9-0c9c-0d5d367ab1e5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6398,10 +6435,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [EvoTrees.EvoTreeMLE] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" ":package_uuid" = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6435,10 +6472,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [EvoTrees.EvoTreeRegressor] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", 
\"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" ":package_uuid" = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6472,10 +6509,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [EvoTrees.EvoTreeCount] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" ":package_uuid" = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6509,717 +6546,828 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = 
"`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJTestInterface] -[MLJModels.ConstantClassifier] +[MLJModels.DeterministicConstantRegressor] +":input_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, AbstractVector{ScientificTypesBase.Continuous}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":target_in_fit" = "`true`" +":is_pure_julia" = "`true`" +":package_name" = "MLJModels" +":package_license" = "MIT" +":load_path" = "MLJModels.DeterministicConstantRegressor" +":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" ":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nDeterministicConstantRegressor\n```\n\nA model type for constructing a deterministic constant regressor, based on\n[MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nDeterministicConstantRegressor = @load DeterministicConstantRegressor pkg=MLJModels\n```\n\nDo `model = DeterministicConstantRegressor()` to construct an instance with default hyper-parameters. 
""" +":name" = "DeterministicConstantRegressor" +":human_name" = "deterministic constant regressor" +":tags" = [] +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [":fit", ":predict"] +":hyperparameters" = "`()`" ":hyperparameter_types" = "`()`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" ":hyperparameter_ranges" = "`()`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":constructor" = "`nothing`" + +[MLJModels.ConstantClassifier] +":input_scitype" = "`ScientificTypesBase.Table`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":target_in_fit" = "`true`" +":is_pure_julia" = "`true`" +":package_name" = "MLJModels" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" ":load_path" = "MLJModels.ConstantClassifier" -":hyperparameters" = "`()`" -":is_pure_julia" = "`true`" -":human_name" = "constant 
classifier" -":is_supervised" = "`true`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nConstantClassifier\n```\n\nThis \"dummy\" probabilistic predictor always returns the same distribution, irrespective of the provided input pattern. The distribution `d` returned is the `UnivariateFinite` distribution based on frequency of classes observed in the training target data. So, `pdf(d, level)` is the number of times the training target takes on the value `level`. Use `predict_mode` instead of `predict` to obtain the training target mode instead. For more on the `UnivariateFinite` type, see the CategoricalDistributions.jl package.\n\nAlmost any reasonable model is expected to outperform `ConstantClassifier`, which is used almost exclusively for testing and establishing performance baselines.\n\nIn MLJ (or MLJModels) do `model = ConstantClassifier()` to construct an instance.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`)\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Finite`; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nNone.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` (which for this model are ignored). 
Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: Return the mode of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `target_distribution`: The distribution fit to the supplied target data.\n\n# Examples\n\n```julia\nusing MLJ\n\nclf = ConstantClassifier()\n\nX, y = @load_crabs # a table and a categorical vector\nmach = machine(clf, X, y) |> fit!\n\nfitted_params(mach)\n\nXnew = (;FL = [8.1, 24.8, 7.2],\n RW = [5.1, 25.7, 6.4],\n CL = [15.9, 46.7, 14.3],\n CW = [18.7, 59.7, 12.2],\n BD = [6.2, 23.6, 8.4],)\n\n# probabilistic predictions:\nyhat = predict(mach, Xnew)\nyhat[1]\n\n# raw probabilities:\npdf.(yhat, \"B\")\n\n# probability matrix:\nL = levels(y)\npdf(yhat, L)\n\n# point predictions:\npredict_mode(mach, Xnew)\n```\n\nSee also [`ConstantRegressor`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" ":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "ConstantClassifier" -":target_in_fit" = "`true`" +":is_wrapper" = "`false`" +":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" +":docstring" = """```\nConstantClassifier\n```\n\nThis \"dummy\" probabilistic predictor always returns the same distribution, irrespective of the provided input pattern. The distribution `d` returned is the `UnivariateFinite` distribution based on frequency of classes observed in the training target data. So, `pdf(d, level)` is the number of times the training target takes on the value `level`. Use `predict_mode` instead of `predict` to obtain the training target mode instead. 
For more on the `UnivariateFinite` type, see the CategoricalDistributions.jl package.\n\nAlmost any reasonable model is expected to outperform `ConstantClassifier`, which is used almost exclusively for testing and establishing performance baselines.\n\nIn MLJ (or MLJModels) do `model = ConstantClassifier()` to construct an instance.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`)\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Finite`; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nNone.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` (which for this model are ignored). Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: Return the mode of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `target_distribution`: The distribution fit to the supplied target data.\n\n# Examples\n\n```julia\nusing MLJ\n\nclf = ConstantClassifier()\n\nX, y = @load_crabs # a table and a categorical vector\nmach = machine(clf, X, y) |> fit!\n\nfitted_params(mach)\n\nXnew = (;FL = [8.1, 24.8, 7.2],\n RW = [5.1, 25.7, 6.4],\n CL = [15.9, 46.7, 14.3],\n CW = [18.7, 59.7, 12.2],\n BD = [6.2, 23.6, 8.4],)\n\n# probabilistic predictions:\nyhat = predict(mach, Xnew)\nyhat[1]\n\n# raw probabilities:\npdf.(yhat, \"B\")\n\n# probability matrix:\nL = levels(y)\npdf(yhat, L)\n\n# point predictions:\npredict_mode(mach, Xnew)\n```\n\nSee also [`ConstantRegressor`](@ref)\n""" +":name" = "ConstantClassifier" +":human_name" = "constant classifier" +":tags" = [] +":is_supervised" = "`true`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":fit", ":fitted_params", 
":predict"] -":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":hyperparameters" = "`()`" +":hyperparameter_types" = "`()`" +":hyperparameter_ranges" = "`()`" +":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" -":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[MLJModels.Standardizer] -":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Union{Function, AbstractVector{Symbol}}\", \"Bool\", \"Bool\", \"Bool\")`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJModels.Standardizer" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :count)`" +[MLJModels.ConstantRegressor] +":input_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, AbstractVector{ScientificTypesBase.Continuous}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" 
+":target_in_fit" = "`true`" ":is_pure_julia" = "`true`" -":human_name" = "standardizer" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nStandardizer\n```\n\nA model type for constructing a standardizer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStandardizer = @load Standardizer pkg=MLJModels\n```\n\nDo `model = Standardizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `Standardizer(features=...)`.\n\nUse this model to standardize (whiten) a `Continuous` vector, or relevant columns of a table. The rescalings applied by this transformer to new data are always those learned during the training phase, which are generally different from what would actually standardize the new data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table or any abstract vector with `Continuous` element scitype (any abstract float vector). Only features in a table with `Continuous` scitype can be standardized; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated below:\n\n * `[]` (empty, the default): standardize all features (columns) having `Continuous` element scitype\n * non-empty vector of feature names (symbols): standardize only the `Continuous` features in the vector (if `ignore=false`) or `Continuous` features *not* named in the vector (`ignore=true`).\n * function or other callable: standardize a feature if the callable returns `true` on its name. 
For example, `Standardizer(features = name -> name in [:x1, :x3], ignore = true, count=true)` has the same effect as `Standardizer(features = [:x1, :x3], ignore = true, count=true)`, namely to standardize all `Continuous` and `Count` features, with the exception of `:x1` and `:x3`.\n\n Note this behavior is further modified if the `ordered_factor` or `count` flags are set to `true`; see below\n * `ignore=false`: whether to ignore or standardize specified `features`, as explained above\n * `ordered_factor=false`: if `true`, standardize any `OrderedFactor` feature wherever a `Continuous` feature would be standardized, as described above\n * `count=false`: if `true`, standardize any `Count` feature wherever a `Continuous` feature would be standardized, as described above\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with relevant features standardized according to the rescalings learned during fitting of `mach`.\n * `inverse_transform(mach, Z)`: apply the inverse transformation to `Z`, so that `inverse_transform(mach, transform(mach, Xnew))` is approximately the same as `Xnew`; unavailable if `ordered_factor` or `count` flags were set to `true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_fit` - the names of features that will be standardized\n * `means` - the corresponding untransformed mean values\n * `stds` - the corresponding untransformed standard deviations\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_fit`: the names of features that will be standardized\n\n# Examples\n\n```\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([:x, :y, :x], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\njulia> schema(X)\n┌──────────┬──────────────────┐\n│ names │ scitypes │\n├──────────┼──────────────────┤\n│ ordinal1 │ Count │\n│ ordinal2 │ OrderedFactor{2} │\n│ ordinal3 │ Continuous │\n│ 
ordinal4 │ Continuous │\n│ nominal │ Multiclass{3} │\n└──────────┴──────────────────┘\n\nstand1 = Standardizer();\n\njulia> transform(fit!(machine(stand1, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [-1.0, 0.0, 1.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\nstand2 = Standardizer(features=[:ordinal3, ], ignore=true, count=true);\n\njulia> transform(fit!(machine(stand2, X)), X)\n(ordinal1 = [-1.0, 0.0, 1.0],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n```\n\nSee also [`OneHotEncoder`](@ref), [`ContinuousEncoder`](@ref).\n""" -":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" ":package_name" = "MLJModels" -":name" = "Standardizer" -":target_in_fit" = "`false`" +":package_license" = "MIT" +":load_path" = "MLJModels.ConstantRegressor" +":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":docstring" = """```\nConstantRegressor\n```\n\nThis \"dummy\" probabilistic predictor always returns the same distribution, irrespective of the provided input pattern. The distribution returned is the one of the type specified that best fits the training target data. Use `predict_mean` or `predict_median` to predict the mean or median values instead. 
If not specified, a normal distribution is fit.\n\nAlmost any reasonable model is expected to outperform `ConstantRegressor` which is used almost exclusively for testing and establishing performance baselines.\n\nIn MLJ (or MLJModels) do `model = ConstantRegressor()` or `model = ConstantRegressor(distribution=...)` to construct a model instance.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`)\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `distribution_type=Distributions.Normal`: The distribution to be fit to the target data. Must be a subtype of `Distributions.ContinuousUnivariateDistribution`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` (which for this model are ignored). 
Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: Return instead the means of the probabilistic predictions returned above.\n * `predict_median(mach, Xnew)`: Return instead the medians of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `target_distribution`: The distribution fit to the supplied target data.\n\n# Examples\n\n```julia\nusing MLJ\n\nX, y = make_regression(10, 2) # synthetic data: a table and vector\nregressor = ConstantRegressor()\nmach = machine(regressor, X, y) |> fit!\n\nfitted_params(mach)\n\nXnew, _ = make_regression(3, 2)\npredict(mach, Xnew)\npredict_mean(mach, Xnew)\n\n```\n\nSee also [`ConstantClassifier`](@ref)\n""" +":name" = "ConstantRegressor" +":human_name" = "constant regressor" +":tags" = [] +":is_supervised" = "`true`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":implemented_methods" = [":fitted_params", ":predict"] +":hyperparameters" = "`(:distribution_type,)`" +":hyperparameter_types" = "`(\"Type{D} where D<:Distributions.Sampleable\",)`" +":hyperparameter_ranges" = "`(nothing,)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[MLJModels.BinaryThresholdPredictor] +":input_scitype" = "`ScientificTypesBase.Unknown`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":target_in_fit" = "`true`" +":is_pure_julia" = "`false`" 
+":package_name" = "MLJModels" +":package_license" = "unknown" +":load_path" = "MLJModels.BinaryThresholdPredictor" +":package_uuid" = "" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" +":is_wrapper" = "`true`" ":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nBinaryThresholdPredictor(model; threshold=0.5)\n```\n\nWrap the `Probabilistic` model, `model`, assumed to support binary classification, as a `Deterministic` model, by applying the specified `threshold` to the positive class probability. In addition to conventional supervised classifiers, it can also be applied to outlier detection models that predict normalized scores - in the form of appropriate `UnivariateFinite` distributions - that is, models that subtype `AbstractProbabilisticUnsupervisedDetector` or `AbstractProbabilisticSupervisedDetector`.\n\nBy convention the positive class is the second class returned by `levels(y)`, where `y` is the target.\n\nIf `threshold=0.5` then calling `predict` on the wrapped model is equivalent to calling `predict_mode` on the atomic model.\n\n# Example\n\nBelow is an application to the well-known Pima Indian diabetes dataset, including optimization of the `threshold` parameter, with a high balanced accuracy the objective. 
The target class distribution is 500 positives to 268 negatives.\n\nLoading the data:\n\n```julia\nusing MLJ, Random\nrng = Xoshiro(123)\n\ndiabetes = OpenML.load(43582)\noutcome, X = unpack(diabetes, ==(:Outcome), rng=rng);\ny = coerce(Int.(outcome), OrderedFactor);\n```\n\nChoosing a probabilistic classifier:\n\n```julia\nEvoTreesClassifier = @load EvoTreesClassifier\nprob_predictor = EvoTreesClassifier()\n```\n\nWrapping in `TunedModel` to get a deterministic classifier with `threshold` as a new hyperparameter:\n\n```julia\npoint_predictor = BinaryThresholdPredictor(prob_predictor, threshold=0.6)\nXnew, _ = make_moons(3, rng=rng)\nmach = machine(point_predictor, X, y) |> fit!\npredict(mach, X)[1:3] # [0, 0, 0]\n```\n\nEstimating performance:\n\n```julia\nbalanced = BalancedAccuracy(adjusted=true)\ne = evaluate!(mach, resampling=CV(nfolds=6), measures=[balanced, accuracy])\ne.measurement[1] # 0.405 ± 0.089\n```\n\nWrapping in tuning strategy to learn `threshold` that maximizes balanced accuracy:\n\n```julia\nr = range(point_predictor, :threshold, lower=0.1, upper=0.9)\ntuned_point_predictor = TunedModel(\n point_predictor,\n tuning=RandomSearch(rng=rng),\n resampling=CV(nfolds=6),\n range = r,\n measure=balanced,\n n=30,\n)\nmach2 = machine(tuned_point_predictor, X, y) |> fit!\noptimized_point_predictor = report(mach2).best_model\noptimized_point_predictor.threshold # 0.260\npredict(mach2, X)[1:3] # [1, 1, 0]\n```\n\nEstimating the performance of the auto-thresholding model (nested resampling here):\n\n```julia\ne = evaluate!(mach2, resampling=CV(nfolds=6), measure=[balanced, accuracy])\ne.measurement[1] # 0.477 ± 0.110\n```\n""" +":name" = "BinaryThresholdPredictor" +":human_name" = "binary threshold predictor" +":tags" = [] +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [] +":hyperparameters" = "`(:model, :threshold)`" +":hyperparameter_types" = 
"`(\"MLJModelInterface.Probabilistic\", \"Float64\")`" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`MLJModels.BinaryThresholdPredictor`" [MLJModels.DeterministicConstantClassifier] +":input_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Finite}}`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":target_in_fit" = "`true`" +":is_pure_julia" = "`true`" +":package_name" = "MLJModels" +":package_license" = "MIT" +":load_path" = "MLJModels.DeterministicConstantClassifier" +":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" ":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nDeterministicConstantClassifier\n```\n\nA model type for constructing a deterministic constant classifier, based on\n[MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nDeterministicConstantClassifier = @load DeterministicConstantClassifier pkg=MLJModels\n```\n\nDo `model = DeterministicConstantClassifier()` to construct an instance with 
default hyper-parameters. """ +":name" = "DeterministicConstantClassifier" +":human_name" = "deterministic constant classifier" +":tags" = [] +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [":fit", ":predict"] +":hyperparameters" = "`()`" ":hyperparameter_types" = "`()`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" ":hyperparameter_ranges" = "`()`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Finite}}`" +":constructor" = "`nothing`" + +[MLJGLMInterface.LinearBinaryClassifier] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\", \"GLM.Link01\", \"Union{Nothing, Symbol}\", \"Integer\", \"Real\", \"Real\", \"Real\", \"Union{Nothing, AbstractVector{Symbol}}\")`" +":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{<:ScientificTypesBase.Binary}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{<:ScientificTypesBase.Binary}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = 
"MLJModels.DeterministicConstantClassifier" -":hyperparameters" = "`()`" +":prediction_type" = ":probabilistic" +":load_path" = "MLJGLMInterface.LinearBinaryClassifier" +":hyperparameters" = "`(:fit_intercept, :link, :offsetcol, :maxiter, :atol, :rtol, :minstepfac, :report_keys)`" ":is_pure_julia" = "`true`" -":human_name" = "deterministic constant classifier" +":human_name" = "linear binary classifier" ":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nDeterministicConstantClassifier\n```\n\nA model type for constructing a deterministic constant classifier, based on\n[MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nDeterministicConstantClassifier = @load DeterministicConstantClassifier pkg=MLJModels\n```\n\nDo `model = DeterministicConstantClassifier()` to construct an instance with default hyper-parameters. """ +":docstring" = """```\nLinearBinaryClassifier\n```\n\nA model type for constructing a linear binary classifier, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\n```\n\nDo `model = LinearBinaryClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearBinaryClassifier(fit_intercept=...)`.\n\n`LinearBinaryClassifier` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a binary target variable, with a user-specified link function. 
Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor(2)` or `<:Multiclass(2)`; check the scitype with `schema(y)`\n * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `link=GLM.LogitLink`: The function which links the linear prediction function to the probability of a particular outcome or class. This must have type `GLM.Link01`. Options include `GLM.LogitLink()`, `GLM.ProbitLink()`, `CloglogLink(),`CauchitLink()`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. Lower bound for the factor used to update the linear fit.\n * `report_keys`: `Vector` of keys for the report. 
Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features used during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. 
Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nimport GLM # namespace must be available\n\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\nclf = LinearBinaryClassifier(fit_intercept=false, link=GLM.ProbitLink())\n\nX, y = @load_crabs\n\nmach = machine(clf, X, y) |> fit!\n\nXnew = (;FL = [8.1, 24.8, 7.2],\n RW = [5.1, 25.7, 6.4],\n CL = [15.9, 46.7, 14.3],\n CW = [18.7, 59.7, 12.2],\n BD = [6.2, 23.6, 8.4],)\n\nyhat = predict(mach, Xnew) # probabilistic predictions\npdf(yhat, levels(y)) # probability matrix\np_B = pdf.(yhat, \"B\")\nclass_labels = predict_mode(mach, Xnew)\n\nfitted_params(mach).features\nfitted_params(mach).coef\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearCountRegressor`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "DeterministicConstantClassifier" +":package_url" = "https://github.com/JuliaStats/GLM.jl" +":package_name" = "GLM" +":name" = "LinearBinaryClassifier" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":predict"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" ":supports_training_losses" = "`false`" -":supports_weights" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" 
":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJModels.UnivariateTimeTypeToContinuous] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Union{Nothing, Dates.TimeType}\", \"Dates.Period\")`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`(nothing, nothing)`" + +[MLJGLMInterface.LinearCountRegressor] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\", \"Distributions.Distribution\", \"GLM.Link\", \"Union{Nothing, Symbol}\", \"Integer\", \"Real\", \"Real\", \"Real\", \"Union{Nothing, AbstractVector{Symbol}}\")`" +":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.ScientificTimeType}}`" -":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Count}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Count}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJModels.UnivariateTimeTypeToContinuous" -":hyperparameters" = "`(:zero_time, :step)`" +":prediction_type" = ":probabilistic" +":load_path" = "MLJGLMInterface.LinearCountRegressor" +":hyperparameters" = "`(:fit_intercept, :distribution, :link, 
:offsetcol, :maxiter, :atol, :rtol, :minstepfac, :report_keys)`" ":is_pure_julia" = "`true`" -":human_name" = "single variable transformer that creates continuous representations of temporally typed data" -":is_supervised" = "`false`" +":human_name" = "linear count regressor" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nUnivariateTimeTypeToContinuous\n```\n\nA model type for constructing a single variable transformer that creates continuous representations of temporally typed data, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateTimeTypeToContinuous = @load UnivariateTimeTypeToContinuous pkg=MLJModels\n```\n\nDo `model = UnivariateTimeTypeToContinuous()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateTimeTypeToContinuous(zero_time=...)`.\n\nUse this model to convert vectors with a `TimeType` element type to vectors of `Float64` type (`Continuous` element scitype).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector whose element type is a subtype of `Dates.TimeType`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `zero_time`: the time that is to correspond to 0.0 under transformations, with the type coinciding with the training data element type. 
If unspecified, the earliest time encountered in training is used.\n * `step::Period=Hour(24)`: time interval to correspond to one unit under transformation\n\n# Operations\n\n * `transform(mach, xnew)`: apply the encoding inferred when `mach` was fit\n\n# Fitted parameters\n\n`fitted_params(mach).fitresult` is the tuple `(zero_time, step)` actually used in transformations, which may differ from the user-specified hyper-parameters.\n\n# Example\n\n```\nusing MLJ\nusing Dates\n\nx = [Date(2001, 1, 1) + Day(i) for i in 0:4]\n\nencoder = UnivariateTimeTypeToContinuous(zero_time=Date(2000, 1, 1),\n step=Week(1))\n\nmach = machine(encoder, x)\nfit!(mach)\njulia> transform(mach, x)\n5-element Vector{Float64}:\n 52.285714285714285\n 52.42857142857143\n 52.57142857142857\n 52.714285714285715\n 52.857142\n```\n""" -":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "UnivariateTimeTypeToContinuous" -":target_in_fit" = "`false`" +":docstring" = """```\nLinearCountRegressor\n```\n\nA model type for constructing a linear count regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearCountRegressor = @load LinearCountRegressor pkg=GLM\n```\n\nDo `model = LinearCountRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearCountRegressor(fit_intercept=...)`.\n\n`LinearCountRegressor` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a `Count` target variable (non-negative, unbounded integer) with user-specified link function. 
Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Count`; check the scitype with `schema(y)`\n * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `distribution=Distributions.Poisson()`: The distribution which the residuals/errors of the model should fit.\n * `link=GLM.LogLink()`: The function which links the linear prediction function to the probability of a particular outcome or class. This should be one of the following: `GLM.IdentityLink()`, `GLM.InverseLink()`, `GLM.InverseSquareLink()`, `GLM.LogLink()`, `GLM.SqrtLink()`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. 
Lower bound for the factor used to update the linear fit.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same Scitype as `X` above. Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above\n * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features encountered during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. 
Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nimport MLJ.Distributions.Poisson\n\n# Generate some data whose target y looks Poisson when conditioned on\n# X:\nN = 10_000\nw = [1.0, -2.0, 3.0]\nmu(x) = exp(w'x) # mean for a log link function\nXmat = rand(N, 3)\nX = MLJ.table(Xmat)\ny = map(1:N) do i\n x = Xmat[i, :]\n rand(Poisson(mu(x)))\nend;\n\nCountRegressor = @load LinearCountRegressor pkg=GLM\nmodel = CountRegressor(fit_intercept=false)\nmach = machine(model, X, y)\nfit!(mach)\n\nXnew = MLJ.table(rand(3, 3))\nyhat = predict(mach, Xnew)\nyhat_point = predict_mean(mach, Xnew)\n\n# get coefficients approximating `w`:\njulia> fitted_params(mach).coef\n3-element Vector{Float64}:\n 0.9969008753103842\n -2.0255901752504775\n 3.014407534033522\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/GLM.jl" +":package_name" = "GLM" +":name" = "LinearCountRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mean"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Count}}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Count}`" ":supports_training_losses" = "`false`" -":supports_weights" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" -":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":constructor" = "`nothing`" - -[MLJModels.OneHotEncoder] +":input_scitype" = 
"`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Bool\", \"Bool\", \"Bool\")`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" + +[MLJGLMInterface.LinearRegressor] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Symbol}\", \"Union{Nothing, AbstractVector{Symbol}}\")`" +":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Continuous}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJModels.OneHotEncoder" -":hyperparameters" = "`(:features, :drop_last, :ordered_factor, :ignore)`" +":prediction_type" = ":probabilistic" +":load_path" = "MLJGLMInterface.LinearRegressor" +":hyperparameters" = "`(:fit_intercept, :dropcollinear, :offsetcol, :report_keys)`" ":is_pure_julia" = "`true`" -":human_name" = "one-hot encoder" -":is_supervised" = "`false`" +":human_name" = "linear regressor" +":is_supervised" = "`true`" 
":iteration_parameter" = "`nothing`" -":docstring" = """```\nOneHotEncoder\n```\n\nA model type for constructing a one-hot encoder, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneHotEncoder = @load OneHotEncoder pkg=MLJModels\n```\n\nDo `model = OneHotEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OneHotEncoder(features=...)`.\n\nUse this model to one-hot encode the `Multiclass` and `OrderedFactor` features (columns) of some table, leaving other columns unchanged.\n\nNew data to be transformed may lack features present in the fit data, but no *new* features can be present.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo ensure *all* features are transformed into `Continuous` features, or dropped, use [`ContinuousEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of symbols (column names). If empty (default) then all `Multiclass` and `OrderedFactor` features are encoded. Otherwise, encoding is further restricted to the specified features (`ignore=false`) or the unspecified features (`ignore=true`). 
This default behavior can be modified by the `ordered_factor` flag.\n * `ordered_factor=false`: when `true`, `OrderedFactor` features are universally excluded\n * `drop_last=true`: whether to drop the column corresponding to the final class of encoded features. For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `all_features`: names of all features encountered in training\n * `fitted_levels_given_feature`: dictionary of the levels associated with each feature encoded, keyed on the feature name\n * `ref_name_pairs_given_feature`: dictionary of pairs `r => ftr` (such as `0x00000001 => :grad__A`) where `r` is a CategoricalArrays.jl reference integer representing a level, and `ftr` the corresponding new feature name; the dictionary is keyed on the names of features that are encoded\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_to_be_encoded`: names of input features to be encoded\n * `new_features`: names of all output features\n\n# Example\n\n```\nusing MLJ\n\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n└───────────┴──────────────────┘\n\nhot = OneHotEncoder(drop_last=true)\nmach = fit!(machine(hot, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade__A │ Continuous │\n│ grade__B │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Count 
│\n└──────────────┴────────────┘\n```\n\nSee also [`ContinuousEncoder`](@ref).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "OneHotEncoder" -":target_in_fit" = "`false`" +":docstring" = """```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=GLM\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(fit_intercept=...)`.\n\n`LinearRegressor` assumes the target is a continuous variable whose conditional distribution is normal with constant variance, and whose expected value is a linear combination of the features (identity link function). Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n * `w`: is a vector of `Real` per-observation weights\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `dropcollinear=false`: Whether to drop features in the training data to ensure linear independence. If true , only the first of each set of linearly-dependent features is used. 
The coefficient for redundant linearly dependent features is `0.0` and all associated statistics are set to `NaN`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same Scitype as `X` above. Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above\n * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features encountered during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nWhen all keys are enabled in `report_keys`, the following fields are available in `report(mach)`:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. 
Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nLinearRegressor = @load LinearRegressor pkg=GLM\nglm = LinearRegressor()\n\nX, y = make_regression(100, 2) # synthetic data\nmach = machine(glm, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\nyhat_point = predict_mean(mach, Xnew) # new predictions\n\nfitted_params(mach).features\nfitted_params(mach).coef # x1, x2, intercept\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearCountRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/GLM.jl" +":package_name" = "GLM" +":name" = "LinearRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":OneHotEncoder"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mean"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Continuous}}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" -":supports_weights" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":constructor" = "`nothing`" - -[MLJModels.ContinuousEncoder] +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Bool\", \"Bool\")`" -":package_uuid" = 
"d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`(nothing, nothing)`" + +[OneRule.OneRuleClassifier] +":constructor" = "`nothing`" +":hyperparameter_types" = "`()`" +":package_uuid" = "90484964-6d6a-4979-af09-8657dbed84ff" +":hyperparameter_ranges" = "`()`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJModels.ContinuousEncoder" -":hyperparameters" = "`(:drop_last, :one_hot_ordered_factors)`" +":prediction_type" = ":deterministic" +":load_path" = "OneRule.OneRuleClassifier" +":hyperparameters" = "`()`" ":is_pure_julia" = "`true`" -":human_name" = "continuous encoder" -":is_supervised" = "`false`" +":human_name" = "one rule classifier" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nContinuousEncoder\n```\n\nA model type for constructing a continuous encoder, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContinuousEncoder = @load ContinuousEncoder pkg=MLJModels\n```\n\nDo `model = ContinuousEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `ContinuousEncoder(drop_last=...)`.\n\nUse this model to arrange all features (columns) of a table to have `Continuous` element scitype, by applying the following protocol to each feature `ftr`:\n\n * If `ftr` is already `Continuous` retain it.\n * If `ftr` is `Multiclass`, one-hot encode it.\n * If `ftr` is `OrderedFactor`, replace it with `coerce(ftr, Continuous)` (vector of floating point integers), unless `ordered_factors=false` is specified, in which case one-hot encode it.\n * If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`.\n * If `ftr` has some other element scitype, or was not observed in fitting the encoder, drop it from the table.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo selectively one-hot-encode categorical features (without dropping columns) use [`OneHotEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `drop_last=true`: whether to drop the column corresponding to the final class of one-hot encoded features. 
For example, a three-class feature is spawned into three new features if `drop_last=false`, but two just features otherwise.\n * `one_hot_ordered_factors=false`: whether to one-hot any feature with `OrderedFactor` element scitype, or to instead coerce it directly to a (single) `Continuous` feature using the order\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: names of features that will not be dropped from the table\n * `one_hot_encoder`: the `OneHotEncoder` model instance for handling the one-hot encoding\n * `one_hot_encoder_fitresult`: the fitted parameters of the `OneHotEncoder` model\n\n# Report\n\n * `features_to_keep`: names of input features that will not be dropped from the table\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3],\n comments=[\"the force\", \"be\", \"with you\", \"too\"])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n│ comments │ Textual │\n└───────────┴──────────────────┘\n\nencoder = ContinuousEncoder(drop_last=true)\nmach = fit!(machine(encoder, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Continuous │\n└──────────────┴────────────┘\n\njulia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped features\n1-element Vector{Symbol}:\n :comments\n\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = 
"https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "ContinuousEncoder" -":target_in_fit" = "`false`" +":docstring" = """```\nOneRuleClassifier\n```\n\nA model type for constructing a one rule classifier, based on [OneRule.jl](https://github.com/roland-KA/OneRule.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneRuleClassifier = @load OneRuleClassifier pkg=OneRule\n```\n\nDo `model = OneRuleClassifier()` to construct an instance with default hyper-parameters. \n\n`OneRuleClassifier` implements the OneRule method for classification by Robert Holte (\"Very simple classification rules perform well on most commonly used datasets\" in: Machine Learning 11.1 (1993), pp. 63-90). \n\n```\nFor more information see:\n\n- Witten, Ian H., Eibe Frank, and Mark A. Hall. \n Data Mining Practical Machine Learning Tools and Techniques Third Edition. \n Morgan Kaufmann, 2017, pp. 93-96.\n- [Machine Learning - (One|Simple) Rule](https://datacadamia.com/data_mining/one_rule)\n- [OneRClassifier - One Rule for Classification](http://rasbt.github.io/mlxtend/user_guide/classifier/OneRClassifier/)\n```\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with mach = machine(model, X, y) where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Multiclass`, `OrderedFactor`, or `<:Finite`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nThis classifier has no hyper-parameters.\n\n# Operations\n\n * `predict(mach, Xnew)`: return (deterministic) predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * 
`tree`: the tree (a `OneTree`) returned by the core OneTree.jl algorithm\n * `all_classes`: all classes (i.e. levels) of the target (used also internally to transfer `levels`-information to `predict`)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `tree`: The `OneTree` created based on the training data\n * `nrules`: The number of rules `tree` contains\n * `error_rate`: fraction of wrongly classified instances\n * `error_count`: number of wrongly classified instances\n * `classes_seen`: list of target classes actually observed in training\n * `features`: the names of the features encountered in training\n\n# Examples\n\n```\nusing MLJ\n\nORClassifier = @load OneRuleClassifier pkg=OneRule\n\norc = ORClassifier()\n\noutlook = [\"sunny\", \"sunny\", \"overcast\", \"rainy\", \"rainy\", \"rainy\", \"overcast\", \"sunny\", \"sunny\", \"rainy\", \"sunny\", \"overcast\", \"overcast\", \"rainy\"]\ntemperature = [\"hot\", \"hot\", \"hot\", \"mild\", \"cool\", \"cool\", \"cool\", \"mild\", \"cool\", \"mild\", \"mild\", \"mild\", \"hot\", \"mild\"]\nhumidity = [\"high\", \"high\", \"high\", \"high\", \"normal\", \"normal\", \"normal\", \"high\", \"normal\", \"normal\", \"normal\", \"high\", \"normal\", \"high\"]\nwindy = [\"false\", \"true\", \"false\", \"false\", \"false\", \"true\", \"true\", \"false\", \"false\", \"false\", \"true\", \"true\", \"false\", \"true\"]\n\nweather_data = (outlook = outlook, temperature = temperature, humidity = humidity, windy = windy)\nplay_data = [\"no\", \"no\", \"yes\", \"yes\", \"yes\", \"no\", \"yes\", \"no\", \"yes\", \"yes\", \"yes\", \"yes\", \"yes\", \"no\"]\n\nweather = coerce(weather_data, Textual => Multiclass)\nplay = coerce(play, Multiclass)\n\nmach = machine(orc, weather, play)\nfit!(mach)\n\nyhat = MLJ.predict(mach, weather) # in a real context 'new' `weather` data would be used\none_tree = fitted_params(mach).tree\nreport(mach).error_rate\n```\n\nSee also [OneRule.jl](https://github.com/roland-KA/OneRule.jl).\n""" 
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/roland-KA/OneRule.jl" +":package_name" = "OneRule" +":name" = "OneRuleClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":ContinuousEncoder"] +":implemented_methods" = [":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" - -[MLJModels.UnivariateBoxCoxTransformer] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Int64\", \"Bool\")`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`(nothing, nothing)`" + +[OutlierDetectionPython.MCDDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = 
"`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJModels.UnivariateBoxCoxTransformer" -":hyperparameters" = "`(:n, :shift)`" -":is_pure_julia" = "`true`" -":human_name" = "single variable Box-Cox transformer" +":load_path" = "OutlierDetectionPython.MCDDetector" +":hyperparameters" = "`(:store_precision, :assume_centered, :support_fraction, :random_state)`" +":is_pure_julia" = "`false`" +":human_name" = "mcd detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nUnivariateBoxCoxTransformer\n```\n\nA model type for constructing a single variable Box-Cox transformer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateBoxCoxTransformer = @load UnivariateBoxCoxTransformer pkg=MLJModels\n```\n\nDo `model = UnivariateBoxCoxTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateBoxCoxTransformer(n=...)`.\n\nBox-Cox transformations attempt to make data look more normally distributed. 
This can improve performance and assist in the interpretation of models which suppose that data is generated by a normal distribution.\n\nA Box-Cox transformation (with shift) is of the form\n\n```\nx -> ((x + c)^λ - 1)/λ\n```\n\nfor some constant `c` and real `λ`, unless `λ = 0`, in which case the above is replaced with\n\n```\nx -> log(x + c)\n```\n\nGiven user-specified hyper-parameters `n::Integer` and `shift::Bool`, the present implementation learns the parameters `c` and `λ` from the training data as follows: If `shift=true` and zeros are encountered in the data, then `c` is set to `0.2` times the data mean. If there are no zeros, then no shift is applied. Finally, `n` different values of `λ` between `-0.4` and `3` are considered, with `λ` fixed to the value maximizing normality of the transformed data.\n\n*Reference:* [Wikipedia entry for power transform](https://en.wikipedia.org/wiki/Power_transform).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Continuous`; check the scitype with `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n=171`: number of values of the exponent `λ` to try\n * `shift=false`: whether to include a preliminary constant translation in transformations, in the presence of zeros\n\n# Operations\n\n * `transform(mach, xnew)`: apply the Box-Cox transformation learned when fitting `mach`\n * `inverse_transform(mach, z)`: reconstruct the vector `z` whose transformation learned by `mach` is `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `λ`: the learned Box-Cox exponent\n * `c`: the learned shift\n\n# Examples\n\n```\nusing MLJ\nusing UnicodePlots\nusing Random\nRandom.seed!(123)\n\ntransf = UnivariateBoxCoxTransformer()\n\nx = randn(1000).^2\n\nmach = machine(transf, x)\nfit!(mach)\n\nz = transform(mach, x)\n\njulia> histogram(x)\n ┌ ┐\n [ 
0.0, 2.0) ┤███████████████████████████████████ 848\n [ 2.0, 4.0) ┤████▌ 109\n [ 4.0, 6.0) ┤█▍ 33\n [ 6.0, 8.0) ┤▍ 7\n [ 8.0, 10.0) ┤▏ 2\n [10.0, 12.0) ┤ 0\n [12.0, 14.0) ┤▏ 1\n └ ┘\n Frequency\n\njulia> histogram(z)\n ┌ ┐\n [-5.0, -4.0) ┤█▎ 8\n [-4.0, -3.0) ┤████████▊ 64\n [-3.0, -2.0) ┤█████████████████████▊ 159\n [-2.0, -1.0) ┤█████████████████████████████▊ 216\n [-1.0, 0.0) ┤███████████████████████████████████ 254\n [ 0.0, 1.0) ┤█████████████████████████▊ 188\n [ 1.0, 2.0) ┤████████████▍ 90\n [ 2.0, 3.0) ┤██▊ 20\n [ 3.0, 4.0) ┤▎ 1\n └ ┘\n Frequency\n\n```\n""" -":inverse_transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "UnivariateBoxCoxTransformer" +":docstring" = """```\nMCDDetector(store_precision = true,\n assume_centered = false,\n support_fraction = nothing,\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.mcd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.mcd)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "MCDDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateBoxCoxTransformer"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = 
"`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":constructor" = "`nothing`" - -[MLJModels.InteractionTransformer] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Vector{Symbol}}\")`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`(nothing, nothing)`" + +[OutlierDetectionPython.COPODDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\",)`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Static`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJModels.InteractionTransformer" -":hyperparameters" = "`(:order, :features)`" -":is_pure_julia" = "`true`" -":human_name" = "interaction transformer" +":load_path" = "OutlierDetectionPython.COPODDetector" +":hyperparameters" = "`(:n_jobs,)`" +":is_pure_julia" = "`false`" +":human_name" = 
"copod detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nInteractionTransformer\n```\n\nA model type for constructing a interaction transformer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nInteractionTransformer = @load InteractionTransformer pkg=MLJModels\n```\n\nDo `model = InteractionTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `InteractionTransformer(order=...)`.\n\nGenerates all polynomial interaction terms up to the given order for the subset of chosen columns. Any column that contains elements with scitype `<:Infinite` is a valid basis to generate interactions. If `features` is not specified, all such columns with scitype `<:Infinite` in the table are used as a basis.\n\nIn MLJ or MLJBase, you can transform features `X` with the single call\n\n```\ntransform(machine(model), X)\n```\n\nSee also the example below.\n\n# Hyper-parameters\n\n * `order`: Maximum order of interactions to be generated.\n * `features`: Restricts interations generation to those columns\n\n# Operations\n\n * `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` using the hyper-parameters specified in `model`.\n\n# Example\n\n```\nusing MLJ\n\nX = (\n A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"]\n)\nit = InteractionTransformer(order=3)\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],\n A_C = [7, 16, 27],\n B_C = [28, 40, 54],\n A_B_C = [28, 80, 162],)\n\nit = InteractionTransformer(order=2, features=[:A, :B])\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 
18],)\n\n```\n""" -":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "InteractionTransformer" +":docstring" = """```\nCOPODDetector(n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.copod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.copod)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "COPODDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":transform"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":constructor" = "`nothing`" - -[MLJModels.ConstantRegressor] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Type{D} where D<:Distributions.Sampleable\",)`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`(nothing,)`" + +[OutlierDetectionPython.HBOSDetector] +":constructor" = "`nothing`" 
+":hyperparameter_types" = "`(\"Integer\", \"Real\", \"Real\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, AbstractVector{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJModels.ConstantRegressor" -":hyperparameters" = "`(:distribution_type,)`" -":is_pure_julia" = "`true`" -":human_name" = "constant regressor" -":is_supervised" = "`true`" +":prediction_type" = ":unknown" +":load_path" = "OutlierDetectionPython.HBOSDetector" +":hyperparameters" = "`(:n_bins, :alpha, :tol)`" +":is_pure_julia" = "`false`" +":human_name" = "hbos detector" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nConstantRegressor\n```\n\nThis \"dummy\" probabilistic predictor always returns the same distribution, irrespective of the provided input pattern. The distribution returned is the one of the type specified that best fits the training target data. Use `predict_mean` or `predict_median` to predict the mean or median values instead. 
If not specified, a normal distribution is fit.\n\nAlmost any reasonable model is expected to outperform `ConstantRegressor` which is used almost exclusively for testing and establishing performance baselines.\n\nIn MLJ (or MLJModels) do `model = ConstantRegressor()` or `model = ConstantRegressor(distribution=...)` to construct a model instance.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`)\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `distribution_type=Distributions.Normal`: The distribution to be fit to the target data. Must be a subtype of `Distributions.ContinuousUnivariateDistribution`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` (which for this model are ignored). 
Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: Return instead the means of the probabilistic predictions returned above.\n * `predict_median(mach, Xnew)`: Return instead the medians of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `target_distribution`: The distribution fit to the supplied target data.\n\n# Examples\n\n```julia\nusing MLJ\n\nX, y = make_regression(10, 2) # synthetic data: a table and vector\nregressor = ConstantRegressor()\nmach = machine(regressor, X, y) |> fit!\n\nfitted_params(mach)\n\nXnew, _ = make_regression(3, 2)\npredict(mach, Xnew)\npredict_mean(mach, Xnew)\n\n```\n\nSee also [`ConstantClassifier`](@ref)\n""" +":docstring" = """```\nHBOSDetector(n_bins = 10,\n alpha = 0.1,\n tol = 0.5)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.hbos](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.hbos)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "ConstantRegressor" -":target_in_fit" = "`true`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "HBOSDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fitted_params", ":predict"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Continuous}}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" 
":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJModels.UnivariateDiscretizer] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Int64\",)`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`(nothing,)`" + +[OutlierDetectionPython.IForestDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"Union{Real, String}\", \"Real\", \"Bool\", \"Union{Nothing, Integer}\", \"Integer\", \"Integer\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJModels.UnivariateDiscretizer" 
-":hyperparameters" = "`(:n_classes,)`" -":is_pure_julia" = "`true`" -":human_name" = "single variable discretizer" +":load_path" = "OutlierDetectionPython.IForestDetector" +":hyperparameters" = "`(:n_estimators, :max_samples, :max_features, :bootstrap, :random_state, :verbose, :n_jobs)`" +":is_pure_julia" = "`false`" +":human_name" = "i forest detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nUnivariateDiscretizer\n```\n\nA model type for constructing a single variable discretizer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateDiscretizer = @load UnivariateDiscretizer pkg=MLJModels\n```\n\nDo `model = UnivariateDiscretizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateDiscretizer(n_classes=...)`.\n\nDiscretization converts a `Continuous` vector into an `OrderedFactor` vector. In particular, the output is a `CategoricalVector` (whose reference type is optimized).\n\nThe transformation is chosen so that the vector on which the transformer is fit has, in transformed form, an approximately uniform distribution of values. 
Specifically, if `n_classes` is the level of discretization, then `2*n_classes - 1` ordered quantiles are computed, the odd quantiles being used for transforming (discretization) and the even quantiles for inverse transforming.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with `Continuous` element scitype; check scitype with `scitype(x)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n_classes`: number of discrete classes in the output\n\n# Operations\n\n * `transform(mach, xnew)`: discretize `xnew` according to the discretization learned when fitting `mach`\n * `inverse_transform(mach, z)`: attempt to reconstruct from `z` a vector that transforms to give `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach).fitesult` include:\n\n * `odd_quantiles`: quantiles used for transforming (length is `n_classes - 1`)\n * `even_quantiles`: quantiles used for inverse transforming (length is `n_classes`)\n\n# Example\n\n```\nusing MLJ\nusing Random\nRandom.seed!(123)\n\ndiscretizer = UnivariateDiscretizer(n_classes=100)\nmach = machine(discretizer, randn(1000))\nfit!(mach)\n\njulia> x = rand(5)\n5-element Vector{Float64}:\n 0.8585244609846809\n 0.37541692370451396\n 0.6767070590395461\n 0.9208844241267105\n 0.7064611415680901\n\njulia> z = transform(mach, x)\n5-element CategoricalArrays.CategoricalArray{UInt8,1,UInt8}:\n 0x52\n 0x42\n 0x4d\n 0x54\n 0x4e\n\nx_approx = inverse_transform(mach, z)\njulia> x - x_approx\n5-element Vector{Float64}:\n 0.008224506144777322\n 0.012731354778359405\n 0.0056265330571125816\n 0.005738175684445124\n 0.006835652575801987\n```\n""" -":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "UnivariateDiscretizer" +":docstring" = 
"""```\nIForestDetector(n_estimators = 100,\n max_samples = \"auto\",\n max_features = 1.0\n bootstrap = false,\n random_state = nothing,\n verbose = 0,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.iforest](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.iforest)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "IForestDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateDiscretizer"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" -[MLJModels.BinaryThresholdPredictor] -":is_wrapper" = "`true`" -":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Float64\")`" -":package_uuid" = "" -":hyperparameter_ranges" = "`(nothing, nothing)`" +[OutlierDetectionPython.SOSDetector] +":constructor" = 
"`nothing`" +":hyperparameter_types" = "`(\"Real\", \"String\", \"Real\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "unknown" -":prediction_type" = ":deterministic" -":load_path" = "MLJModels.BinaryThresholdPredictor" -":hyperparameters" = "`(:model, :threshold)`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "OutlierDetectionPython.SOSDetector" +":hyperparameters" = "`(:perplexity, :metric, :eps)`" ":is_pure_julia" = "`false`" -":human_name" = "binary threshold predictor" -":is_supervised" = "`true`" +":human_name" = "sos detector" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nBinaryThresholdPredictor(model; threshold=0.5)\n```\n\nWrap the `Probabilistic` model, `model`, assumed to support binary classification, as a `Deterministic` model, by applying the specified `threshold` to the positive class probability. 
In addition to conventional supervised classifiers, it can also be applied to outlier detection models that predict normalized scores - in the form of appropriate `UnivariateFinite` distributions - that is, models that subtype `AbstractProbabilisticUnsupervisedDetector` or `AbstractProbabilisticSupervisedDetector`.\n\nBy convention the positive class is the second class returned by `levels(y)`, where `y` is the target.\n\nIf `threshold=0.5` then calling `predict` on the wrapped model is equivalent to calling `predict_mode` on the atomic model.\n\n# Example\n\nBelow is an application to the well-known Pima Indian diabetes dataset, including optimization of the `threshold` parameter, with a high balanced accuracy the objective. The target class distribution is 500 positives to 268 negatives.\n\nLoading the data:\n\n```julia\nusing MLJ, Random\nrng = Xoshiro(123)\n\ndiabetes = OpenML.load(43582)\noutcome, X = unpack(diabetes, ==(:Outcome), rng=rng);\ny = coerce(Int.(outcome), OrderedFactor);\n```\n\nChoosing a probabilistic classifier:\n\n```julia\nEvoTreesClassifier = @load EvoTreesClassifier\nprob_predictor = EvoTreesClassifier()\n```\n\nWrapping in `TunedModel` to get a deterministic classifier with `threshold` as a new hyperparameter:\n\n```julia\npoint_predictor = BinaryThresholdPredictor(prob_predictor, threshold=0.6)\nXnew, _ = make_moons(3, rng=rng)\nmach = machine(point_predictor, X, y) |> fit!\npredict(mach, X)[1:3] # [0, 0, 0]\n```\n\nEstimating performance:\n\n```julia\nbalanced = BalancedAccuracy(adjusted=true)\ne = evaluate!(mach, resampling=CV(nfolds=6), measures=[balanced, accuracy])\ne.measurement[1] # 0.405 ± 0.089\n```\n\nWrapping in tuning strategy to learn `threshold` that maximizes balanced accuracy:\n\n```julia\nr = range(point_predictor, :threshold, lower=0.1, upper=0.9)\ntuned_point_predictor = TunedModel(\n point_predictor,\n tuning=RandomSearch(rng=rng),\n resampling=CV(nfolds=6),\n range = r,\n measure=balanced,\n n=30,\n)\nmach2 = 
machine(tuned_point_predictor, X, y) |> fit!\noptimized_point_predictor = report(mach2).best_model\noptimized_point_predictor.threshold # 0.260\npredict(mach2, X)[1:3] # [1, 1, 0]\n```\n\nEstimating the performance of the auto-thresholding model (nested resampling here):\n\n```julia\ne = evaluate!(mach2, resampling=CV(nfolds=6), measure=[balanced, accuracy])\ne.measurement[1] # 0.477 ± 0.110\n```\n""" +":docstring" = """```\nSOSDetector(perplexity = 4.5,\n metric = \"minkowski\",\n eps = 1e-5)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sos](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sos)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "BinaryThresholdPredictor" -":target_in_fit" = "`true`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "SOSDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`MLJModels.BinaryThresholdPredictor`" - -[MLJModels.FillImputer] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = 
"`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Function\", \"Function\", \"Function\")`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" + +[OutlierDetectionPython.ABODDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"String\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJModels.FillImputer" -":hyperparameters" = "`(:features, :continuous_fill, :count_fill, :finite_fill)`" -":is_pure_julia" = "`true`" -":human_name" = "fill imputer" +":load_path" = "OutlierDetectionPython.ABODDetector" +":hyperparameters" = "`(:n_neighbors, :method)`" +":is_pure_julia" = "`false`" +":human_name" = "abod detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nFillImputer\n```\n\nA model type for constructing a fill imputer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model 
interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFillImputer = @load FillImputer pkg=MLJModels\n```\n\nDo `model = FillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FillImputer(features=...)`.\n\nUse this model to impute `missing` values in tabular data. A fixed \"filler\" value is learned from the training data, one for each column of the table.\n\nFor imputing missing values in a vector, use [`UnivariateFillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have element scitypes `Union{Missing, T}`, where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`. Check scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of names of features (symbols) for which imputation is to be attempted; default is empty, which is interpreted as \"impute all\".\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_seen_in_fit`: the names of features (columns) 
encountered during training\n * `univariate_transformer`: the univariate model applied to determine the fillers (it's fields contain the functions defining the filler computations)\n * `filler_given_feature`: dictionary of filler values, keyed on feature (column) names\n\n# Examples\n\n```\nusing MLJ\nimputer = FillImputer()\n\nX = (a = [1.0, 2.0, missing, 3.0, missing],\n b = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass),\n c = [1, 1, 2, missing, 3])\n\nschema(X)\njulia> schema(X)\n┌───────┬───────────────────────────────┐\n│ names │ scitypes │\n├───────┼───────────────────────────────┤\n│ a │ Union{Missing, Continuous} │\n│ b │ Union{Missing, Multiclass{2}} │\n│ c │ Union{Missing, Count} │\n└───────┴───────────────────────────────┘\n\nmach = machine(imputer, X)\nfit!(mach)\n\njulia> fitted_params(mach).filler_given_feature\n(filler = 2.0,)\n\njulia> fitted_params(mach).filler_given_feature\nDict{Symbol, Any} with 3 entries:\n :a => 2.0\n :b => \"y\"\n :c => 2\n\njulia> transform(mach, X)\n(a = [1.0, 2.0, 2.0, 3.0, 2.0],\n b = CategoricalValue{String, UInt32}[\"y\", \"n\", \"y\", \"y\", \"y\"],\n c = [1, 1, 2, 2, 3],)\n```\n\nSee also [`UnivariateFillImputer`](@ref).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "FillImputer" +":docstring" = """```\nABODDetector(n_neighbors = 5,\n method = \"fast\")\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.abod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.abod)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "ABODDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", 
":transform", ":FillImputer"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":constructor" = "`nothing`" - -[MLJModels.DeterministicConstantRegressor] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`()`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`()`" + +[OutlierDetectionPython.LOFDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"String\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Any\", \"Integer\", \"Bool\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, AbstractVector{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, 
AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "MLJModels.DeterministicConstantRegressor" -":hyperparameters" = "`()`" -":is_pure_julia" = "`true`" -":human_name" = "deterministic constant regressor" -":is_supervised" = "`true`" +":prediction_type" = ":unknown" +":load_path" = "OutlierDetectionPython.LOFDetector" +":hyperparameters" = "`(:n_neighbors, :algorithm, :leaf_size, :metric, :p, :metric_params, :n_jobs, :novelty)`" +":is_pure_julia" = "`false`" +":human_name" = "lof detector" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nDeterministicConstantRegressor\n```\n\nA model type for constructing a deterministic constant regressor, based on\n[MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nDeterministicConstantRegressor = @load DeterministicConstantRegressor pkg=MLJModels\n```\n\nDo `model = DeterministicConstantRegressor()` to construct an instance with default hyper-parameters. 
""" +":docstring" = """```\nLOFDetector(n_neighbors = 5,\n algorithm = \"auto\",\n leaf_size = 30,\n metric = \"minkowski\",\n p = 2,\n metric_params = nothing,\n n_jobs = 1,\n novelty = true)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "DeterministicConstantRegressor" -":target_in_fit" = "`true`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "LOFDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":predict"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJModels.UnivariateStandardizer] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`()`" -":package_uuid" = 
"d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`()`" + +[OutlierDetectionPython.PCADetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"String\", \"Real\", \"Union{Integer, String}\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Infinite}}`" -":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJModels.UnivariateStandardizer" -":hyperparameters" = "`()`" -":is_pure_julia" = "`true`" -":human_name" = "single variable discretizer" +":load_path" = "OutlierDetectionPython.PCADetector" +":hyperparameters" = "`(:n_components, :n_selected_components, :copy, :whiten, :svd_solver, :tol, :iterated_power, :standardization, :weighted, :random_state)`" +":is_pure_julia" = "`false`" +":human_name" = "pca detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nUnivariateStandardizer()\n```\n\nTransformer type for standardizing (whitening) single variable 
data.\n\nThis model may be deprecated in the future. Consider using [`Standardizer`](@ref), which handles both tabular *and* univariate data.\n""" -":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "UnivariateStandardizer" +":docstring" = """```\nPCADetector(n_components = nothing,\n n_selected_components = nothing,\n copy = true,\n whiten = false,\n svd_solver = \"auto\",\n tol = 0.0\n iterated_power = \"auto\",\n standardization = true,\n weighted = true,\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "PCADetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" -":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":constructor" = "`nothing`" - -[MLJModels.UnivariateFillImputer] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Function\", \"Function\", \"Function\")`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" + +[OutlierDetectionPython.INNEDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"Union{Real, String}\", \"Union{Nothing, Integer}\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}}`" -":output_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJModels.UnivariateFillImputer" -":hyperparameters" = "`(:continuous_fill, :count_fill, :finite_fill)`" -":is_pure_julia" = "`true`" -":human_name" = "single variable fill imputer" +":load_path" = "OutlierDetectionPython.INNEDetector" +":hyperparameters" = 
"`(:n_estimators, :max_samples, :random_state)`" +":is_pure_julia" = "`false`" +":human_name" = "inne detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nUnivariateFillImputer\n```\n\nA model type for constructing a single variable fill imputer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateFillImputer = @load UnivariateFillImputer pkg=MLJModels\n```\n\nDo `model = UnivariateFillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateFillImputer(continuous_fill=...)`.\n\nUse this model to imputing `missing` values in a vector with a fixed value learned from the non-missing values of training vector.\n\nFor imputing missing values in tabular data, use [`FillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Union{Missing, T}` where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`; check scitype using `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, xnew)`: return `xnew` 
with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `filler`: the fill value to be imputed in all new data\n\n# Examples\n\n```\nusing MLJ\nimputer = UnivariateFillImputer()\n\nx_continuous = [1.0, 2.0, missing, 3.0]\nx_multiclass = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass)\nx_count = [1, 1, 1, 2, missing, 3, 3]\n\nmach = machine(imputer, x_continuous)\nfit!(mach)\n\njulia> fitted_params(mach)\n(filler = 2.0,)\n\njulia> transform(mach, [missing, missing, 101.0])\n3-element Vector{Float64}:\n 2.0\n 2.0\n 101.0\n\nmach2 = machine(imputer, x_multiclass) |> fit!\n\njulia> transform(mach2, x_multiclass)\n5-element CategoricalArray{String,1,UInt32}:\n \"y\"\n \"n\"\n \"y\"\n \"y\"\n \"y\"\n\nmach3 = machine(imputer, x_count) |> fit!\n\njulia> transform(mach3, [missing, missing, 5])\n3-element Vector{Int64}:\n 2\n 2\n 5\n```\n\nFor imputing tabular data, use [`FillImputer`](@ref).\n""" -":inverse_transform_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "UnivariateFillImputer" +":docstring" = """```\nINNEDetector(n_estimators=200,\n max_samples=\"auto\",\n random_state=None)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "INNEDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", 
":fitted_params", ":transform", ":UnivariateFillImputer"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" -":transform_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":constructor" = "`nothing`" - -[MLJGLMInterface.LinearBinaryClassifier] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Bool\", \"GLM.Link01\", \"Union{Nothing, Symbol}\", \"Integer\", \"Real\", \"Real\", \"Real\", \"Union{Nothing, AbstractVector{Symbol}}\")`" -":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[OutlierDetectionPython.OCSVMDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"String\", \"Integer\", \"Union{Real, String}\", \"Real\", \"Real\", \"Real\", \"Bool\", \"Integer\", \"Bool\", \"Integer\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{<:ScientificTypesBase.Binary}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{<:ScientificTypesBase.Binary}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJGLMInterface.LinearBinaryClassifier" -":hyperparameters" = "`(:fit_intercept, :link, :offsetcol, :maxiter, :atol, :rtol, :minstepfac, :report_keys)`" -":is_pure_julia" = "`true`" -":human_name" = "linear binary classifier" -":is_supervised" = "`true`" +":prediction_type" = ":unknown" +":load_path" = "OutlierDetectionPython.OCSVMDetector" +":hyperparameters" = "`(:kernel, :degree, :gamma, :coef0, :tol, :nu, :shrinking, :cache_size, :verbose, :max_iter)`" +":is_pure_julia" = "`false`" +":human_name" = "ocsvm detector" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLinearBinaryClassifier\n```\n\nA model type for constructing a linear binary classifier, based on 
[GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\n```\n\nDo `model = LinearBinaryClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearBinaryClassifier(fit_intercept=...)`.\n\n`LinearBinaryClassifier` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a binary target variable, with a user-specified link function. Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor(2)` or `<:Multiclass(2)`; check the scitype with `schema(y)`\n * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `link=GLM.LogitLink`: The function which links the linear prediction function to the probability of a particular outcome or class. This must have type `GLM.Link01`. Options include `GLM.LogitLink()`, `GLM.ProbitLink()`, `CloglogLink(),`CauchitLink()`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. 
An offset is a variable which is known to have a coefficient of 1.\n * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. Lower bound for the factor used to update the linear fit.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features used during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. 
For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nimport GLM # namespace must be available\n\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\nclf = LinearBinaryClassifier(fit_intercept=false, link=GLM.ProbitLink())\n\nX, y = @load_crabs\n\nmach = machine(clf, X, y) |> fit!\n\nXnew = (;FL = [8.1, 24.8, 7.2],\n RW = [5.1, 25.7, 6.4],\n CL = [15.9, 46.7, 14.3],\n CW = [18.7, 59.7, 12.2],\n BD = [6.2, 23.6, 8.4],)\n\nyhat = predict(mach, Xnew) # probabilistic predictions\npdf(yhat, levels(y)) # probability matrix\np_B = pdf.(yhat, \"B\")\nclass_labels = predict_mode(mach, Xnew)\n\nfitted_params(mach).features\nfitted_params(mach).coef\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearCountRegressor`](@ref)\n""" +":docstring" = """```\nOCSVMDetector(kernel = \"rbf\",\n degree = 3,\n gamma = \"auto\",\n coef0 = 0.0,\n tol = 0.001,\n nu = 0.5,\n shrinking = true,\n cache_size = 200,\n verbose = false,\n max_iter = -1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/GLM.jl" -":package_name" = "GLM" -":name" = "LinearBinaryClassifier" -":target_in_fit" = "`true`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = 
"OutlierDetectionPython" +":name" = "OCSVMDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" -":supports_weights" = "`true`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJGLMInterface.LinearCountRegressor] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Bool\", \"Distributions.Distribution\", \"GLM.Link\", \"Union{Nothing, Symbol}\", \"Integer\", \"Real\", \"Real\", \"Real\", \"Union{Nothing, AbstractVector{Symbol}}\")`" -":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[OutlierDetectionPython.ECODDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\",)`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = 
"`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Count}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Count}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJGLMInterface.LinearCountRegressor" -":hyperparameters" = "`(:fit_intercept, :distribution, :link, :offsetcol, :maxiter, :atol, :rtol, :minstepfac, :report_keys)`" -":is_pure_julia" = "`true`" -":human_name" = "linear count regressor" -":is_supervised" = "`true`" +":prediction_type" = ":unknown" +":load_path" = "OutlierDetectionPython.ECODDetector" +":hyperparameters" = "`(:n_jobs,)`" +":is_pure_julia" = "`false`" +":human_name" = "ecod detector" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLinearCountRegressor\n```\n\nA model type for constructing a linear count regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearCountRegressor = @load 
LinearCountRegressor pkg=GLM\n```\n\nDo `model = LinearCountRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearCountRegressor(fit_intercept=...)`.\n\n`LinearCountRegressor` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a `Count` target variable (non-negative, unbounded integer) with user-specified link function. Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Count`; check the scitype with `schema(y)`\n * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `distribution=Distributions.Poisson()`: The distribution which the residuals/errors of the model should fit.\n * `link=GLM.LogLink()`: The function which links the linear prediction function to the probability of a particular outcome or class. This should be one of the following: `GLM.IdentityLink()`, `GLM.InverseLink()`, `GLM.InverseSquareLink()`, `GLM.LogLink()`, `GLM.SqrtLink()`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n * `atol::Real=1e-6`: Absolute threshold for convergence. 
Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. Lower bound for the factor used to update the linear fit.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same Scitype as `X` above. Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above\n * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features encountered during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. 
Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nimport MLJ.Distributions.Poisson\n\n# Generate some data whose target y looks Poisson when conditioned on\n# X:\nN = 10_000\nw = [1.0, -2.0, 3.0]\nmu(x) = exp(w'x) # mean for a log link function\nXmat = rand(N, 3)\nX = MLJ.table(Xmat)\ny = map(1:N) do i\n x = Xmat[i, :]\n rand(Poisson(mu(x)))\nend;\n\nCountRegressor = @load LinearCountRegressor pkg=GLM\nmodel = CountRegressor(fit_intercept=false)\nmach = machine(model, X, y)\nfit!(mach)\n\nXnew = MLJ.table(rand(3, 3))\nyhat = predict(mach, Xnew)\nyhat_point = predict_mean(mach, Xnew)\n\n# get coefficients approximating `w`:\njulia> fitted_params(mach).coef\n3-element Vector{Float64}:\n 0.9969008753103842\n -2.0255901752504775\n 3.014407534033522\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n""" +":docstring" = """```\nECODDetector(n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/GLM.jl" -":package_name" = "GLM" -":name" = "LinearCountRegressor" -":target_in_fit" = "`true`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "ECODDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mean"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Count}}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Count}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" 
+":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" -":supports_weights" = "`true`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJGLMInterface.LinearRegressor] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Symbol}\", \"Union{Nothing, AbstractVector{Symbol}}\")`" -":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" + +[OutlierDetectionPython.SODDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"Integer\", \"Real\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Continuous}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = 
"`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJGLMInterface.LinearRegressor" -":hyperparameters" = "`(:fit_intercept, :dropcollinear, :offsetcol, :report_keys)`" -":is_pure_julia" = "`true`" -":human_name" = "linear regressor" -":is_supervised" = "`true`" +":prediction_type" = ":unknown" +":load_path" = "OutlierDetectionPython.SODDetector" +":hyperparameters" = "`(:n_neighbors, :ref_set, :alpha)`" +":is_pure_julia" = "`false`" +":human_name" = "sod detector" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=GLM\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(fit_intercept=...)`.\n\n`LinearRegressor` assumes the target is a continuous variable whose conditional distribution is normal with constant variance, and whose expected value is a linear combination of the features (identity link function). 
Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n * `w`: is a vector of `Real` per-observation weights\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `dropcollinear=false`: Whether to drop features in the training data to ensure linear independence. If true , only the first of each set of linearly-dependent features is used. The coefficient for redundant linearly dependent features is `0.0` and all associated statistics are set to `NaN`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same Scitype as `X` above. 
Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above\n * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features encountered during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nWhen all keys are enabled in `report_keys`, the following fields are available in `report(mach)`:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. 
Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nLinearRegressor = @load LinearRegressor pkg=GLM\nglm = LinearRegressor()\n\nX, y = make_regression(100, 2) # synthetic data\nmach = machine(glm, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\nyhat_point = predict_mean(mach, Xnew) # new predictions\n\nfitted_params(mach).features\nfitted_params(mach).coef # x1, x2, intercept\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearCountRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n""" +":docstring" = """```\nSODDetector(n_neighbors = 5,\n ref_set = 10,\n alpha = 0.8)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/GLM.jl" -":package_name" = "GLM" -":name" = "LinearRegressor" -":target_in_fit" = "`true`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "SODDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mean"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Continuous}}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" -":supports_weights" = "`true`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = 
"`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" -[OneRule.OneRuleClassifier] +[OutlierDetectionPython.LODADetector] ":constructor" = "`nothing`" -":hyperparameter_types" = "`()`" -":package_uuid" = "90484964-6d6a-4979-af09-8657dbed84ff" -":hyperparameter_ranges" = "`()`" +":hyperparameter_types" = "`(\"Integer\", \"Integer\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "OneRule.OneRuleClassifier" -":hyperparameters" = "`()`" -":is_pure_julia" = "`true`" -":human_name" = "one rule classifier" -":is_supervised" = 
"`true`" +":prediction_type" = ":unknown" +":load_path" = "OutlierDetectionPython.LODADetector" +":hyperparameters" = "`(:n_bins, :n_random_cuts)`" +":is_pure_julia" = "`false`" +":human_name" = "loda detector" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nOneRuleClassifier\n```\n\nA model type for constructing a one rule classifier, based on [OneRule.jl](https://github.com/roland-KA/OneRule.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneRuleClassifier = @load OneRuleClassifier pkg=OneRule\n```\n\nDo `model = OneRuleClassifier()` to construct an instance with default hyper-parameters. \n\n`OneRuleClassifier` implements the OneRule method for classification by Robert Holte (\"Very simple classification rules perform well on most commonly used datasets\" in: Machine Learning 11.1 (1993), pp. 63-90). \n\n```\nFor more information see:\n\n- Witten, Ian H., Eibe Frank, and Mark A. Hall. \n Data Mining Practical Machine Learning Tools and Techniques Third Edition. \n Morgan Kaufmann, 2017, pp. 
93-96.\n- [Machine Learning - (One|Simple) Rule](https://datacadamia.com/data_mining/one_rule)\n- [OneRClassifier - One Rule for Classification](http://rasbt.github.io/mlxtend/user_guide/classifier/OneRClassifier/)\n```\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with mach = machine(model, X, y) where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Multiclass`, `OrderedFactor`, or `<:Finite`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nThis classifier has no hyper-parameters.\n\n# Operations\n\n * `predict(mach, Xnew)`: return (deterministic) predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: the tree (a `OneTree`) returned by the core OneTree.jl algorithm\n * `all_classes`: all classes (i.e. 
levels) of the target (used also internally to transfer `levels`-information to `predict`)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `tree`: The `OneTree` created based on the training data\n * `nrules`: The number of rules `tree` contains\n * `error_rate`: fraction of wrongly classified instances\n * `error_count`: number of wrongly classified instances\n * `classes_seen`: list of target classes actually observed in training\n * `features`: the names of the features encountered in training\n\n# Examples\n\n```\nusing MLJ\n\nORClassifier = @load OneRuleClassifier pkg=OneRule\n\norc = ORClassifier()\n\noutlook = [\"sunny\", \"sunny\", \"overcast\", \"rainy\", \"rainy\", \"rainy\", \"overcast\", \"sunny\", \"sunny\", \"rainy\", \"sunny\", \"overcast\", \"overcast\", \"rainy\"]\ntemperature = [\"hot\", \"hot\", \"hot\", \"mild\", \"cool\", \"cool\", \"cool\", \"mild\", \"cool\", \"mild\", \"mild\", \"mild\", \"hot\", \"mild\"]\nhumidity = [\"high\", \"high\", \"high\", \"high\", \"normal\", \"normal\", \"normal\", \"high\", \"normal\", \"normal\", \"normal\", \"high\", \"normal\", \"high\"]\nwindy = [\"false\", \"true\", \"false\", \"false\", \"false\", \"true\", \"true\", \"false\", \"false\", \"false\", \"true\", \"true\", \"false\", \"true\"]\n\nweather_data = (outlook = outlook, temperature = temperature, humidity = humidity, windy = windy)\nplay_data = [\"no\", \"no\", \"yes\", \"yes\", \"yes\", \"no\", \"yes\", \"no\", \"yes\", \"yes\", \"yes\", \"yes\", \"yes\", \"no\"]\n\nweather = coerce(weather_data, Textual => Multiclass)\nplay = coerce(play, Multiclass)\n\nmach = machine(orc, weather, play)\nfit!(mach)\n\nyhat = MLJ.predict(mach, weather) # in a real context 'new' `weather` data would be used\none_tree = fitted_params(mach).tree\nreport(mach).error_rate\n```\n\nSee also [OneRule.jl](https://github.com/roland-KA/OneRule.jl).\n""" +":docstring" = """```\nLODADetector(n_bins = 10,\n n_random_cuts = 
100)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/roland-KA/OneRule.jl" -":package_name" = "OneRule" -":name" = "OneRuleClassifier" -":target_in_fit" = "`true`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "LODADetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[OutlierDetectionPython.MCDDetector] +[OutlierDetectionPython.KDEDetector] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\")`" +":hyperparameter_types" = "`(\"Real\", \"String\", \"Integer\", \"String\", 
\"Any\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7227,17 +7375,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.MCDDetector" -":hyperparameters" = "`(:store_precision, :assume_centered, :support_fraction, :random_state)`" +":load_path" = "OutlierDetectionPython.KDEDetector" +":hyperparameters" = "`(:bandwidth, :algorithm, :leaf_size, :metric, :metric_params)`" ":is_pure_julia" = "`false`" -":human_name" = "mcd detector" +":human_name" = "kde detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nMCDDetector(store_precision = true,\n assume_centered = false,\n support_fraction = nothing,\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.mcd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.mcd)\n""" +":docstring" = """```\nKDEDetector(bandwidth=1.0,\n algorithm=\"auto\",\n leaf_size=30,\n metric=\"minkowski\",\n metric_params=None)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" 
":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "MCDDetector" +":name" = "KDEDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7252,9 +7400,9 @@ ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[OutlierDetectionPython.COPODDetector] +[OutlierDetectionPython.CDDetector] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\",)`" +":hyperparameter_types" = "`(\"PythonCall.Py\",)`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" @@ -7264,17 +7412,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.COPODDetector" -":hyperparameters" = "`(:n_jobs,)`" +":load_path" = "OutlierDetectionPython.CDDetector" +":hyperparameters" = "`(:model,)`" ":is_pure_julia" = "`false`" -":human_name" = "copod detector" +":human_name" = "cd detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nCOPODDetector(n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.copod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.copod)\n""" +":docstring" = """```\nCDDetector(whitening = true,\n rule_of_thumb = false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "COPODDetector" +":name" = "CDDetector" 
":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7289,11 +7437,11 @@ ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[OutlierDetectionPython.HBOSDetector] +[OutlierDetectionPython.KNNDetector] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"Real\", \"Real\")`" +":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"String\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Any\", \"Integer\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7301,17 +7449,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.HBOSDetector" -":hyperparameters" = "`(:n_bins, :alpha, :tol)`" +":load_path" = "OutlierDetectionPython.KNNDetector" +":hyperparameters" = "`(:n_neighbors, :method, :radius, :algorithm, :leaf_size, :metric, :p, :metric_params, :n_jobs)`" ":is_pure_julia" = "`false`" -":human_name" = "hbos detector" +":human_name" = "knn detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nHBOSDetector(n_bins = 10,\n alpha = 0.1,\n tol = 
0.5)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.hbos](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.hbos)\n""" +":docstring" = """```\nKNNDetector(n_neighbors = 5,\n method = \"largest\",\n radius = 1.0,\n algorithm = \"auto\",\n leaf_size = 30,\n metric = \"minkowski\",\n p = 2,\n metric_params = nothing,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "HBOSDetector" +":name" = "KNNDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7326,11 +7474,11 @@ ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[OutlierDetectionPython.IForestDetector] +[OutlierDetectionPython.GMMDetector] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"Union{Real, String}\", \"Real\", \"Bool\", \"Union{Nothing, Integer}\", \"Integer\", \"Integer\")`" +":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"Real\", \"Integer\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Bool\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, 
Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7338,17 +7486,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.IForestDetector" -":hyperparameters" = "`(:n_estimators, :max_samples, :max_features, :bootstrap, :random_state, :verbose, :n_jobs)`" +":load_path" = "OutlierDetectionPython.GMMDetector" +":hyperparameters" = "`(:n_components, :covariance_type, :tol, :reg_covar, :max_iter, :n_init, :init_params, :random_state, :warm_start)`" ":is_pure_julia" = "`false`" -":human_name" = "i forest detector" +":human_name" = "gmm detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nIForestDetector(n_estimators = 100,\n max_samples = \"auto\",\n max_features = 1.0\n bootstrap = false,\n random_state = nothing,\n verbose = 0,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.iforest](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.iforest)\n""" +":docstring" = """```\nGMMDetector(n_components=1,\n covariance_type=\"full\",\n tol=0.001,\n reg_covar=1e-06,\n max_iter=100,\n n_init=1,\n init_params=\"kmeans\",\n weights_init=None,\n means_init=None,\n precisions_init=None,\n random_state=None,\n warm_start=False)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "IForestDetector" +":name" 
= "GMMDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7363,11 +7511,11 @@ ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[OutlierDetectionPython.SOSDetector] +[OutlierDetectionPython.COFDetector] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Real\", \"String\", \"Real\")`" +":hyperparameter_types" = "`(\"Integer\", \"String\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7375,17 +7523,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.SOSDetector" -":hyperparameters" = "`(:perplexity, :metric, :eps)`" +":load_path" = "OutlierDetectionPython.COFDetector" +":hyperparameters" = "`(:n_neighbors, :method)`" ":is_pure_julia" = "`false`" -":human_name" = "sos detector" +":human_name" = "cof detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nSOSDetector(perplexity = 4.5,\n metric = \"minkowski\",\n eps = 1e-5)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sos](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sos)\n""" +":docstring" = 
"""```\nCOFDetector(n_neighbors = 5,\n method=\"fast\")\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "SOSDetector" +":name" = "COFDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7400,11 +7548,11 @@ ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[OutlierDetectionPython.ABODDetector] +[OutlierDetectionPython.CBLOFDetector] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"String\")`" +":hyperparameter_types" = "`(\"Integer\", \"Real\", \"Real\", \"Bool\", \"Union{Nothing, Integer}\", \"Integer\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7412,17 +7560,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.ABODDetector" -":hyperparameters" = "`(:n_neighbors, :method)`" +":load_path" = 
"OutlierDetectionPython.CBLOFDetector" +":hyperparameters" = "`(:n_clusters, :alpha, :beta, :use_weights, :random_state, :n_jobs)`" ":is_pure_julia" = "`false`" -":human_name" = "abod detector" +":human_name" = "cblof detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nABODDetector(n_neighbors = 5,\n method = \"fast\")\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.abod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.abod)\n""" +":docstring" = """```\nCBLOFDetector(n_clusters = 8,\n alpha = 0.9,\n beta = 5,\n use_weights = false,\n random_state = nothing,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "ABODDetector" +":name" = "CBLOFDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7437,11 +7585,11 @@ ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[OutlierDetectionPython.LOFDetector] +[OutlierDetectionPython.LOCIDetector] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"String\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Any\", \"Integer\", \"Bool\")`" +":hyperparameter_types" = "`(\"Real\", \"Real\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = 
"`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7449,17 +7597,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.LOFDetector" -":hyperparameters" = "`(:n_neighbors, :algorithm, :leaf_size, :metric, :p, :metric_params, :n_jobs, :novelty)`" +":load_path" = "OutlierDetectionPython.LOCIDetector" +":hyperparameters" = "`(:alpha, :k)`" ":is_pure_julia" = "`false`" -":human_name" = "lof detector" +":human_name" = "loci detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLOFDetector(n_neighbors = 5,\n algorithm = \"auto\",\n leaf_size = 30,\n metric = \"minkowski\",\n p = 2,\n metric_params = nothing,\n n_jobs = 1,\n novelty = true)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof)\n""" +":docstring" = """```\nLOCIDetector(alpha = 0.5,\n k = 3)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "LOFDetector" +":name" = "LOCIDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7474,11 +7622,11 @@ ":transform_scitype" = 
"`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[OutlierDetectionPython.PCADetector] +[OutlierDetectionPython.LMDDDetector] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"String\", \"Real\", \"Union{Integer, String}\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\")`" +":hyperparameter_types" = "`(\"Integer\", \"String\", \"Union{Nothing, Integer}\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7486,17 +7634,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.PCADetector" -":hyperparameters" = "`(:n_components, :n_selected_components, :copy, :whiten, :svd_solver, :tol, :iterated_power, :standardization, :weighted, :random_state)`" +":load_path" = "OutlierDetectionPython.LMDDDetector" +":hyperparameters" = "`(:n_iter, :dis_measure, :random_state)`" ":is_pure_julia" = "`false`" -":human_name" = "pca detector" +":human_name" = "lmdd detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nPCADetector(n_components = nothing,\n n_selected_components = 
nothing,\n copy = true,\n whiten = false,\n svd_solver = \"auto\",\n tol = 0.0\n iterated_power = \"auto\",\n standardization = true,\n weighted = true,\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca)\n""" +":docstring" = """```\nLMDDDetector(n_iter = 50,\n dis_measure = \"aad\",\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "PCADetector" +":name" = "LMDDDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7511,11 +7659,11 @@ ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[OutlierDetectionPython.INNEDetector] +[OutlierDetectionPython.RODDetector] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"Union{Real, String}\", \"Union{Nothing, Integer}\")`" +":hyperparameter_types" = "`(\"Bool\",)`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = 
"`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7523,17 +7671,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.INNEDetector" -":hyperparameters" = "`(:n_estimators, :max_samples, :random_state)`" +":load_path" = "OutlierDetectionPython.RODDetector" +":hyperparameters" = "`(:parallel_execution,)`" ":is_pure_julia" = "`false`" -":human_name" = "inne detector" +":human_name" = "rod detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nINNEDetector(n_estimators=200,\n max_samples=\"auto\",\n random_state=None)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne)\n""" +":docstring" = """```\nRODDetector(parallel_execution = false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "INNEDetector" +":name" = "RODDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7548,1046 +7696,1157 @@ ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[OutlierDetectionPython.OCSVMDetector] +[SelfOrganizingMaps.SelfOrganizingMap] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"String\", \"Integer\", \"Union{Real, String}\", \"Real\", \"Real\", \"Real\", \"Bool\", \"Integer\", \"Bool\", \"Integer\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Symbol\", \"Symbol\", \"Symbol\", \"Symbol\", \"Distances.PreMetric\", \"Int64\")`" +":package_uuid" = "ba4b7379-301a-4be0-bee6-171e4e152787" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "SelfOrganizingMaps.SelfOrganizingMap" +":hyperparameters" = "`(:k, :η, :σ², :grid_type, :η_decay, :σ_decay, :neighbor_function, :matching_distance, :Nepochs)`" +":is_pure_julia" = "`true`" +":human_name" = "self organizing map" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nSelfOrganizingMap\n```\n\nA model type for constructing a self organizing map, based on [SelfOrganizingMaps.jl](https://github.com/john-waczak/SelfOrganizingMaps.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSelfOrganizingMap = @load SelfOrganizingMap 
pkg=SelfOrganizingMaps\n```\n\nDo `model = SelfOrganizingMap()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SelfOrganizingMap(k=...)`.\n\nSelfOrganizingMaps implements [Kohonen's Self Organizing Map](https://ieeexplore.ieee.org/abstract/document/58325?casa_token=pGue0TD38nAAAAAA:kWFkvMJQKgYOTJjJx-_bRx8n_tnWEpau2QeoJ1gJt0IsywAuvkXYc0o5ezdc2mXfCzoEZUQXSQ), Proceedings of the IEEE; Kohonen, T.; (1990):\"The self-organizing map\"\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X)` where\n\n * `X`: an `AbstractMatrix` or `Table` of input features whose columns are of scitype `Continuous`.\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `k=10`: Number of nodes along one side of the SOM grid. There are `k²` total nodes.\n * `η=0.5`: Learning rate. Scales the adjustment made to the winning node and its neighbors during each round of training.\n * `σ²=0.05`: The (squared) neighbor radius. Used to determine scale for neighbor node adjustments.\n * `grid_type=:rectangular` Node grid geometry. One of `(:rectangular, :hexagonal, :spherical)`.\n * `η_decay=:exponential` Learning rate schedule function. One of `(:exponential, :asymptotic)`\n * `σ_decay=:exponential` Neighbor radius schedule function. One of `(:exponential, :asymptotic, :none)`\n * `neighbor_function=:gaussian` Kernel function used to make adjustment to neighbor weights. Scale is set by `σ²`. One of `(:gaussian, :mexican_hat)`.\n * `matching_distance=euclidean` Distance function from `Distances.jl` used to determine winning node.\n * `Nepochs=1` Number of times to repeat training on the shuffled dataset.\n\n# Operations\n\n * `transform(mach, Xnew)`: returns the coordinates of the winning SOM node for each instance of `Xnew`. For SOM of `grid_type` `:rectangular` and `:hexagonal`, these are cartesian coordinates. 
For `grid_type` `:spherical`, these are the latitude and longitude in radians.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coords`: The coordinates of each of the SOM nodes (points in the domain of the map) with shape (k², 2)\n * `weights`: Array of weight vectors for the SOM nodes (corresponding points in the map's range) of shape (k², input dimension)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `classes`: the index of the winning node for each instance of the training data X interpreted as a class label\n\n# Examples\n\n```\nusing MLJ\nsom = @load SelfOrganizingMap pkg=SelfOrganizingMaps\nmodel = som()\nX, y = make_regression(50, 3) # synthetic data\nmach = machine(model, X) |> fit!\nX̃ = transform(mach, X)\n\nrpt = report(mach)\nclasses = rpt.classes\n```\n""" +":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/john-waczak/SelfOrganizingMaps.jl" +":package_name" = "SelfOrganizingMaps" +":name" = "SelfOrganizingMap" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" +":is_wrapper" = "`false`" + +[InteractiveUtils] + +[MLJMultivariateStatsInterface.LDA] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", 
\"StatsBase.CovarianceEstimator\", \"Int64\", \"Float64\", \"Distances.SemiMetric\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "MLJMultivariateStatsInterface.LDA" +":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :dist)`" +":is_pure_julia" = "`true`" +":human_name" = "linear discriminant analysis model" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nLDA\n```\n\nA model type for constructing a linear discriminant analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLDA = @load LDA pkg=MultivariateStats\n```\n\nDo `model = LDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LDA(method=...)`.\n\n[Multiclass linear discriminant analysis](https://en.wikipedia.org/wiki/Linear_discriminant_analysis) learns a projection in a space of features to a lower dimensional space, in a way that attempts to preserve as much as possible the degree to which the classes of a discrete target variable can be discriminated. 
This can be used either for dimension reduction of the features (see `transform` below) or for probabilistic classification of the target (see `predict` below).\n\nIn the case of prediction, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. Specifically, the distances, in the transformed (projected) space, of a new observation, from the centroid of each target class, is computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: The solver, one of `:gevd` or `:whiten` methods.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e dimension of the transformed space, automatically set to `min(indim, nclasses-1)` if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. 
A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. This can be useful if using the standard covariance estimator.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. 
A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n\n# Examples\n\n```\nusing MLJ\n\nLDA = @load LDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = LDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n\n```\n\nSee also [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "LDA" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" + +[MLJMultivariateStatsInterface.MultitargetLinearRegressor] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Bool\",)`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing,)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] 
+":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "MLJMultivariateStatsInterface.MultitargetLinearRegressor" +":hyperparameters" = "`(:bias,)`" +":is_pure_julia" = "`true`" +":human_name" = "multitarget linear regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nMultitargetLinearRegressor\n```\n\nA model type for constructing a multitarget linear regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\n```\n\nDo `model = MultitargetLinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetLinearRegressor(bias=...)`.\n\n`MultitargetLinearRegressor` assumes the target variable is vector-valued with continuous components. It trains a linear prediction function using the least squares algorithm. 
Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\nusing DataFrames\n\nLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\nlinear_regressor = LinearRegressor()\n\nX, y = make_regression(100, 9; n_targets = 2) # a table and a table (synthetic data)\n\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 9)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "MultitargetLinearRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" + +[MLJMultivariateStatsInterface.BayesianSubspaceLDA] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Bool\", \"Int64\", \"Union{Nothing, Dict{<:Any, <:Real}, CategoricalDistributions.UnivariateFinite{<:Any, <:Any, <:Any, <:Real}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.OCSVMDetector" -":hyperparameters" = "`(:kernel, :degree, :gamma, :coef0, :tol, :nu, :shrinking, :cache_size, :verbose, :max_iter)`" -":is_pure_julia" = "`false`" -":human_name" = "ocsvm detector" -":is_supervised" = "`false`" +":prediction_type" = ":probabilistic" +":load_path" = "MLJMultivariateStatsInterface.BayesianSubspaceLDA" +":hyperparameters" = "`(:normalize, :outdim, :priors)`" +":is_pure_julia" = "`true`" +":human_name" = "Bayesian subspace LDA model" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nOCSVMDetector(kernel = \"rbf\",\n degree = 3,\n gamma = \"auto\",\n coef0 = 0.0,\n tol = 0.001,\n nu = 0.5,\n shrinking = true,\n cache_size = 200,\n verbose = false,\n max_iter = 
-1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm)\n""" +":docstring" = """```\nBayesianSubspaceLDA\n```\n\nA model type for constructing a Bayesian subspace LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBayesianSubspaceLDA = @load BayesianSubspaceLDA pkg=MultivariateStats\n```\n\nDo `model = BayesianSubspaceLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BayesianSubspaceLDA(normalize=...)`.\n\nThe Bayesian multiclass subspace linear discriminant analysis algorithm learns a projection matrix as described in [`SubspaceLDA`](@ref). The posterior class probability distribution is derived as in [`BayesianLDA`](@ref).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `normalize=true`: Option to normalize the between class variance for the number of observations in each class, one of `true` or `false`.\n * `outdim`: the output dimension, automatically set to `min(indim, nclasses-1)` if equal to `0`. 
If a non-zero `outdim` is passed, then the actual output dimension used is `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.\n\n * `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real}, Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If `priors = nothing` then `priors` are estimated from the class proportions in the training data. Otherwise it requires a `Dict` or `UnivariateFinite` object specifying the classes with non-zero probabilities in the training target.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n * `priors`: The class priors for classification. As inferred from training target `y`, if not user-specified. A `UnivariateFinite` object with levels consistent with `levels(y)`.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The overall mean of the training data.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n\n`class_means`: The class-specific means of the training data. 
A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `explained_variance_ratio`: The ratio of explained variance to total variance. Each dimension corresponds to an eigenvalue.\n\n# Examples\n\n```\nusing MLJ\n\nBayesianSubspaceLDA = @load BayesianSubspaceLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = BayesianSubspaceLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "OCSVMDetector" -":target_in_fit" = "`false`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "BayesianSubspaceLDA" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" 
-":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[OutlierDetectionPython.ECODDetector] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\",)`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing,)`" + +[MLJMultivariateStatsInterface.FactorAnalysis] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Symbol\", \"Int64\", \"Int64\", \"Real\", \"Real\", \"Union{Nothing, Real, Vector{Float64}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.ECODDetector" 
-":hyperparameters" = "`(:n_jobs,)`" -":is_pure_julia" = "`false`" -":human_name" = "ecod detector" +":load_path" = "MLJMultivariateStatsInterface.FactorAnalysis" +":hyperparameters" = "`(:method, :maxoutdim, :maxiter, :tol, :eta, :mean)`" +":is_pure_julia" = "`true`" +":human_name" = "factor analysis model" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nECODDetector(n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "ECODDetector" +":docstring" = """```\nFactorAnalysis\n```\n\nA model type for constructing a factor analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFactorAnalysis = @load FactorAnalysis pkg=MultivariateStats\n```\n\nDo `model = FactorAnalysis()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FactorAnalysis(method=...)`.\n\nFactor analysis is a linear-Gaussian latent variable model that is closely related to probabilistic PCA. 
In contrast to the probabilistic PCA model, the covariance of the conditional distribution of the observed variable given the latent variable is diagonal rather than isotropic.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:cm`: Method to use to solve the problem, one of `:em`, `:cm`.\n * `maxoutdim=0`: Controls the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `maxiter::Int=1000`: Maximum number of iterations.\n * `tol::Real=1e-6`: Convergence tolerance.\n * `eta::Real=tol`: Variance lower bound.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If `nothing`, centering will be computed and applied; if set to `0` no centering is applied (data is assumed pre-centered); if a vector, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having the same number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. 
In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(mach, Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively. Each column of the projection matrix corresponds to a factor.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data (number of factors).\n * `variance`: The variance of the factors.\n * `covariance_matrix`: The estimated covariance matrix.\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `loadings`: The factor loadings. A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```\nusing MLJ\n\nFactorAnalysis = @load FactorAnalysis pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = FactorAnalysis(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`PPCA`](@ref), [`PCA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "FactorAnalysis" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" 
+":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[OutlierDetectionPython.SODDetector] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"Integer\", \"Real\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" + +[MLJMultivariateStatsInterface.LinearRegressor] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Bool\",)`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = 
"`MLJModelInterface.Deterministic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.SODDetector" -":hyperparameters" = "`(:n_neighbors, :ref_set, :alpha)`" -":is_pure_julia" = "`false`" -":human_name" = "sod detector" -":is_supervised" = "`false`" +":prediction_type" = ":deterministic" +":load_path" = "MLJMultivariateStatsInterface.LinearRegressor" +":hyperparameters" = "`(:bias,)`" +":is_pure_julia" = "`true`" +":human_name" = "linear regressor" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nSODDetector(n_neighbors = 5,\n ref_set = 10,\n alpha = 0.8)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod)\n""" +":docstring" = """```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=MultivariateStats\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(bias=...)`.\n\n`LinearRegressor` assumes the target is a `Continuous` variable and trains a linear prediction function using the least squares algorithm. 
Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\n\nLinearRegressor = @load LinearRegressor pkg=MultivariateStats\nlinear_regressor = LinearRegressor()\n\nX, y = make_regression(100, 2) # a table and a vector (synthetic data)\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`MultitargetLinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "SODDetector" -":target_in_fit" = "`false`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "LinearRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] 
+":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[OutlierDetectionPython.LODADetector] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"Integer\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing)`" + +[MLJMultivariateStatsInterface.ICA] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Symbol\", \"Bool\", \"Int64\", \"Real\", \"Union{Nothing, Matrix{<:Real}}\", \"Union{Nothing, Real, Vector{Float64}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, 
AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.LODADetector" -":hyperparameters" = "`(:n_bins, :n_random_cuts)`" -":is_pure_julia" = "`false`" -":human_name" = "loda detector" +":load_path" = "MLJMultivariateStatsInterface.ICA" +":hyperparameters" = "`(:outdim, :alg, :fun, :do_whiten, :maxiter, :tol, :winit, :mean)`" +":is_pure_julia" = "`true`" +":human_name" = "independent component analysis model" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLODADetector(n_bins = 10,\n n_random_cuts = 100)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "LODADetector" +":docstring" = """```\nICA\n```\n\nA model type for constructing a independent component analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nICA = @load ICA pkg=MultivariateStats\n```\n\nDo `model = ICA()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `ICA(outdim=...)`.\n\nIndependent component analysis is a computational technique for separating a multivariate signal into additive subcomponents, with the assumption that the subcomponents are non-Gaussian and independent from each other.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `outdim::Int=0`: The number of independent components to recover, set automatically if `0`.\n * `alg::Symbol=:fastica`: The algorithm to use (only `:fastica` is supported at the moment).\n * `fun::Symbol=:tanh`: The approximate neg-entropy function, one of `:tanh`, `:gaus`.\n * `do_whiten::Bool=true`: Whether or not to perform pre-whitening.\n * `maxiter::Int=100`: The maximum number of iterations.\n * `tol::Real=1e-6`: The convergence tolerance for change in the unmixing matrix W.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: mean to use, if nothing (default) centering is computed and applied, if zero, no centering; otherwise a vector of means can be passed.\n * `winit::Union{Nothing,Matrix{<:Real}}=nothing`: Initial guess for the unmixing matrix `W`: either an empty matrix (for random initialization of `W`), a matrix of size `m × k` (if `do_whiten` is true), or a matrix of size `m × k`. 
Here `m` is the number of components (columns) of the input.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return the component-separated version of input `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: The estimated component matrix.\n * `mean`: The estimated mean vector.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `mean`: The mean of the untransformed training data, of length `indim`.\n\n# Examples\n\n```\nusing MLJ\n\nICA = @load ICA pkg=MultivariateStats\n\ntimes = range(0, 8, length=2000)\n\nsine_wave = sin.(2*times)\nsquare_wave = sign.(sin.(3*times))\nsawtooth_wave = map(t -> mod(2t, 2) - 1, times)\nsignals = hcat(sine_wave, square_wave, sawtooth_wave)\nnoisy_signals = signals + 0.2*randn(size(signals))\n\nmixing_matrix = [ 1 1 1; 0.5 2 1; 1.5 1 2]\nX = MLJ.table(noisy_signals*mixing_matrix)\n\nmodel = ICA(outdim = 3, tol=0.1)\nmach = machine(model, X) |> fit!\n\nX_unmixed = transform(mach, X)\n\nusing Plots\n\nplot(X.x1)\nplot(X.x2)\nplot(X.x3)\n\nplot(X_unmixed.x1)\nplot(X_unmixed.x2)\nplot(X_unmixed.x3)\n\n```\n\nSee also [`PCA`](@ref), [`KernelPCA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "ICA" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" 
-":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[OutlierDetectionPython.KDEDetector] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Real\", \"String\", \"Integer\", \"String\", \"Any\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" + +[MLJMultivariateStatsInterface.PPCA] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Int64\", \"Real\", \"Union{Nothing, Real, Vector{Float64}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.KDEDetector" -":hyperparameters" = "`(:bandwidth, :algorithm, :leaf_size, :metric, :metric_params)`" -":is_pure_julia" = "`false`" -":human_name" = "kde detector" +":load_path" = "MLJMultivariateStatsInterface.PPCA" +":hyperparameters" = "`(:maxoutdim, :method, :maxiter, :tol, :mean)`" +":is_pure_julia" = "`true`" +":human_name" = "probabilistic PCA model" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nKDEDetector(bandwidth=1.0,\n algorithm=\"auto\",\n leaf_size=30,\n metric=\"minkowski\",\n metric_params=None)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "KDEDetector" +":docstring" = """```\nPPCA\n```\n\nA model type for constructing a probabilistic PCA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nPPCA = @load PPCA pkg=MultivariateStats\n```\n\nDo `model = PPCA()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `PPCA(maxoutdim=...)`.\n\nProbabilistic principal component analysis is a dimension-reduction algorithm which represents a constrained form of the Gaussian distribution in which the number of free parameters can be restricted while still allowing the model to capture the dominant correlations in a data set. It is expressed as the maximum likelihood solution of a probabilistic latent variable model. For details, see C. M. Bishop (2006): Pattern Recognition and Machine Learning.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Controls the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `method::Symbol=:ml`: The method to use to solve the problem, one of `:ml`, `:em`, `:bayes`.\n * `maxiter::Int=1000`: The maximum number of iterations.\n * `tol::Real=1e-6`: The convergence tolerance.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If `nothing`, centering will be computed and applied; if set to `0` no centering is applied (data is assumed pre-centered); if a vector, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having the same number of columns as the original training data `X`, that transforms to `Xsmall`. 
Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(mach, Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively. Each column of the projection matrix corresponds to a principal component.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `tvar`: The variance of the components.\n * `loadings`: The model's loadings matrix. A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```\nusing MLJ\n\nPPCA = @load PPCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = PPCA(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PCA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "PPCA" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":target_scitype" = 
"`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[OutlierDetectionPython.CDDetector] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"PythonCall.Core.Py\",)`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing,)`" + +[MLJMultivariateStatsInterface.RidgeRegressor] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Union{Real, AbstractVecOrMat}\", \"Bool\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = 
"`MLJModelInterface.Deterministic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.CDDetector" -":hyperparameters" = "`(:model,)`" -":is_pure_julia" = "`false`" -":human_name" = "cd detector" -":is_supervised" = "`false`" +":prediction_type" = ":deterministic" +":load_path" = "MLJMultivariateStatsInterface.RidgeRegressor" +":hyperparameters" = "`(:lambda, :bias)`" +":is_pure_julia" = "`true`" +":human_name" = "ridge regressor" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nCDDetector(whitening = true,\n rule_of_thumb = false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd)\n""" +":docstring" = """```\nRidgeRegressor\n```\n\nA model type for constructing a ridge regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRidgeRegressor = @load RidgeRegressor pkg=MultivariateStats\n```\n\nDo `model = RidgeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RidgeRegressor(lambda=...)`.\n\n`RidgeRegressor` adds a quadratic penalty term to least squares regression, for regularization. Ridge regression is particularly useful in the case of multicollinearity. 
Options exist to specify a bias term, and to adjust the strength of the penalty term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `lambda=1.0`: Is the non-negative parameter for the regularization strength. If lambda is 0, ridge regression is equivalent to linear least squares regression, and as lambda approaches infinity, all the linear coefficients approach 0.\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\n\nRidgeRegressor = @load RidgeRegressor pkg=MultivariateStats\npipe = Standardizer() |> RidgeRegressor(lambda=10)\n\nX, y = @load_boston\n\nmach = machine(pipe, X, y) |> fit!\nyhat = predict(mach, X)\ntraining_error = l1(yhat, y) |> mean\n```\n\nSee also [`LinearRegressor`](@ref), [`MultitargetLinearRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "CDDetector" -":target_in_fit" = "`false`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = 
"RidgeRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[OutlierDetectionPython.KNNDetector] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"String\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Any\", \"Integer\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJMultivariateStatsInterface.KernelPCA] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Function}\", \"Symbol\", \"Bool\", \"Real\", \"Real\", \"Int64\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = 
"`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.KNNDetector" -":hyperparameters" = "`(:n_neighbors, :method, :radius, :algorithm, :leaf_size, :metric, :p, :metric_params, :n_jobs)`" -":is_pure_julia" = "`false`" -":human_name" = "knn detector" +":load_path" = "MLJMultivariateStatsInterface.KernelPCA" +":hyperparameters" = "`(:maxoutdim, :kernel, :solver, :inverse, :beta, :tol, :maxiter)`" +":is_pure_julia" = "`true`" +":human_name" = "kernel prinicipal component analysis model" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nKNNDetector(n_neighbors = 5,\n method = \"largest\",\n radius = 1.0,\n algorithm = \"auto\",\n leaf_size = 30,\n metric = \"minkowski\",\n p = 2,\n metric_params = nothing,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "KNNDetector" 
+":docstring" = """```\nKernelPCA\n```\n\nA model type for constructing a kernel prinicipal component analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nKernelPCA = @load KernelPCA pkg=MultivariateStats\n```\n\nDo `model = KernelPCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `KernelPCA(maxoutdim=...)`.\n\nIn kernel PCA the linear operations of ordinary principal component analysis are performed in a [reproducing Hilbert space](https://en.wikipedia.org/wiki/Reproducing_kernel_Hilbert_space).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Controls the the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `kernel::Function=(x,y)->x'y`: The kernel function, takes in 2 vector arguments x and y, returns a scalar value. 
Defaults to the dot product of `x` and `y`.\n * `solver::Symbol=:eig`: solver to use for the eigenvalues, one of `:eig`(default, uses `LinearAlgebra.eigen`), `:eigs`(uses `Arpack.eigs`).\n * `inverse::Bool=true`: perform calculations needed for inverse transform\n * `beta::Real=1.0`: strength of the ridge regression that learns the inverse transform when inverse is true.\n * `tol::Real=0.0`: Convergence tolerance for eigenvalue solver.\n * `maxiter::Int=300`: maximum number of iterations for eigenvalue solver.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. 
In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and ouput respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `principalvars`: The variance of the principal components.\n\n# Examples\n\n```\nusing MLJ\nusing LinearAlgebra\n\nKernelPCA = @load KernelPCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nfunction rbf_kernel(length_scale)\n return (x,y) -> norm(x-y)^2 / ((2 * length_scale)^2)\nend\n\nmodel = KernelPCA(maxoutdim=2, kernel=rbf_kernel(1))\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`PCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "KernelPCA" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = 
"`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[OutlierDetectionPython.GMMDetector] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"Real\", \"Integer\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Bool\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJMultivariateStatsInterface.MultitargetRidgeRegressor] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Union{Real, AbstractVecOrMat}\", \"Bool\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" 
+":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.GMMDetector" -":hyperparameters" = "`(:n_components, :covariance_type, :tol, :reg_covar, :max_iter, :n_init, :init_params, :random_state, :warm_start)`" -":is_pure_julia" = "`false`" -":human_name" = "gmm detector" -":is_supervised" = "`false`" +":prediction_type" = ":deterministic" +":load_path" = "MLJMultivariateStatsInterface.MultitargetRidgeRegressor" +":hyperparameters" = "`(:lambda, :bias)`" +":is_pure_julia" = "`true`" +":human_name" = "multitarget ridge regressor" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nGMMDetector(n_components=1,\n covariance_type=\"full\",\n tol=0.001,\n reg_covar=1e-06,\n max_iter=100,\n n_init=1,\n init_params=\"kmeans\",\n weights_init=None,\n means_init=None,\n precisions_init=None,\n random_state=None,\n warm_start=False)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm)\n""" +":docstring" = """```\nMultitargetRidgeRegressor\n```\n\nA model type for constructing a multitarget ridge regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetRidgeRegressor = @load MultitargetRidgeRegressor pkg=MultivariateStats\n```\n\nDo `model = MultitargetRidgeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetRidgeRegressor(lambda=...)`.\n\nMulti-target ridge regression adds a quadratic penalty term to multi-target least squares regression, for regularization. Ridge regression is particularly useful in the case of multicollinearity. In this case, the output represents a response vector. 
Options exist to specify a bias term, and to adjust the strength of the penalty term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `lambda=1.0`: Is the non-negative parameter for the regularization strength. If lambda is 0, ridge regression is equivalent to linear least squares regression, and as lambda approaches infinity, all the linear coefficients approach 0.\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\nusing DataFrames\n\nRidgeRegressor = @load MultitargetRidgeRegressor pkg=MultivariateStats\n\nX, y = make_regression(100, 6; n_targets = 2) # a table and a table (synthetic data)\n\nridge_regressor = RidgeRegressor(lambda=1.5)\nmach = machine(ridge_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 6)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`MultitargetLinearRegressor`](@ref), [`RidgeRegressor`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "GMMDetector" -":target_in_fit" = "`false`" +":package_url" 
= "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "MultitargetRidgeRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[OutlierDetectionPython.COFDetector] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"String\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing)`" + +[MLJMultivariateStatsInterface.SubspaceLDA] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Bool\", \"Int64\", \"Distances.SemiMetric\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = 
"`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.COFDetector" -":hyperparameters" = "`(:n_neighbors, :method)`" -":is_pure_julia" = "`false`" -":human_name" = "cof detector" -":is_supervised" = "`false`" +":prediction_type" = ":probabilistic" +":load_path" = "MLJMultivariateStatsInterface.SubspaceLDA" +":hyperparameters" = "`(:normalize, :outdim, :dist)`" +":is_pure_julia" = "`true`" +":human_name" = "subpace LDA model" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nCOFDetector(n_neighbors = 5,\n method=\"fast\")\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof)\n""" +":docstring" = """```\nSubspaceLDA\n```\n\nA model type for constructing a subpace LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSubspaceLDA = @load SubspaceLDA pkg=MultivariateStats\n```\n\nDo `model = SubspaceLDA()` to construct an 
instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SubspaceLDA(normalize=...)`.\n\nMulticlass subspace linear discriminant analysis (LDA) is a variation on ordinary [`LDA`](@ref) suitable for high dimensional data, as it avoids storing scatter matrices. For details, refer the [MultivariateStats.jl documentation](https://juliastats.org/MultivariateStats.jl/stable/).\n\nIn addition to dimension reduction (using `transform`) probabilistic classification is provided (using `predict`). In the case of classification, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. Specifically, the distances, in the transformed (projected) space, of a new observation, from the centroid of each target class, is computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `normalize=true`: Option to normalize the between class variance for the number of observations in each class, one of `true` or `false`.\n * `outdim`: the ouput dimension, automatically set to `min(indim, nclasses-1)` if equal to `0`. 
If a non-zero `outdim` is passed, then the actual output dimension used is `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool)\n\n`class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. 
(See fitted params section above.)\n * `explained_variance_ratio`: The ratio of explained variance to total variance. Each dimension corresponds to an eigenvalue.\n\n# Examples\n\n```\nusing MLJ\n\nSubspaceLDA = @load SubspaceLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = SubspaceLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`BayesianLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "COFDetector" -":target_in_fit" = "`false`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "SubspaceLDA" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - 
-[OutlierDetectionPython.CBLOFDetector] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"Real\", \"Real\", \"Bool\", \"Union{Nothing, Integer}\", \"Integer\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" + +[MLJMultivariateStatsInterface.BayesianLDA] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", \"StatsBase.CovarianceEstimator\", \"Int64\", \"Float64\", \"Union{Nothing, Dict{<:Any, <:Real}, CategoricalDistributions.UnivariateFinite{<:Any, <:Any, <:Any, <:Real}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.CBLOFDetector" -":hyperparameters" = "`(:n_clusters, :alpha, :beta, :use_weights, :random_state, :n_jobs)`" -":is_pure_julia" = "`false`" 
-":human_name" = "cblof detector" -":is_supervised" = "`false`" +":prediction_type" = ":probabilistic" +":load_path" = "MLJMultivariateStatsInterface.BayesianLDA" +":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :priors)`" +":is_pure_julia" = "`true`" +":human_name" = "Bayesian LDA model" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nCBLOFDetector(n_clusters = 8,\n alpha = 0.9,\n beta = 5,\n use_weights = false,\n random_state = nothing,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof)\n""" +":docstring" = """```\nBayesianLDA\n```\n\nA model type for constructing a Bayesian LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBayesianLDA = @load BayesianLDA pkg=MultivariateStats\n```\n\nDo `model = BayesianLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BayesianLDA(method=...)`.\n\nThe Bayesian multiclass LDA algorithm learns a projection matrix as described in ordinary [`LDA`](@ref). Predicted class posterior probability distributions are derived by applying Bayes' rule with a multivariate Gaussian class-conditional distribution. A prior class distribution can be specified by the user or inferred from training data class frequency.\n\nSee also the [package documentation](https://multivariatestatsjl.readthedocs.io/en/latest/lda.html). 
For more information about the algorithm, see [Li, Zhu and Ogihara (2006): Using Discriminant Analysis for Multi-class Classification: An Experimental Investigation](https://doi.org/10.1007/s10115-006-0013-y).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: choice of solver, one of `:gevd` or `:whiten` methods.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e., dimension of the transformed space, automatically set to `min(indim, nclasses-1)` if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. This can be useful if using the standard covariance estimator.\n * `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real}, Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If `priors = nothing` then `priors` are estimated from the class proportions in the training data. 
Otherwise it requires a `Dict` or `UnivariateFinite` object specifying the classes with non-zero probabilities in the training target.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n * `priors`: The class priors for classification. As inferred from training target `y`, if not user-specified. A `UnivariateFinite` object with levels consistent with `levels(y)`.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. 
(See fitted params section above.)\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n\n# Examples\n\n```\nusing MLJ\n\nBayesianLDA = @load BayesianLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = BayesianLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "CBLOFDetector" -":target_in_fit" = "`false`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "BayesianLDA" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[OutlierDetectionPython.LOCIDetector] +":input_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Real\", \"Real\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing)`" + +[MLJMultivariateStatsInterface.PCA] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Float64\", \"Union{Nothing, Real, Vector{Float64}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.LOCIDetector" -":hyperparameters" = "`(:alpha, :k)`" -":is_pure_julia" = "`false`" -":human_name" = "loci detector" +":load_path" = "MLJMultivariateStatsInterface.PCA" +":hyperparameters" = "`(:maxoutdim, :method, :variance_ratio, :mean)`" +":is_pure_julia" = "`true`" +":human_name" = "pca" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLOCIDetector(alpha = 0.5,\n k = 
3)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "LOCIDetector" +":docstring" = """```\nPCA\n```\n\nA model type for constructing a pca, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nPCA = @load PCA pkg=MultivariateStats\n```\n\nDo `model = PCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `PCA(maxoutdim=...)`.\n\nPrincipal component analysis learns a linear projection onto a lower dimensional space while preserving most of the initial variance seen in the training data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Together with `variance_ratio`, controls the output dimension `outdim` chosen by the model. Specifically, suppose that `k` is the smallest integer such that retaining the `k` most significant principal components accounts for `variance_ratio` of the total variance in the training data. Then `outdim = min(outdim, maxoutdim)`. 
If `maxoutdim=0` (default) then the effective `maxoutdim` is `min(n, indim - 1)` where `n` is the number of observations and `indim` the number of features in the training data.\n * `variance_ratio::Float64=0.99`: The ratio of variance preserved after the transformation\n * `method=:auto`: The method to use to solve the problem. Choices are\n\n * `:svd`: Support Vector Decomposition of the matrix.\n * `:cov`: Covariance matrix decomposition.\n * `:auto`: Use `:cov` if the matrices first dimension is smaller than its second dimension and otherwise use `:svd`\n * `mean=nothing`: if `nothing`, centering will be computed and applied, if set to `0` no centering (data is assumed pre-centered); if a vector is passed, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. 
In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim = min(n, indim, maxoutdim)` is the output dimension; here `n` is the number of observations.\n * `tprincipalvar`: Total variance of the principal components.\n * `tresidualvar`: Total residual variance.\n * `tvar`: Total observation variance (principal + residual variance).\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `principalvars`: The variance of the principal components. An AbstractVector of length `outdim`\n * `loadings`: The models loadings, weights for each variable used when calculating principal components. 
A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```\nusing MLJ\n\nPCA = @load PCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = PCA(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "PCA" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[OutlierDetectionPython.LMDDDetector] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"String\", \"Union{Nothing, Integer}\")`" -":package_uuid" = 
"2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" + +[MLJTransforms.Standardizer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Union{Function, AbstractVector{Symbol}}\", \"Bool\", \"Bool\", \"Bool\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.LMDDDetector" -":hyperparameters" = "`(:n_iter, :dis_measure, :random_state)`" -":is_pure_julia" = "`false`" -":human_name" = "lmdd detector" +":load_path" = "MLJTransforms.Standardizer" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :count)`" +":is_pure_julia" = "`true`" +":human_name" = "standardizer" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLMDDDetector(n_iter = 50,\n dis_measure = \"aad\",\n random_state = 
nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "LMDDDetector" +":docstring" = """```\nStandardizer\n```\n\nA model type for constructing a standardizer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStandardizer = @load Standardizer pkg=unknown\n```\n\nDo `model = Standardizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `Standardizer(features=...)`.\n\nUse this model to standardize (whiten) a `Continuous` vector, or relevant columns of a table. The rescalings applied by this transformer to new data are always those learned during the training phase, which are generally different from what would actually standardize the new data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table or any abstract vector with `Continuous` element scitype (any abstract float vector). 
Only features in a table with `Continuous` scitype can be standardized; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated below:\n\n * `[]` (empty, the default): standardize all features (columns) having `Continuous` element scitype\n * non-empty vector of feature names (symbols): standardize only the `Continuous` features in the vector (if `ignore=false`) or `Continuous` features *not* named in the vector (`ignore=true`).\n * function or other callable: standardize a feature if the callable returns `true` on its name. For example, `Standardizer(features = name -> name in [:x1, :x3], ignore = true, count=true)` has the same effect as `Standardizer(features = [:x1, :x3], ignore = true, count=true)`, namely to standardize all `Continuous` and `Count` features, with the exception of `:x1` and `:x3`.\n\n Note this behavior is further modified if the `ordered_factor` or `count` flags are set to `true`; see below\n * `ignore=false`: whether to ignore or standardize specified `features`, as explained above\n * `ordered_factor=false`: if `true`, standardize any `OrderedFactor` feature wherever a `Continuous` feature would be standardized, as described above\n * `count=false`: if `true`, standardize any `Count` feature wherever a `Continuous` feature would be standardized, as described above\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with relevant features standardized according to the rescalings learned during fitting of `mach`.\n * `inverse_transform(mach, Z)`: apply the inverse transformation to `Z`, so that `inverse_transform(mach, transform(mach, Xnew))` is approximately the same as `Xnew`; unavailable if `ordered_factor` or `count` flags were set to `true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_fit` - the names of features that will be standardized\n * `means` - the 
corresponding untransformed mean values\n * `stds` - the corresponding untransformed standard deviations\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_fit`: the names of features that will be standardized\n\n# Examples\n\n```\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([:x, :y, :x], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\njulia> schema(X)\n┌──────────┬──────────────────┐\n│ names │ scitypes │\n├──────────┼──────────────────┤\n│ ordinal1 │ Count │\n│ ordinal2 │ OrderedFactor{2} │\n│ ordinal3 │ Continuous │\n│ ordinal4 │ Continuous │\n│ nominal │ Multiclass{3} │\n└──────────┴──────────────────┘\n\nstand1 = Standardizer();\n\njulia> transform(fit!(machine(stand1, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [-1.0, 0.0, 1.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\nstand2 = Standardizer(features=[:ordinal3, ], ignore=true, count=true);\n\njulia> transform(fit!(machine(stand2, X)), X)\n(ordinal1 = [-1.0, 0.0, 1.0],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n```\n\nSee also [`OneHotEncoder`](@ref), [`ContinuousEncoder`](@ref).\n""" +":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "Standardizer" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", 
":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[OutlierDetectionPython.RODDetector] +":input_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Bool\",)`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing,)`" + +[MLJTransforms.UnivariateTimeTypeToContinuous] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Union{Nothing, Dates.TimeType}\", \"Dates.Period\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.ScientificTimeType}}`" +":output_scitype" = 
"`AbstractVector{ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.RODDetector" -":hyperparameters" = "`(:parallel_execution,)`" -":is_pure_julia" = "`false`" -":human_name" = "rod detector" +":load_path" = "MLJTransforms.UnivariateTimeTypeToContinuous" +":hyperparameters" = "`(:zero_time, :step)`" +":is_pure_julia" = "`true`" +":human_name" = "single variable transformer that creates continuous representations of temporally typed data" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nRODDetector(parallel_execution = false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "RODDetector" +":docstring" = """```\nUnivariateTimeTypeToContinuous\n```\n\nA model type for constructing a single variable transformer that creates continuous representations of temporally typed data, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateTimeTypeToContinuous = @load UnivariateTimeTypeToContinuous pkg=unknown\n```\n\nDo `model = UnivariateTimeTypeToContinuous()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateTimeTypeToContinuous(zero_time=...)`.\n\nUse this model to convert vectors with a `TimeType` element type to vectors of `Float64` type (`Continuous` element scitype).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector whose element type is a subtype of `Dates.TimeType`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `zero_time`: the time that is to correspond to 0.0 under transformations, with the type coinciding with the training data element type. If unspecified, the earliest time encountered in training is used.\n * `step::Period=Hour(24)`: time interval to correspond to one unit under transformation\n\n# Operations\n\n * `transform(mach, xnew)`: apply the encoding inferred when `mach` was fit\n\n# Fitted parameters\n\n`fitted_params(mach).fitresult` is the tuple `(zero_time, step)` actually used in transformations, which may differ from the user-specified hyper-parameters.\n\n# Example\n\n```\nusing MLJ\nusing Dates\n\nx = [Date(2001, 1, 1) + Day(i) for i in 0:4]\n\nencoder = UnivariateTimeTypeToContinuous(zero_time=Date(2000, 1, 1),\n step=Week(1))\n\nmach = machine(encoder, x)\nfit!(mach)\njulia> transform(mach, x)\n5-element Vector{Float64}:\n 52.285714285714285\n 52.42857142857143\n 52.57142857142857\n 52.714285714285715\n 52.857142\n```\n""" +":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateTimeTypeToContinuous" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":transform"] ":deep_properties" = "`()`" 
":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[SelfOrganizingMaps.SelfOrganizingMap] +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" +":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Symbol\", \"Symbol\", \"Symbol\", \"Symbol\", \"Distances.PreMetric\", \"Int64\")`" -":package_uuid" = "ba4b7379-301a-4be0-bee6-171e4e152787" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJTransforms.OneHotEncoder] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Bool\", \"Bool\", \"Bool\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = 
"SelfOrganizingMaps.SelfOrganizingMap" -":hyperparameters" = "`(:k, :η, :σ², :grid_type, :η_decay, :σ_decay, :neighbor_function, :matching_distance, :Nepochs)`" +":load_path" = "MLJTransforms.OneHotEncoder" +":hyperparameters" = "`(:features, :drop_last, :ordered_factor, :ignore)`" ":is_pure_julia" = "`true`" -":human_name" = "self organizing map" +":human_name" = "one-hot encoder" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nSelfOrganizingMap\n```\n\nA model type for constructing a self organizing map, based on [SelfOrganizingMaps.jl](https://github.com/john-waczak/SelfOrganizingMaps.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSelfOrganizingMap = @load SelfOrganizingMap pkg=SelfOrganizingMaps\n```\n\nDo `model = SelfOrganizingMap()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SelfOrganizingMap(k=...)`.\n\nSelfOrganizingMaps implements [Kohonen's Self Organizing Map](https://ieeexplore.ieee.org/abstract/document/58325?casa_token=pGue0TD38nAAAAAA:kWFkvMJQKgYOTJjJx-_bRx8n_tnWEpau2QeoJ1gJt0IsywAuvkXYc0o5ezdc2mXfCzoEZUQXSQ), Proceedings of the IEEE; Kohonen, T.; (1990):\"The self-organizing map\"\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with mach = machine(model, X) where\n\n * `X`: an `AbstractMatrix` or `Table` of input features whose columns are of scitype `Continuous.`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `k=10`: Number of nodes along once side of SOM grid. There are `k²` total nodes.\n * `η=0.5`: Learning rate. Scales adjust made to winning node and its neighbors during each round of training.\n * `σ²=0.05`: The (squared) neighbor radius. Used to determine scale for neighbor node adjustments.\n * `grid_type=:rectangular` Node grid geometry. 
One of `(:rectangular, :hexagonal, :spherical)`.\n * `η_decay=:exponential` Learning rate schedule function. One of `(:exponential, :asymptotic)`\n * `σ_decay=:exponential` Neighbor radius schedule function. One of `(:exponential, :asymptotic, :none)`\n * `neighbor_function=:gaussian` Kernel function used to make adjustment to neighbor weights. Scale is set by `σ²`. One of `(:gaussian, :mexican_hat)`.\n * `matching_distance=euclidean` Distance function from `Distances.jl` used to determine winning node.\n * `Nepochs=1` Number of times to repeat training on the shuffled dataset.\n\n# Operations\n\n * `transform(mach, Xnew)`: returns the coordinates of the winning SOM node for each instance of `Xnew`. For SOM of grid*type `:rectangular` and `:hexagonal`, these are cartesian coordinates. For grid*type `:spherical`, these are the latitude and longitude in radians.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coords`: The coordinates of each of the SOM nodes (points in the domain of the map) with shape (k², 2)\n * `weights`: Array of weight vectors for the SOM nodes (corresponding points in the map's range) of shape (k², input dimension)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `classes`: the index of the winning node for each instance of the training data X interpreted as a class label\n\n# Examples\n\n```\nusing MLJ\nsom = @load SelfOrganizingMap pkg=SelfOrganizingMaps\nmodel = som()\nX, y = make_regression(50, 3) # synthetic data\nmach = machine(model, X) |> fit!\nX̃ = transform(mach, X)\n\nrpt = report(mach)\nclasses = rpt.classes\n```\n""" -":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/john-waczak/SelfOrganizingMaps.jl" -":package_name" = "SelfOrganizingMaps" -":name" = "SelfOrganizingMap" +":docstring" = """```\nOneHotEncoder\n```\n\nA model type for constructing a 
one-hot encoder, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneHotEncoder = @load OneHotEncoder pkg=unknown\n```\n\nDo `model = OneHotEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OneHotEncoder(features=...)`.\n\nUse this model to one-hot encode the `Multiclass` and `OrderedFactor` features (columns) of some table, leaving other columns unchanged.\n\nNew data to be transformed may lack features present in the fit data, but no *new* features can be present.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo ensure *all* features are transformed into `Continuous` features, or dropped, use [`ContinuousEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of symbols (feature names). If empty (default) then all `Multiclass` and `OrderedFactor` features are encoded. Otherwise, encoding is further restricted to the specified features (`ignore=false`) or the unspecified features (`ignore=true`). This default behavior can be modified by the `ordered_factor` flag.\n * `ordered_factor=false`: when `true`, `OrderedFactor` features are universally excluded\n * `drop_last=true`: whether to drop the column corresponding to the final class of encoded features. 
For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `all_features`: names of all features encountered in training\n * `fitted_levels_given_feature`: dictionary of the levels associated with each feature encoded, keyed on the feature name\n * `ref_name_pairs_given_feature`: dictionary of pairs `r => ftr` (such as `0x00000001 => :grad__A`) where `r` is a CategoricalArrays.jl reference integer representing a level, and `ftr` the corresponding new feature name; the dictionary is keyed on the names of features that are encoded\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_to_be_encoded`: names of input features to be encoded\n * `new_features`: names of all output features\n\n# Example\n\n```\nusing MLJ\n\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n└───────────┴──────────────────┘\n\nhot = OneHotEncoder(drop_last=true)\nmach = fit!(machine(hot, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade__A │ Continuous │\n│ grade__B │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Count │\n└──────────────┴────────────┘\n```\n\nSee also [`ContinuousEncoder`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "OneHotEncoder" ":target_in_fit" = 
"`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":OneHotEncoder"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":is_wrapper" = "`false`" - -[InteractiveUtils] - -[MLJMultivariateStatsInterface.LDA] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", \"StatsBase.CovarianceEstimator\", \"Int64\", \"Float64\", \"Distances.SemiMetric\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJTransforms.ContinuousEncoder] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Bool\", \"Bool\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" ":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" 
-":prediction_type" = ":probabilistic" -":load_path" = "MLJMultivariateStatsInterface.LDA" -":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :dist)`" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.ContinuousEncoder" +":hyperparameters" = "`(:drop_last, :one_hot_ordered_factors)`" ":is_pure_julia" = "`true`" -":human_name" = "linear discriminant analysis model" -":is_supervised" = "`true`" +":human_name" = "continuous encoder" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLDA\n```\n\nA model type for constructing a linear discriminant analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLDA = @load LDA pkg=MultivariateStats\n```\n\nDo `model = LDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LDA(method=...)`.\n\n[Multiclass linear discriminant analysis](https://en.wikipedia.org/wiki/Linear_discriminant_analysis) learns a projection in a space of features to a lower dimensional space, in a way that attempts to preserve as much as possible the degree to which the classes of a discrete target variable can be discriminated. This can be used either for dimension reduction of the features (see `transform` below) or for probabilistic classification of the target (see `predict` below).\n\nIn the case of prediction, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. 
Specifically, the distances, in the transformed (projected) space, of a new observation, from the centroid of each target class, is computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: The solver, one of `:gevd` or `:whiten` methods.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e dimension of the transformed space, automatically set to `min(indim, nclasses-1)` if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. 
This can be useful if using the standard covariance estimator.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. 
(See fitted params section above.)\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n\n# Examples\n\n```\nusing MLJ\n\nLDA = @load LDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = LDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n\n```\n\nSee also [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "LDA" -":target_in_fit" = "`true`" +":docstring" = """```\nContinuousEncoder\n```\n\nA model type for constructing a continuous encoder, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContinuousEncoder = @load ContinuousEncoder pkg=unknown\n```\n\nDo `model = ContinuousEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `ContinuousEncoder(drop_last=...)`.\n\nUse this model to arrange all features (features) of a table to have `Continuous` element scitype, by applying the following protocol to each feature `ftr`:\n\n * If `ftr` is already `Continuous` retain it.\n * If `ftr` is `Multiclass`, one-hot encode it.\n * If `ftr` is `OrderedFactor`, replace it with `coerce(ftr, Continuous)` (vector of floating point integers), unless `ordered_factors=false` is specified, in which case one-hot encode it.\n * If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`.\n * If `ftr` has some other element scitype, or was not observed in fitting the encoder, drop it from the table.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo selectively one-hot-encode categorical features (without dropping features) use [`OneHotEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. features can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `drop_last=true`: whether to drop the column corresponding to the final class of one-hot encoded features. 
For example, a three-class feature is spawned into three new features if `drop_last=false`, but two just features otherwise.\n * `one_hot_ordered_factors=false`: whether to one-hot any feature with `OrderedFactor` element scitype, or to instead coerce it directly to a (single) `Continuous` feature using the order\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: names of features that will not be dropped from the table\n * `one_hot_encoder`: the `OneHotEncoder` model instance for handling the one-hot encoding\n * `one_hot_encoder_fitresult`: the fitted parameters of the `OneHotEncoder` model\n\n# Report\n\n * `features_to_keep`: names of input features that will not be dropped from the table\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3],\n comments=[\"the force\", \"be\", \"with you\", \"too\"])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n│ comments │ Textual │\n└───────────┴──────────────────┘\n\nencoder = ContinuousEncoder(drop_last=true)\nmach = fit!(machine(encoder, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Continuous │\n└──────────────┴────────────┘\n\njulia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped features\n1-element Vector{Symbol}:\n :comments\n\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = 
"https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "ContinuousEncoder" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":ContinuousEncoder"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.MultitargetLinearRegressor] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Bool\",)`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing,)`" + +[MLJTransforms.FrequencyEncoder] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Bool\", \"Type\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = 
"`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "MLJMultivariateStatsInterface.MultitargetLinearRegressor" -":hyperparameters" = "`(:bias,)`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.FrequencyEncoder" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :normalize, :output_type)`" ":is_pure_julia" = "`true`" -":human_name" = "multitarget linear regressor" -":is_supervised" = "`true`" +":human_name" = "frequency encoder" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nMultitargetLinearRegressor\n```\n\nA model type for constructing a multitarget linear regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\n```\n\nDo `model = MultitargetLinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetLinearRegressor(bias=...)`.\n\n`MultitargetLinearRegressor` assumes the target variable is vector-valued with continuous components. It trains a linear prediction function using the least squares algorithm. 
Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\nusing DataFrames\n\nLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\nlinear_regressor = LinearRegressor()\n\nX, y = make_regression(100, 9; n_targets = 2) # a table and a table (synthetic data)\n\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 9)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "MultitargetLinearRegressor" -":target_in_fit" = "`true`" +":docstring" = """```\nFrequencyEncoder\n```\n\nA model type for constructing a frequency encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFrequencyEncoder = @load FrequencyEncoder 
pkg=MLJTransforms\n```\n\nDo `model = FrequencyEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FrequencyEncoder(features=...)`.\n\n`FrequencyEncoder` implements frequency encoding which replaces the categorical values in the specified categorical features with their (normalized or raw) frequencies of occurrence in the dataset. \n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n * `normalize=false`: Whether to use normalized frequencies that sum to 1 over category values or to use raw counts.\n * `output_type=Float32`: The type of the output values. The default is `Float32`, but you can set it to `Float64` or any other type that can hold the frequency values.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply frequency encoding to selected `Multiclass` or `OrderedFactor features of`Xnew`specified by hyper-parameters, and return the new table. 
Features that are neither`Multiclass`nor`OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `statistic_given_feat_val`: A dictionary that maps each level for each column in a subset of the categorical features of X into its frequency.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * encoded_features: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = FrequencyEncoder(ordered_factor = false, normalize=true)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia > Xnew\n (A = [2, 1, 2, 2, 2],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [4, 4, 4, 1, 4],\n D = [3, 2, 3, 2, 3],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "FrequencyEncoder" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.BayesianSubspaceLDA] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Bool\", \"Int64\", \"Union{Nothing, Dict{<:Any, <:Real}, CategoricalDistributions.UnivariateFinite{<:Any, <:Any, <:Any, <:Real}}\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" + +[MLJTransforms.TargetEncoder] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Real\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, ScientificTypesBase.Unknown}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJMultivariateStatsInterface.BayesianSubspaceLDA" -":hyperparameters" = "`(:normalize, :outdim, :priors)`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" 
+":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.TargetEncoder" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :lambda, :m)`" ":is_pure_julia" = "`true`" -":human_name" = "Bayesian subspace LDA model" -":is_supervised" = "`true`" +":human_name" = "target encoder" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nBayesianSubspaceLDA\n```\n\nA model type for constructing a Bayesian subspace LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBayesianSubspaceLDA = @load BayesianSubspaceLDA pkg=MultivariateStats\n```\n\nDo `model = BayesianSubspaceLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BayesianSubspaceLDA(normalize=...)`.\n\nThe Bayesian multiclass subspace linear discriminant analysis algorithm learns a projection matrix as described in [`SubspaceLDA`](@ref). The posterior class probability distribution is derived as in [`BayesianLDA`](@ref).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `normalize=true`: Option to normalize the between class variance for the number of observations in each class, one of `true` or `false`.\n\n`outdim`: the ouput dimension, automatically set to `min(indim, nclasses-1)` if equal to `0`. 
If a non-zero `outdim` is passed, then the actual output dimension used is `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.\n\n * `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real}, Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If `priors = nothing` then `priors` are estimated from the class proportions in the training data. Otherwise it requires a `Dict` or `UnivariateFinite` object specifying the classes with non-zero probabilities in the training target.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n * `priors`: The class priors for classification. As inferred from training target `y`, if not user-specified. A `UnivariateFinite` object with levels consistent with `levels(y)`.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The overall mean of the training data.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n\n`class_means`: The class-specific means of the training data. 
A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `explained_variance_ratio`: The ratio of explained variance to total variance. Each dimension corresponds to an eigenvalue.\n\n# Examples\n\n```\nusing MLJ\n\nBayesianSubspaceLDA = @load BayesianSubspaceLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = BayesianSubspaceLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "BayesianSubspaceLDA" +":docstring" = """```\nTargetEncoder\n```\n\nA model type for constructing a target encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nTargetEncoder = @load TargetEncoder pkg=MLJTransforms\n```\n\nDo `model = TargetEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `TargetEncoder(features=...)`.\n\n`TargetEncoder` implements target encoding as defined in [1] to encode categorical variables into continuous ones using statistics from the target variable.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). 
Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous` or `Count` for regression problems and `Multiclass` or `OrderedFactor` for classification problems; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n * `λ`: Shrinkage hyperparameter used to mix between posterior and prior statistics as described in [1]\n * `m`: An integer hyperparameter to compute shrinkage as described in [1]. If `m=:auto` then m will be computed using\n\nempirical Bayes estimation as described in [1]\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply target encoding to selected `Multiclass` or `OrderedFactor features of`Xnew`specified by hyper-parameters, and return the new table. Features that are neither`Multiclass`nor`OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `task`: Whether the task is `Classification` or `Regression`\n * `y_statistic_given_feat_level`: A dictionary with the necessary statistics to encode each categorical feature. 
It maps each level in each categorical feature to a statistic computed over the target.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * encoded_features: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Define the target variable \ny = [\"c1\", \"c2\", \"c3\", \"c1\", \"c2\",]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\ny = coerce(y, Multiclass)\n\nencoder = TargetEncoder(ordered_factor = false, lambda = 1.0, m = 0,)\nmach = fit!(machine(encoder, X, y))\nXnew = transform(mach, X)\n\njulia > schema(Xnew)\n┌───────┬──────────────────┬─────────────────────────────────┐\n│ names │ scitypes │ types │\n├───────┼──────────────────┼─────────────────────────────────┤\n│ A_1 │ Continuous │ Float64 │\n│ A_2 │ Continuous │ Float64 │\n│ A_3 │ Continuous │ Float64 │\n│ B │ Continuous │ Float64 │\n│ C_1 │ Continuous │ Float64 │\n│ C_2 │ Continuous │ Float64 │\n│ C_3 │ Continuous │ Float64 │\n│ D_1 │ Continuous │ Float64 │\n│ D_2 │ Continuous │ Float64 │\n│ D_3 │ Continuous │ Float64 │\n│ E │ OrderedFactor{5} │ CategoricalValue{Int64, UInt32} │\n└───────┴──────────────────┴─────────────────────────────────┘\n```\n\n# Reference\n\n[1] Micci-Barreca, Daniele. “A preprocessing scheme for high-cardinality categorical attributes in classification and prediction problems” SIGKDD Explor. Newsl. 
3, 1 (July 2001), 27–32.\n\nSee also [`OneHotEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "TargetEncoder" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.FactorAnalysis] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Symbol\", \"Int64\", \"Int64\", \"Real\", \"Real\", \"Union{Nothing, Real, Vector{Float64}}\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJTransforms.UnivariateBoxCoxTransformer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Bool\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJMultivariateStatsInterface.FactorAnalysis" -":hyperparameters" = "`(:method, :maxoutdim, :maxiter, :tol, :eta, :mean)`" +":load_path" = "MLJTransforms.UnivariateBoxCoxTransformer" +":hyperparameters" = "`(:n, :shift)`" ":is_pure_julia" = "`true`" -":human_name" = "factor analysis model" +":human_name" = "single variable Box-Cox transformer" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nFactorAnalysis\n```\n\nA model type for constructing a factor analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFactorAnalysis = @load FactorAnalysis pkg=MultivariateStats\n```\n\nDo `model = FactorAnalysis()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FactorAnalysis(method=...)`.\n\nFactor analysis is a linear-Gaussian latent variable model that is closely related to probabilistic PCA. 
In contrast to the probabilistic PCA model, the covariance of conditional distribution of the observed variable given the latent variable is diagonal rather than isotropic.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:cm`: Method to use to solve the problem, one of `:ml`, `:em`, `:bayes`.\n * `maxoutdim=0`: Controls the the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `maxiter::Int=1000`: Maximum number of iterations.\n * `tol::Real=1e-6`: Convergence tolerance.\n * `eta::Real=tol`: Variance lower bound.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If `nothing`, centering will be computed and applied; if set to `0` no centering is applied (data is assumed pre-centered); if a vector, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. 
In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and ouput respectively. Each column of the projection matrix corresponds to a factor.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data (number of factors).\n * `variance`: The variance of the factors.\n * `covariance_matrix`: The estimated covariance matrix.\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `loadings`: The factor loadings. A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```\nusing MLJ\n\nFactorAnalysis = @load FactorAnalysis pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = FactorAnalysis(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`PPCA`](@ref), [`PCA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "FactorAnalysis" +":docstring" = """```\nUnivariateBoxCoxTransformer\n```\n\nA model type for constructing a single variable Box-Cox transformer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateBoxCoxTransformer = @load UnivariateBoxCoxTransformer pkg=MLJTransforms\n```\n\nDo `model = UnivariateBoxCoxTransformer()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateBoxCoxTransformer(n=...)`.\n\nBox-Cox transformations attempt to make data look more normally distributed. This can improve performance and assist in the interpretation of models which suppose that data is generated by a normal distribution.\n\nA Box-Cox transformation (with shift) is of the form\n\n```\nx -> ((x + c)^λ - 1)/λ\n```\n\nfor some constant `c` and real `λ`, unless `λ = 0`, in which case the above is replaced with\n\n```\nx -> log(x + c)\n```\n\nGiven user-specified hyper-parameters `n::Integer` and `shift::Bool`, the present implementation learns the parameters `c` and `λ` from the training data as follows: If `shift=true` and zeros are encountered in the data, then `c` is set to `0.2` times the data mean. If there are no zeros, then no shift is applied. Finally, `n` different values of `λ` between `-0.4` and `3` are considered, with `λ` fixed to the value maximizing normality of the transformed data.\n\n*Reference:* [Wikipedia entry for power transform](https://en.wikipedia.org/wiki/Power_transform).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Continuous`; check the scitype with `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n=171`: number of values of the exponent `λ` to try\n * `shift=false`: whether to include a preliminary constant translation in transformations, in the presence of zeros\n\n# Operations\n\n * `transform(mach, xnew)`: apply the Box-Cox transformation learned when fitting `mach`\n * `inverse_transform(mach, z)`: reconstruct the vector `x` whose transformation learned by `mach` is `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `λ`: the learned Box-Cox exponent\n * `c`: the learned shift\n\n# Examples\n\n```\nusing MLJ\nusing UnicodePlots\nusing 
Random\nRandom.seed!(123)\n\ntransf = UnivariateBoxCoxTransformer()\n\nx = randn(1000).^2\n\nmach = machine(transf, x)\nfit!(mach)\n\nz = transform(mach, x)\n\njulia> histogram(x)\n ┌ ┐\n [ 0.0, 2.0) ┤███████████████████████████████████ 848\n [ 2.0, 4.0) ┤████▌ 109\n [ 4.0, 6.0) ┤█▍ 33\n [ 6.0, 8.0) ┤▍ 7\n [ 8.0, 10.0) ┤▏ 2\n [10.0, 12.0) ┤ 0\n [12.0, 14.0) ┤▏ 1\n └ ┘\n Frequency\n\njulia> histogram(z)\n ┌ ┐\n [-5.0, -4.0) ┤█▎ 8\n [-4.0, -3.0) ┤████████▊ 64\n [-3.0, -2.0) ┤█████████████████████▊ 159\n [-2.0, -1.0) ┤█████████████████████████████▊ 216\n [-1.0, 0.0) ┤███████████████████████████████████ 254\n [ 0.0, 1.0) ┤█████████████████████████▊ 188\n [ 1.0, 2.0) ┤████████████▍ 90\n [ 2.0, 3.0) ┤██▊ 20\n [ 3.0, 4.0) ┤▎ 1\n └ ┘\n Frequency\n\n```\n""" +":inverse_transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateBoxCoxTransformer" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateBoxCoxTransformer"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.LinearRegressor] +":input_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":constructor" = 
"`nothing`" -":hyperparameter_types" = "`(\"Bool\",)`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing,)`" + +[MLJTransforms.InteractionTransformer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Vector{Symbol}}\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" +":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "MLJMultivariateStatsInterface.LinearRegressor" -":hyperparameters" = "`(:bias,)`" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.InteractionTransformer" +":hyperparameters" = "`(:order, :features)`" ":is_pure_julia" = "`true`" -":human_name" = "linear regressor" -":is_supervised" = "`true`" +":human_name" = "interaction transformer" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=MultivariateStats\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(bias=...)`.\n\n`LinearRegressor` assumes the target is a `Continuous` variable and trains a linear prediction function using the least squares algorithm. Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\n\nLinearRegressor = @load LinearRegressor pkg=MultivariateStats\nlinear_regressor = LinearRegressor()\n\nX, y = make_regression(100, 2) # a table and a vector (synthetic data)\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`MultitargetLinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "LinearRegressor" -":target_in_fit" = "`true`" +":docstring" = """```\nInteractionTransformer\n```\n\nA model type for constructing a interaction transformer, 
based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nInteractionTransformer = @load InteractionTransformer pkg=MLJTransforms\n```\n\nDo `model = InteractionTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `InteractionTransformer(order=...)`.\n\nGenerates all polynomial interaction terms up to the given order for the subset of chosen columns. Any column that contains elements with scitype `<:Infinite` is a valid basis to generate interactions. If `features` is not specified, all such columns with scitype `<:Infinite` in the table are used as a basis.\n\nIn MLJ or MLJBase, you can transform features `X` with the single call\n\n```\ntransform(machine(model), X)\n```\n\nSee also the example below.\n\n# Hyper-parameters\n\n * `order`: Maximum order of interactions to be generated.\n * `features`: Restricts interaction generation to those columns\n\n# Operations\n\n * `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` using the hyper-parameters specified in `model`.\n\n# Example\n\n```\nusing MLJ\n\nX = (\n A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"]\n)\nit = InteractionTransformer(order=3)\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],\n A_C = [7, 16, 27],\n B_C = [28, 40, 54],\n A_B_C = [28, 80, 162],)\n\nit = InteractionTransformer(order=2, features=[:A, :B])\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],)\n\n```\n""" +":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "InteractionTransformer" 
+":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":clean!", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.ICA] +":input_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Symbol\", \"Bool\", \"Int64\", \"Real\", \"Union{Nothing, Matrix{<:Real}}\", \"Union{Nothing, Real, Vector{Float64}}\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJTransforms.UnivariateDiscretizer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\",)`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" ":tags" = [] ":abstract_type" = 
"`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJMultivariateStatsInterface.ICA" -":hyperparameters" = "`(:outdim, :alg, :fun, :do_whiten, :maxiter, :tol, :winit, :mean)`" +":load_path" = "MLJTransforms.UnivariateDiscretizer" +":hyperparameters" = "`(:n_classes,)`" ":is_pure_julia" = "`true`" -":human_name" = "independent component analysis model" +":human_name" = "single variable discretizer" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nICA\n```\n\nA model type for constructing a independent component analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nICA = @load ICA pkg=MultivariateStats\n```\n\nDo `model = ICA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ICA(outdim=...)`.\n\nIndependent component analysis is a computational technique for separating a multivariate signal into additive subcomponents, with the assumption that the subcomponents are non-Gaussian and independent from each other.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `outdim::Int=0`: The number of independent components to recover, set automatically if `0`.\n * `alg::Symbol=:fastica`: The algorithm to use (only `:fastica` is supported at the moment).\n * `fun::Symbol=:tanh`: The approximate neg-entropy function, one of `:tanh`, `:gaus`.\n * `do_whiten::Bool=true`: Whether or not to perform pre-whitening.\n * `maxiter::Int=100`: The maximum number of iterations.\n * 
`tol::Real=1e-6`: The convergence tolerance for change in the unmixing matrix W.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: mean to use, if nothing (default) centering is computed and applied, if zero, no centering; otherwise a vector of means can be passed.\n * `winit::Union{Nothing,Matrix{<:Real}}=nothing`: Initial guess for the unmixing matrix `W`: either an empty matrix (for random initialization of `W`), a matrix of size `m × k` (if `do_whiten` is true), or a matrix of size `m × k`. Here `m` is the number of components (columns) of the input.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return the component-separated version of input `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: The estimated component matrix.\n * `mean`: The estimated mean vector.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `mean`: The mean of the untransformed training data, of length `indim`.\n\n# Examples\n\n```\nusing MLJ\n\nICA = @load ICA pkg=MultivariateStats\n\ntimes = range(0, 8, length=2000)\n\nsine_wave = sin.(2*times)\nsquare_wave = sign.(sin.(3*times))\nsawtooth_wave = map(t -> mod(2t, 2) - 1, times)\nsignals = hcat(sine_wave, square_wave, sawtooth_wave)\nnoisy_signals = signals + 0.2*randn(size(signals))\n\nmixing_matrix = [ 1 1 1; 0.5 2 1; 1.5 1 2]\nX = MLJ.table(noisy_signals*mixing_matrix)\n\nmodel = ICA(outdim = 3, tol=0.1)\nmach = machine(model, X) |> fit!\n\nX_unmixed = transform(mach, X)\n\nusing Plots\n\nplot(X.x2)\nplot(X.x2)\nplot(X.x3)\n\nplot(X_unmixed.x1)\nplot(X_unmixed.x2)\nplot(X_unmixed.x3)\n\n```\n\nSee also [`PCA`](@ref), [`KernelPCA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" -":inverse_transform_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "ICA" +":docstring" = """```\nUnivariateDiscretizer\n```\n\nA model type for constructing a single variable discretizer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateDiscretizer = @load UnivariateDiscretizer pkg=unknown\n```\n\nDo `model = UnivariateDiscretizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateDiscretizer(n_classes=...)`.\n\nDiscretization converts a `Continuous` vector into an `OrderedFactor` vector. In particular, the output is a `CategoricalVector` (whose reference type is optimized).\n\nThe transformation is chosen so that the vector on which the transformer is fit has, in transformed form, an approximately uniform distribution of values. 
Specifically, if `n_classes` is the level of discretization, then `2*n_classes - 1` ordered quantiles are computed, the odd quantiles being used for transforming (discretization) and the even quantiles for inverse transforming.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with `Continuous` element scitype; check scitype with `scitype(x)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n_classes`: number of discrete classes in the output\n\n# Operations\n\n * `transform(mach, xnew)`: discretize `xnew` according to the discretization learned when fitting `mach`\n * `inverse_transform(mach, z)`: attempt to reconstruct from `z` a vector that transforms to give `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach).fitresult` include:\n\n * `odd_quantiles`: quantiles used for transforming (length is `n_classes - 1`)\n * `even_quantiles`: quantiles used for inverse transforming (length is `n_classes`)\n\n# Example\n\n```\nusing MLJ\nusing Random\nRandom.seed!(123)\n\ndiscretizer = UnivariateDiscretizer(n_classes=100)\nmach = machine(discretizer, randn(1000))\nfit!(mach)\n\njulia> x = rand(5)\n5-element Vector{Float64}:\n 0.8585244609846809\n 0.37541692370451396\n 0.6767070590395461\n 0.9208844241267105\n 0.7064611415680901\n\njulia> z = transform(mach, x)\n5-element CategoricalArrays.CategoricalArray{UInt8,1,UInt8}:\n 0x52\n 0x42\n 0x4d\n 0x54\n 0x4e\n\nx_approx = inverse_transform(mach, z)\njulia> x - x_approx\n5-element Vector{Float64}:\n 0.008224506144777322\n 0.012731354778359405\n 0.0056265330571125816\n 0.005738175684445124\n 0.006835652575801987\n```\n""" +":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateDiscretizer" ":target_in_fit" = "`false`" 
":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateDiscretizer"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.PPCA] +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":transform_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Int64\", \"Real\", \"Union{Nothing, Real, Vector{Float64}}\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" + +[MLJTransforms.CardinalityReducer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Dict{T} where T<:Type\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" +":package_license" = "unknown" ":prediction_type" = ":unknown" -":load_path" = 
"MLJMultivariateStatsInterface.PPCA" -":hyperparameters" = "`(:maxoutdim, :method, :maxiter, :tol, :mean)`" +":load_path" = "MLJTransforms.CardinalityReducer" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :min_frequency, :label_for_infrequent)`" ":is_pure_julia" = "`true`" -":human_name" = "probabilistic PCA model" +":human_name" = "cardinality reducer" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nPPCA\n```\n\nA model type for constructing a probabilistic PCA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nPPCA = @load PPCA pkg=MultivariateStats\n```\n\nDo `model = PPCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `PPCA(maxoutdim=...)`.\n\nProbabilistic principal component analysis is a dimension-reduction algorithm which represents a constrained form of the Gaussian distribution in which the number of free parameters can be restricted while still allowing the model to capture the dominant correlations in a data set. It is expressed as the maximum likelihood solution of a probabilistic latent variable model. For details, see Bishop (2006): C. M. Pattern Recognition and Machine Learning.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Controls the the dimension (number of columns) of the output, `outdim`. 
Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `method::Symbol=:ml`: The method to use to solve the problem, one of `:ml`, `:em`, `:bayes`.\n * `maxiter::Int=1000`: The maximum number of iterations.\n * `tol::Real=1e-6`: The convergence tolerance.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If `nothing`, centering will be computed and applied; if set to `0` no centering is applied (data is assumed pre-centered); if a vector, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and ouput respectively. Each column of the projection matrix corresponds to a principal component.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `tvat`: The variance of the components.\n * `loadings`: The model's loadings matrix. 
A matrix of size (`indim`, `outdim`) where `indim` and `outdim` as as defined above.\n\n# Examples\n\n```\nusing MLJ\n\nPPCA = @load PPCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = PPCA(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PCA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "PPCA" +":docstring" = """```\nCardinalityReducer\n```\n\nA model type for constructing a cardinality reducer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nCardinalityReducer = @load CardinalityReducer pkg=MLJTransforms\n```\n\nDo `model = CardinalityReducer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CardinalityReducer(features=...)`.\n\n`CardinalityReducer` maps any level of a categorical feature that occurs with frequency < `min_frequency` into a new level (e.g., \"Other\"). This is useful when some categorical features have high cardinality and many levels are infrequent. This assumes that the categorical features have raw types that are in `Union{AbstractString, Char, Number}`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. 
Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n * `min_frequency::Real=3`: Any level of a categorical feature that occurs with frequency < `min_frequency` will be mapped to a new level. Could be\n\nan integer or a float which decides whether raw counts or normalized frequencies are used.\n\n * `label_for_infrequent::Dict{<:Type, <:Any}()= Dict( AbstractString => \"Other\", Char => 'O', )`: A\n\ndictionary where the possible values for keys are the types in `Char`, `AbstractString`, and `Number` and each value signifies the new level to map into given a column raw super type. By default, if the raw type of the column subtypes `AbstractString` then the new value is `\"Other\"` and if the raw type subtypes `Char` then the new value is `'O'` and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply cardinality reduction to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `new_cat_given_col_val`: A dictionary that maps each level in a categorical feature to a new level (either itself or the new level specified in `label_for_infrequent`)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * encoded_features: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define categorical features\nA = [ [\"a\" for i in 1:100]..., \"b\", \"b\", \"b\", \"c\", \"d\"]\nB = [ [0 for i in 1:100]..., 1, 2, 3, 4, 4]\n\n# Combine into a named tuple\nX = (A = A, B = B)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Multiclass\n)\n\nencoder = CardinalityReducer(ordered_factor = false, min_frequency=3)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> proportionmap(Xnew.A)\nDict{CategoricalArrays.CategoricalValue{String, UInt32}, Float64} with 3 entries:\n \"Other\" => 0.0190476\n \"b\" => 0.0285714\n \"a\" => 0.952381\n\njulia> proportionmap(Xnew.B)\nDict{CategoricalArrays.CategoricalValue{Int64, UInt32}, Float64} with 2 entries:\n 0 => 0.952381\n -1 => 0.047619\n```\n\nSee also [`FrequencyEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "CardinalityReducer" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" 
":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.RidgeRegressor] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Union{Real, AbstractVecOrMat}\", \"Bool\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing)`" + +[MLJTransforms.OrdinalEncoder] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Type\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "MLJMultivariateStatsInterface.RidgeRegressor" -":hyperparameters" = "`(:lambda, :bias)`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.OrdinalEncoder" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :output_type)`" ":is_pure_julia" = "`true`" -":human_name" = "ridge regressor" -":is_supervised" = "`true`" +":human_name" = "ordinal encoder" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = 
"""```\nRidgeRegressor\n```\n\nA model type for constructing a ridge regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRidgeRegressor = @load RidgeRegressor pkg=MultivariateStats\n```\n\nDo `model = RidgeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RidgeRegressor(lambda=...)`.\n\n`RidgeRegressor` adds a quadratic penalty term to least squares regression, for regularization. Ridge regression is particularly useful in the case of multicollinearity. Options exist to specify a bias term, and to adjust the strength of the penalty term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `lambda=1.0`: Is the non-negative parameter for the regularization strength. 
If lambda is 0, ridge regression is equivalent to linear least squares regression, and as lambda approaches infinity, all the linear coefficients approach 0.\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\n\nRidgeRegressor = @load RidgeRegressor pkg=MultivariateStats\npipe = Standardizer() |> RidgeRegressor(lambda=10)\n\nX, y = @load_boston\n\nmach = machine(pipe, X, y) |> fit!\nyhat = predict(mach, X)\ntraining_error = l1(yhat, y) |> mean\n```\n\nSee also [`LinearRegressor`](@ref), [`MultitargetLinearRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "RidgeRegressor" -":target_in_fit" = "`true`" +":docstring" = """```\nOrdinalEncoder\n```\n\nA model type for constructing a ordinal encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOrdinalEncoder = @load OrdinalEncoder pkg=MLJTransforms\n```\n\nDo `model = OrdinalEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OrdinalEncoder(features=...)`.\n\n`OrdinalEncoder` implements ordinal encoding which replaces the categorical values in the specified categorical features with integers (ordered arbitrarily). 
This will create an implicit ordering between categories which may not be a proper modelling assumption.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n * `output_type`: The numerical concrete type of the encoded features. Default is `Float32`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply ordinal encoding to selected `Multiclass` or `OrderedFactor features of`Xnew`specified by hyper-parameters, and return the new table. 
Features that are neither`Multiclass`nor`OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `index_given_feat_level`: A dictionary that maps each level for each column in a subset of the categorical features of X into an integer.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * encoded_features: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercion:\nschema(X)\n\nencoder = OrdinalEncoder(ordered_factor = false)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia > Xnew\n (A = [2, 1, 2, 3, 3],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [1, 1, 1, 2, 1],\n D = [2, 1, 2, 1, 2],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "OrdinalEncoder" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" 
+":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.KernelPCA] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Function}\", \"Symbol\", \"Bool\", \"Real\", \"Real\", \"Int64\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJTransforms.FillImputer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Function\", \"Function\", \"Function\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJMultivariateStatsInterface.KernelPCA" -":hyperparameters" = "`(:maxoutdim, :kernel, :solver, :inverse, :beta, :tol, :maxiter)`" +":load_path" = "MLJTransforms.FillImputer" +":hyperparameters" = "`(:features, :continuous_fill, :count_fill, :finite_fill)`" ":is_pure_julia" = "`true`" -":human_name" = "kernel prinicipal component analysis model" +":human_name" = "fill imputer" ":is_supervised" = 
"`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nKernelPCA\n```\n\nA model type for constructing a kernel prinicipal component analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nKernelPCA = @load KernelPCA pkg=MultivariateStats\n```\n\nDo `model = KernelPCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `KernelPCA(maxoutdim=...)`.\n\nIn kernel PCA the linear operations of ordinary principal component analysis are performed in a [reproducing Hilbert space](https://en.wikipedia.org/wiki/Reproducing_kernel_Hilbert_space).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Controls the the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `kernel::Function=(x,y)->x'y`: The kernel function, takes in 2 vector arguments x and y, returns a scalar value. 
Defaults to the dot product of `x` and `y`.\n * `solver::Symbol=:eig`: solver to use for the eigenvalues, one of `:eig`(default, uses `LinearAlgebra.eigen`), `:eigs`(uses `Arpack.eigs`).\n * `inverse::Bool=true`: perform calculations needed for inverse transform\n * `beta::Real=1.0`: strength of the ridge regression that learns the inverse transform when inverse is true.\n * `tol::Real=0.0`: Convergence tolerance for eigenvalue solver.\n * `maxiter::Int=300`: maximum number of iterations for eigenvalue solver.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. 
In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and ouput respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `principalvars`: The variance of the principal components.\n\n# Examples\n\n```\nusing MLJ\nusing LinearAlgebra\n\nKernelPCA = @load KernelPCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nfunction rbf_kernel(length_scale)\n return (x,y) -> norm(x-y)^2 / ((2 * length_scale)^2)\nend\n\nmodel = KernelPCA(maxoutdim=2, kernel=rbf_kernel(1))\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`PCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "KernelPCA" +":docstring" = """```\nFillImputer\n```\n\nA model type for constructing a fill imputer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFillImputer = @load FillImputer pkg=unknown\n```\n\nDo `model = FillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FillImputer(features=...)`.\n\nUse this model to impute `missing` values in tabular data. 
A fixed \"filler\" value is learned from the training data, one for each column of the table.\n\nFor imputing missing values in a vector, use [`UnivariateFillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose features each have element scitypes `Union{Missing, T}`, where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`. Check scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of names of features (symbols) for which imputation is to be attempted; default is empty, which is interpreted as \"impute all\".\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_seen_in_fit`: the names of features (features) encountered during training\n * `univariate_transformer`: the univariate model applied to determine the fillers (it's fields contain the functions defining the filler computations)\n * `filler_given_feature`: dictionary of filler values, keyed on feature (column) names\n\n# Examples\n\n```\nusing MLJ\nimputer = FillImputer()\n\nX = (a = [1.0, 2.0, missing, 
3.0, missing],\n b = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass),\n c = [1, 1, 2, missing, 3])\n\nschema(X)\njulia> schema(X)\n┌───────┬───────────────────────────────┐\n│ names │ scitypes │\n├───────┼───────────────────────────────┤\n│ a │ Union{Missing, Continuous} │\n│ b │ Union{Missing, Multiclass{2}} │\n│ c │ Union{Missing, Count} │\n└───────┴───────────────────────────────┘\n\nmach = machine(imputer, X)\nfit!(mach)\n\njulia> fitted_params(mach).filler_given_feature\n(filler = 2.0,)\n\njulia> fitted_params(mach).filler_given_feature\nDict{Symbol, Any} with 3 entries:\n :a => 2.0\n :b => \"y\"\n :c => 2\n\njulia> transform(mach, X)\n(a = [1.0, 2.0, 2.0, 3.0, 2.0],\n b = CategoricalValue{String, UInt32}[\"y\", \"n\", \"y\", \"y\", \"y\"],\n c = [1, 1, 2, 2, 3],)\n```\n\nSee also [`UnivariateFillImputer`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "FillImputer" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":FillImputer"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.MultitargetRidgeRegressor] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" 
-":hyperparameter_types" = "`(\"Union{Real, AbstractVecOrMat}\", \"Bool\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing)`" + +[MLJTransforms.MissingnessEncoder] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Dict{T} where T<:Type\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "MLJMultivariateStatsInterface.MultitargetRidgeRegressor" -":hyperparameters" = "`(:lambda, :bias)`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.MissingnessEncoder" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :label_for_missing)`" ":is_pure_julia" = "`true`" -":human_name" = "multitarget ridge regressor" -":is_supervised" = "`true`" +":human_name" = "missingness encoder" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nMultitargetRidgeRegressor\n```\n\nA model type for constructing a multitarget ridge regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetRidgeRegressor = @load MultitargetRidgeRegressor pkg=MultivariateStats\n```\n\nDo `model = 
MultitargetRidgeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetRidgeRegressor(lambda=...)`.\n\nMulti-target ridge regression adds a quadratic penalty term to multi-target least squares regression, for regularization. Ridge regression is particularly useful in the case of multicollinearity. In this case, the output represents a response vector. Options exist to specify a bias term, and to adjust the strength of the penalty term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `lambda=1.0`: Is the non-negative parameter for the regularization strength. 
If lambda is 0, ridge regression is equivalent to linear least squares regression, and as lambda approaches infinity, all the linear coefficients approach 0.\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\nusing DataFrames\n\nRidgeRegressor = @load MultitargetRidgeRegressor pkg=MultivariateStats\n\nX, y = make_regression(100, 6; n_targets = 2) # a table and a table (synthetic data)\n\nridge_regressor = RidgeRegressor(lambda=1.5)\nmach = machine(ridge_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 6)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`MultitargetLinearRegressor`](@ref), [`RidgeRegressor`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "MultitargetRidgeRegressor" -":target_in_fit" = "`true`" +":docstring" = """```\nMissingnessEncoder\n```\n\nA model type for constructing a missingness encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMissingnessEncoder = @load MissingnessEncoder pkg=MLJTransforms\n```\n\nDo `model = MissingnessEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MissingnessEncoder(features=...)`.\n\n`MissingnessEncoder` maps any missing level of a categorical feature into a new level (e.g., \"Missing\"). 
By this, missingness will be treated as a new level by any subsequent model. This assumes that the categorical features have raw types that are in `Char`, `AbstractString`, and `Number`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n * `label_for_missing::Dict{<:Type, <:Any}()= Dict( AbstractString => \"missing\", Char => 'm', )`: A\n\ndictionary where the possible values for keys are the types in `Char`, `AbstractString`, and `Number` and where each value signifies the new level to map into given a column raw super type. By default, if the raw type of the column subtypes `AbstractString` then missing values will be replaced with `\"missing\"` and if the raw type subtypes `Char` then the new value is `'m'` and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply cardinality reduction to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `label_for_missing_given_feature`: A dictionary that for each column, maps `missing` into some value according to `label_for_missing`\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * encoded_features: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define a table with missing values\nXm = (\n A = categorical([\"Ben\", \"John\", missing, missing, \"Mary\", \"John\", missing]),\n B = [1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C= categorical([7, 5, missing, missing, 10, 0, missing]),\n D = [23, 23, 44, 66, 14, 23, 11],\n E = categorical([missing, 'g', 'r', missing, 'r', 'g', 'p'])\n)\n\nencoder = MissingnessEncoder()\nmach = fit!(machine(encoder, Xm))\nXnew = transform(mach, Xm)\n\njulia> Xnew\n(A = [\"Ben\", \"John\", \"missing\", \"missing\", \"Mary\", \"John\", \"missing\"],\n B = Union{Missing, Float64}[1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C = [7, 5, -1, -1, 10, 0, -1],\n D = [23, 23, 44, 66, 14, 23, 11],\n E = ['m', 'g', 'r', 'm', 'r', 'g', 'p'],)\n\n```\n\nSee also [`CardinalityReducer`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "MissingnessEncoder" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":predict_scitype" = 
"`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.SubspaceLDA] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Bool\", \"Int64\", \"Distances.SemiMetric\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" + +[MLJTransforms.ContrastEncoder] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Union{Symbol, AbstractVector{Symbol}}\", \"Any\", \"Bool\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJMultivariateStatsInterface.SubspaceLDA" -":hyperparameters" = "`(:normalize, :outdim, :dist)`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.ContrastEncoder" +":hyperparameters" = "`(:features, :ignore, :mode, :buildmatrix, :ordered_factor)`" ":is_pure_julia" 
= "`true`" -":human_name" = "subpace LDA model" -":is_supervised" = "`true`" +":human_name" = "contrast encoder" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nSubspaceLDA\n```\n\nA model type for constructing a subpace LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSubspaceLDA = @load SubspaceLDA pkg=MultivariateStats\n```\n\nDo `model = SubspaceLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SubspaceLDA(normalize=...)`.\n\nMulticlass subspace linear discriminant analysis (LDA) is a variation on ordinary [`LDA`](@ref) suitable for high dimensional data, as it avoids storing scatter matrices. For details, refer the [MultivariateStats.jl documentation](https://juliastats.org/MultivariateStats.jl/stable/).\n\nIn addition to dimension reduction (using `transform`) probabilistic classification is provided (using `predict`). In the case of classification, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. Specifically, the distances, in the transformed (projected) space, of a new observation, from the centroid of each target class, is computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. 
Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `normalize=true`: Option to normalize the between class variance for the number of observations in each class, one of `true` or `false`.\n * `outdim`: the ouput dimension, automatically set to `min(indim, nclasses-1)` if equal to `0`. If a non-zero `outdim` is passed, then the actual output dimension used is `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. 
Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool)\n\n`class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `explained_variance_ratio`: The ratio of explained variance to total variance. 
Each dimension corresponds to an eigenvalue.\n\n# Examples\n\n```\nusing MLJ\n\nSubspaceLDA = @load SubspaceLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = SubspaceLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`BayesianLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "SubspaceLDA" -":target_in_fit" = "`true`" +":docstring" = """```\nContrastEncoder\n```\n\nA model type for constructing a contrast encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContrastEncoder = @load ContrastEncoder pkg=MLJTransforms\n```\n\nDo `model = ContrastEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ContrastEncoder(features=...)`.\n\n`ContrastEncoder` implements the following contrast encoding methods for categorical features: dummy, sum, backward/forward difference, and Helmert coding. More generally, users can specify a custom contrast or hypothesis matrix, and each feature can be encoded using a different method.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. 
Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * `mode=:dummy`: The type of encoding to use. Can be one of `:contrast`, `:dummy`, `:sum`, `:backward_diff`, `:forward_diff`, `:helmert` or `:hypothesis`.\n\nIf `ignore=false` (features to be encoded are listed explictly in `features`), then this can be a vector of the same length as `features` to specify a different contrast encoding scheme for each feature\n\n * `buildmatrix=nothing`: A function or other callable with signature `buildmatrix(colname, k)`,\n\nwhere `colname` is the name of the feature levels and `k` is it's length, and which returns contrast or hypothesis matrix with row/column ordering consistent with the ordering of `levels(col)`. Only relevant if `mode` is `:contrast` or `:hypothesis`.\n\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply contrast encoding to selected `Multiclass` or `OrderedFactor features of`Xnew`specified by hyper-parameters, and return the new table. 
Features that are neither`Multiclass`nor`OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vector_given_value_given_feature`: A dictionary that maps each level for each column in a subset of the categorical features of X into its frequency.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * encoded_features: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical dataset\nX = (\n name = categorical([\"Ben\", \"John\", \"Mary\", \"John\"]),\n height = [1.85, 1.67, 1.5, 1.67],\n favnum = categorical([7, 5, 10, 1]),\n age = [23, 23, 14, 23],\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = ContrastEncoder(\n features = [:name, :favnum],\n ignore = false, \n mode = [:dummy, :helmert],\n)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia > Xnew\n (name_John = [1.0, 0.0, 0.0, 0.0],\n name_Mary = [0.0, 1.0, 0.0, 1.0],\n height = [1.85, 1.67, 1.5, 1.67],\n favnum_5 = [0.0, 1.0, 0.0, -1.0],\n favnum_7 = [2.0, -1.0, 0.0, -1.0],\n favnum_10 = [-1.0, -1.0, 3.0, -1.0],\n age = [23, 23, 14, 23],)\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "ContrastEncoder" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.BayesianLDA] +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", \"StatsBase.CovarianceEstimator\", \"Int64\", \"Float64\", \"Union{Nothing, Dict{<:Any, <:Real}, CategoricalDistributions.UnivariateFinite{<:Any, <:Any, <:Any, <:Real}}\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJTransforms.UnivariateStandardizer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`()`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`()`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Infinite}}`" +":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJMultivariateStatsInterface.BayesianLDA" -":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :priors)`" +":prediction_type" = ":unknown" +":load_path" = 
"MLJTransforms.UnivariateStandardizer" +":hyperparameters" = "`()`" ":is_pure_julia" = "`true`" -":human_name" = "Bayesian LDA model" -":is_supervised" = "`true`" +":human_name" = "single variable discretizer" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nBayesianLDA\n```\n\nA model type for constructing a Bayesian LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBayesianLDA = @load BayesianLDA pkg=MultivariateStats\n```\n\nDo `model = BayesianLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BayesianLDA(method=...)`.\n\nThe Bayesian multiclass LDA algorithm learns a projection matrix as described in ordinary [`LDA`](@ref). Predicted class posterior probability distributions are derived by applying Bayes' rule with a multivariate Gaussian class-conditional distribution. A prior class distribution can be specified by the user or inferred from training data class frequency.\n\nSee also the [package documentation](https://multivariatestatsjl.readthedocs.io/en/latest/lda.html). 
For more information about the algorithm, see [Li, Zhu and Ogihara (2006): Using Discriminant Analysis for Multi-class Classification: An Experimental Investigation](https://doi.org/10.1007/s10115-006-0013-y).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: choice of solver, one of `:gevd` or `:whiten` methods.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e., dimension of the transformed space, automatically set to `min(indim, nclasses-1)` if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. This can be useful if using the standard covariance estimator.\n * `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real}, Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If `priors = nothing` then `priors` are estimated from the class proportions in the training data. 
Otherwise it requires a `Dict` or `UnivariateFinite` object specifying the classes with non-zero probabilities in the training target.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n * `priors`: The class priors for classification. As inferred from training target `y`, if not user-specified. A `UnivariateFinite` object with levels consistent with `levels(y)`.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. 
(See fitted params section above.)\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n\n# Examples\n\n```\nusing MLJ\n\nBayesianLDA = @load BayesianLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = BayesianLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "BayesianLDA" -":target_in_fit" = "`true`" +":docstring" = """```\nUnivariateStandardizer()\n```\n\nTransformer type for standardizing (whitening) single variable data.\n\nThis model may be deprecated in the future. Consider using [`Standardizer`](@ref), which handles both tabular *and* univariate data.\n""" +":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateStandardizer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.PCA] +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" +":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Float64\", \"Union{Nothing, Real, Vector{Float64}}\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" + +[MLJTransforms.UnivariateFillImputer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Function\", \"Function\", \"Function\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}}`" +":output_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJMultivariateStatsInterface.PCA" -":hyperparameters" = "`(:maxoutdim, :method, :variance_ratio, :mean)`" +":load_path" = "MLJTransforms.UnivariateFillImputer" +":hyperparameters" = "`(:continuous_fill, :count_fill, :finite_fill)`" ":is_pure_julia" = "`true`" -":human_name" = "pca" +":human_name" = "single variable fill 
imputer" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nPCA\n```\n\nA model type for constructing a pca, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nPCA = @load PCA pkg=MultivariateStats\n```\n\nDo `model = PCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `PCA(maxoutdim=...)`.\n\nPrincipal component analysis learns a linear projection onto a lower dimensional space while preserving most of the initial variance seen in the training data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Together with `variance_ratio`, controls the output dimension `outdim` chosen by the model. Specifically, suppose that `k` is the smallest integer such that retaining the `k` most significant principal components accounts for `variance_ratio` of the total variance in the training data. Then `outdim = min(outdim, maxoutdim)`. If `maxoutdim=0` (default) then the effective `maxoutdim` is `min(n, indim - 1)` where `n` is the number of observations and `indim` the number of features in the training data.\n * `variance_ratio::Float64=0.99`: The ratio of variance preserved after the transformation\n * `method=:auto`: The method to use to solve the problem. 
Choices are\n\n * `:svd`: Support Vector Decomposition of the matrix.\n * `:cov`: Covariance matrix decomposition.\n * `:auto`: Use `:cov` if the matrices first dimension is smaller than its second dimension and otherwise use `:svd`\n * `mean=nothing`: if `nothing`, centering will be computed and applied, if set to `0` no centering (data is assumed pre-centered); if a vector is passed, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim = min(n, indim, maxoutdim)` is the output dimension; here `n` is the number of observations.\n * `tprincipalvar`: Total variance of the principal components.\n * `tresidualvar`: Total residual variance.\n * `tvar`: Total observation variance (principal + residual variance).\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `principalvars`: The variance of the principal components. 
An AbstractVector of length `outdim`\n * `loadings`: The models loadings, weights for each variable used when calculating principal components. A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```\nusing MLJ\n\nPCA = @load PCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = PCA(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "PCA" +":docstring" = """```\nUnivariateFillImputer\n```\n\nA model type for constructing a single variable fill imputer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateFillImputer = @load UnivariateFillImputer pkg=unknown\n```\n\nDo `model = UnivariateFillImputer()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateFillImputer(continuous_fill=...)`.\n\nUse this model to imputing `missing` values in a vector with a fixed value learned from the non-missing values of training vector.\n\nFor imputing missing values in tabular data, use [`FillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Union{Missing, T}` where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`; check scitype using `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, xnew)`: return `xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `filler`: the fill value to be imputed in all new data\n\n# Examples\n\n```\nusing MLJ\nimputer = UnivariateFillImputer()\n\nx_continuous = [1.0, 2.0, missing, 3.0]\nx_multiclass = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass)\nx_count = [1, 1, 1, 2, missing, 3, 3]\n\nmach = machine(imputer, x_continuous)\nfit!(mach)\n\njulia> fitted_params(mach)\n(filler = 2.0,)\n\njulia> transform(mach, [missing, missing, 101.0])\n3-element Vector{Float64}:\n 2.0\n 2.0\n 
101.0\n\nmach2 = machine(imputer, x_multiclass) |> fit!\n\njulia> transform(mach2, x_multiclass)\n5-element CategoricalArray{String,1,UInt32}:\n \"y\"\n \"n\"\n \"y\"\n \"y\"\n \"y\"\n\nmach3 = machine(imputer, x_count) |> fit!\n\njulia> transform(mach3, [missing, missing, 5])\n3-element Vector{Int64}:\n 2\n 2\n 5\n```\n\nFor imputing tabular data, use [`FillImputer`](@ref).\n""" +":inverse_transform_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateFillImputer" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":UnivariateFillImputer"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":input_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" +":transform_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":constructor" = "`nothing`" -[MLJLIBSVMInterface.ProbabilisticNuSVC] 
-":is_wrapper" = "`false`" +[MLJLIBSVMInterface.OneClassSVM] +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" ":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Unknown}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "unknown" -":prediction_type" = ":probabilistic" -":load_path" = "MLJLIBSVMInterface.ProbabilisticNuSVC" +":prediction_type" = ":unknown" +":load_path" = "MLJLIBSVMInterface.OneClassSVM" ":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" ":is_pure_julia" = "`false`" -":human_name" = "probabilistic ν-support vector classifier" -":is_supervised" = "`true`" +":human_name" = "one-class support vector machine" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nProbabilisticNuSVC\n```\n\nA model type for constructing a probabilistic ν-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM\n```\n\nDo `model = ProbabilisticNuSVC()` to construct 
an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ProbabilisticNuSVC(kernel=...)`.\n\nThis model is identical to [`NuSVC`](@ref) with the exception that it predicts probabilities, instead of actual class labels. Probabilities are computed using Platt scaling, which will add to total computation time.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n[Platt, John (1999): \"Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods.\"](https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. 
Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. 
Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM # model type\nmodel = ProbabilisticNuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> probs = predict(mach, Xnew)\n3-element UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.00313, versicolor=>0.0247, virginica=>0.972)\n UnivariateFinite{Multiclass{3}}(setosa=>0.000598, versicolor=>0.0155, virginica=>0.984)\n UnivariateFinite{Multiclass{3}}(setosa=>2.27e-6, versicolor=>2.73e-6, virginica=>1.0)\n\njulia> yhat = mode.(probs)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to 
`LIBSVM.Kernel.Linear`\nmodel = ProbabilisticNuSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\nSee also the classifiers [`NuSVC`](@ref), [`SVC`](@ref), [`ProbabilisticSVC`](@ref) and [`LinearSVC`](@ref). And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation. [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":docstring" = """```\nOneClassSVM\n```\n\nA model type for constructing a one-class support vector machine, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneClassSVM = @load OneClassSVM pkg=LIBSVM\n```\n\nDo `model = OneClassSVM()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OneClassSVM(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\nThis model is an outlier detection model delivering raw scores based on the decision function of a support vector machine. Like the [`NuSVC`](@ref) classifier, it uses the `nu` re-parameterization of the `cost` parameter appearing in standard support vector classification [`SVC`](@ref).\n\nTo extract normalized scores (\"probabilities\") wrap the model using `ProbabilisticDetector` from [OutlierDetection.jl](https://github.com/OutlierDetectionJL/OutlierDetection.jl). For threshold-based classification, wrap the probabilistic model using MLJ's `BinaryThresholdPredictor`. 
Examples of wrapping appear below.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. 
Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `transform(mach, Xnew)`: return scores for outlierness, given features `Xnew` having the same scitype as `X` above. The greater the score, the more likely it is an outlier. This score is based on the SVM decision function. For normalized scores, wrap `model` using `ProbabilisticDetector` from OutlierDetection.jl and call `predict` instead, and for threshold-based classification, wrap again using `BinaryThresholdPredictor`. See the examples below.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `orientation`: this equals `1` if the decision function for `libsvm_model` is increasing with increasing outlierness, and `-1` if it is decreasing instead. Correspondingly, the `libsvm_model` attaches `true` to outliers in the first case, and `false` in the second. 
(The `scores` given in the MLJ report and generated by `MLJ.transform` already correct for this ambiguity, which is therefore only an issue for users directly accessing `libsvm_model`.)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Generating raw scores for outlierness\n\n```\nusing MLJ\nimport LIBSVM\nimport StableRNGs.StableRNG\n\nOneClassSVM = @load OneClassSVM pkg=LIBSVM # model type\nmodel = OneClassSVM(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nrng = StableRNG(123)\nXmatrix = randn(rng, 5, 3)\nXmatrix[1, 1] = 100.0\nX = MLJ.table(Xmatrix)\n\nmach = machine(model, X) |> fit!\n\n# training scores (outliers have larger scores):\njulia> report(mach).scores\n5-element Vector{Float64}:\n 6.711689156091755e-7\n -6.740101976655081e-7\n -6.711632439648446e-7\n -6.743015858874887e-7\n -6.745393717880104e-7\n\n# scores for new data:\nXnew = MLJ.table(rand(rng, 2, 3))\n\njulia> transform(mach, rand(rng, 2, 3))\n2-element Vector{Float64}:\n -6.746293022511047e-7\n -6.744289265348623e-7\n```\n\n## Generating probabilistic predictions of outlierness\n\nContinuing the previous example:\n\n```\nusing OutlierDetection\npmodel = ProbabilisticDetector(model)\npmach = machine(pmodel, X) |> fit!\n\n# probabilistic predictions on new data:\n\njulia> y_prob = predict(pmach, Xnew)\n2-element UnivariateFiniteVector{OrderedFactor{2}, String, UInt8, Float64}:\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>9.57e-5)\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>0.0)\n\n# probabilities for outlierness:\n\njulia> pdf.(y_prob, \"outlier\")\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n\n# raw scores are still available using `transform`:\n\njulia> transform(pmach, Xnew)\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n```\n\n## Outlier classification using a probability threshold:\n\nContinuing the previous example:\n\n```\ndmodel = 
BinaryThresholdPredictor(pmodel, threshold=0.9)\ndmach = machine(dmodel, X) |> fit!\n\njulia> yhat = predict(dmach, Xnew)\n2-element CategoricalArrays.CategoricalArray{String,1,UInt8}:\n \"normal\"\n \"normal\"\n```\n\n## User-defined kernels\n\nContinuing the first example:\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = OneClassSVM(kernel=k)\nmach = machine(model, X) |> fit!\n\njulia> yhat = transform(mach, Xnew)\n2-element Vector{Float64}:\n -0.4825363352732942\n -0.4848772169720227\n```\n\nSee also [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README). For an alternative source of outlier detection models with an MLJ interface, see [OutlierDetection.jl](https://outlierdetectionjl.github.io/OutlierDetection.jl/dev/).\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/mpastell/LIBSVM.jl" ":package_name" = "LIBSVM" -":name" = "ProbabilisticNuSVC" -":target_in_fit" = "`true`" +":name" = "OneClassSVM" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" 
[MLJLIBSVMInterface.EpsilonSVR] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" ":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -8621,10 +8880,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJLIBSVMInterface.LinearSVC] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"LIBSVM.Linearsolver.LINEARSOLVER\", \"Float64\", \"Float64\", \"Float64\")`" ":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -8658,10 +8917,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJLIBSVMInterface.ProbabilisticSVC] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" ":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -8695,10 +8954,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJLIBSVMInterface.NuSVR] -":is_wrapper" = "`false`" 
+":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" ":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -8732,10 +8991,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJLIBSVMInterface.NuSVC] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" ":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -8769,84 +9028,84 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJLIBSVMInterface.SVC] ":is_wrapper" = "`false`" + +[MLJLIBSVMInterface.ProbabilisticNuSVC] +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" ":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" 
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "unknown" -":prediction_type" = ":deterministic" -":load_path" = "MLJLIBSVMInterface.SVC" -":hyperparameters" = "`(:kernel, :gamma, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":prediction_type" = ":probabilistic" +":load_path" = "MLJLIBSVMInterface.ProbabilisticNuSVC" +":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" ":is_pure_julia" = "`false`" -":human_name" = "C-support vector classifier" +":human_name" = "probabilistic ν-support vector classifier" ":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nSVC\n```\n\nA model type for constructing a C-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSVC = @load SVC pkg=LIBSVM\n```\n\nDo `model = SVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SVC(kernel=...)`.\n\nThis model predicts actual class labels. To predict probabilities, use instead [`ProbabilisticSVC`](@ref).\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). 
\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. 
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nSVC = @load SVC pkg=LIBSVM # model type\nmodel = SVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = SVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## Incorporating class weights\n\nIn either scenario above, we can do:\n\n```julia\nweights = Dict(\"virginica\" => 1, 
\"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"versicolor\"\n \"versicolor\"\n \"versicolor\"\n```\n\nSee also the classifiers [`ProbabilisticSVC`](@ref), [`NuSVC`](@ref) and [`LinearSVC`](@ref). And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":docstring" = """```\nProbabilisticNuSVC\n```\n\nA model type for constructing a probabilistic ν-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM\n```\n\nDo `model = ProbabilisticNuSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ProbabilisticNuSVC(kernel=...)`.\n\nThis model is identical to [`NuSVC`](@ref) with the exception that it predicts probabilities, instead of actual class labels. Probabilities are computed using Platt scaling, which will add to total computation time.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). 
\n\n[Platt, John (1999): \"Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods.\"](https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. 
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM # model type\nmodel = ProbabilisticNuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> probs = predict(mach, Xnew)\n3-element UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.00313, versicolor=>0.0247, virginica=>0.972)\n UnivariateFinite{Multiclass{3}}(setosa=>0.000598, versicolor=>0.0155, 
virginica=>0.984)\n UnivariateFinite{Multiclass{3}}(setosa=>2.27e-6, versicolor=>2.73e-6, virginica=>1.0)\n\njulia> yhat = mode.(probs)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = ProbabilisticNuSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\nSee also the classifiers [`NuSVC`](@ref), [`SVC`](@ref), [`ProbabilisticSVC`](@ref) and [`LinearSVC`](@ref). And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation. [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/mpastell/LIBSVM.jl" ":package_name" = "LIBSVM" -":name" = "SVC" +":name" = "ProbabilisticNuSVC" ":target_in_fit" = "`true`" -":supports_class_weights" = "`true`" +":supports_class_weights" = "`false`" ":supports_online" = "`false`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJLIBSVMInterface.OneClassSVM] ":is_wrapper" = "`false`" + +[MLJLIBSVMInterface.SVC] +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" ":package_uuid" = 
"b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Unknown}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "unknown" -":prediction_type" = ":unknown" -":load_path" = "MLJLIBSVMInterface.OneClassSVM" -":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":prediction_type" = ":deterministic" +":load_path" = "MLJLIBSVMInterface.SVC" +":hyperparameters" = "`(:kernel, :gamma, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" ":is_pure_julia" = "`false`" -":human_name" = "one-class support vector machine" -":is_supervised" = "`false`" +":human_name" = "C-support vector classifier" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nOneClassSVM\n```\n\nA model type for constructing a one-class support vector machine, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneClassSVM = @load OneClassSVM pkg=LIBSVM\n```\n\nDo `model = OneClassSVM()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `OneClassSVM(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\nThis model is an outlier detection model delivering raw scores based on the decision function of a support vector machine. Like the [`NuSVC`](@ref) classifier, it uses the `nu` re-parameterization of the `cost` parameter appearing in standard support vector classification [`SVC`](@ref).\n\nTo extract normalized scores (\"probabilities\") wrap the model using `ProbabilisticDetector` from [OutlierDetection.jl](https://github.com/OutlierDetectionJL/OutlierDetection.jl). For threshold-based classification, wrap the probabilistic model using MLJ's `BinaryThresholdPredictor`. Examples of wrapping appear below.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. 
Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `transform(mach, Xnew)`: return scores for outlierness, given features `Xnew` having the same scitype as `X` above. The greater the score, the more likely it is an outlier. This score is based on the SVM decision function. 
For normalized scores, wrap `model` using `ProbabilisticDetector` from OutlierDetection.jl and call `predict` instead, and for threshold-based classification, wrap again using `BinaryThresholdPredictor`. See the examples below.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `orientation`: this equals `1` if the decision function for `libsvm_model` is increasing with increasing outlierness, and `-1` if it is decreasing instead. Correspondingly, the `libsvm_model` attaches `true` to outliers in the first case, and `false` in the second. (The `scores` given in the MLJ report and generated by `MLJ.transform` already correct for this ambiguity, which is therefore only an issue for users directly accessing `libsvm_model`.)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Generating raw scores for outlierness\n\n```\nusing MLJ\nimport LIBSVM\nimport StableRNGs.StableRNG\n\nOneClassSVM = @load OneClassSVM pkg=LIBSVM # model type\nmodel = OneClassSVM(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nrng = StableRNG(123)\nXmatrix = randn(rng, 5, 3)\nXmatrix[1, 1] = 100.0\nX = MLJ.table(Xmatrix)\n\nmach = machine(model, X) |> fit!\n\n# training scores (outliers have larger scores):\njulia> report(mach).scores\n5-element Vector{Float64}:\n 6.711689156091755e-7\n -6.740101976655081e-7\n -6.711632439648446e-7\n -6.743015858874887e-7\n -6.745393717880104e-7\n\n# scores for new data:\nXnew = MLJ.table(rand(rng, 2, 3))\n\njulia> transform(mach, rand(rng, 2, 3))\n2-element Vector{Float64}:\n -6.746293022511047e-7\n -6.744289265348623e-7\n```\n\n## Generating probabilistic predictions of outlierness\n\nContinuing the previous example:\n\n```\nusing OutlierDetection\npmodel = ProbabilisticDetector(model)\npmach = machine(pmodel, X) |> fit!\n\n# probabilistic predictions on new 
data:\n\njulia> y_prob = predict(pmach, Xnew)\n2-element UnivariateFiniteVector{OrderedFactor{2}, String, UInt8, Float64}:\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>9.57e-5)\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>0.0)\n\n# probabilities for outlierness:\n\njulia> pdf.(y_prob, \"outlier\")\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n\n# raw scores are still available using `transform`:\n\njulia> transform(pmach, Xnew)\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n```\n\n## Outlier classification using a probability threshold:\n\nContinuing the previous example:\n\n```\ndmodel = BinaryThresholdPredictor(pmodel, threshold=0.9)\ndmach = machine(dmodel, X) |> fit!\n\njulia> yhat = predict(dmach, Xnew)\n2-element CategoricalArrays.CategoricalArray{String,1,UInt8}:\n \"normal\"\n \"normal\"\n```\n\n## User-defined kernels\n\nContinuing the first example:\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = OneClassSVM(kernel=k)\nmach = machine(model, X) |> fit!\n\njulia> yhat = transform(mach, Xnew)\n2-element Vector{Float64}:\n -0.4825363352732942\n -0.4848772169720227\n```\n\nSee also [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README). For an alternative source of outlier detection models with an MLJ interface, see [OutlierDetection.jl](https://outlierdetectionjl.github.io/OutlierDetection.jl/dev/).\n""" +":docstring" = """```\nSVC\n```\n\nA model type for constructing a C-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSVC = @load SVC pkg=LIBSVM\n```\n\nDo `model = SVC()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `SVC(kernel=...)`.\n\nThis model predicts actual class labels. To predict probabilities, use instead [`ProbabilisticSVC`](@ref).\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> (gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) -> tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions.
See [LIBSVM.jl issue 91](https://github.com/JuliaML/LIBSVM.jl/issues/91).\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `cachesize=200.0`: cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nSVC = @load SVC pkg=LIBSVM # model type\nmodel = SVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel =
SVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## Incorporating class weights\n\nIn either scenario above, we can do:\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"versicolor\"\n \"versicolor\"\n \"versicolor\"\n```\n\nSee also the classifiers [`ProbabilisticSVC`](@ref), [`NuSVC`](@ref) and [`LinearSVC`](@ref). And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/mpastell/LIBSVM.jl" ":package_name" = "LIBSVM" -":name" = "OneClassSVM" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" +":name" = "SVC" +":target_in_fit" = "`true`" +":supports_class_weights" = "`true`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJFlux.EntityEmbedder] -":constructor" = 
"`nothing`" +":is_wrapper" = "`true`" ":hyperparameter_types" = "`(\"Union{MLJFlux.MLJFluxDeterministic, MLJFlux.MLJFluxProbabilistic}\",)`" ":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" ":hyperparameter_ranges" = "`(nothing,)`" @@ -8880,10 +9139,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`true`" +":constructor" = "`nothing`" [MLJFlux.MultitargetNeuralNetworkRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" ":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -8917,10 +9176,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJFlux.NeuralNetworkClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" ":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -8954,10 +9213,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = 
"`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJFlux.ImageClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\")`" ":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -8991,10 +9250,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Image}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJFlux.NeuralNetworkBinaryClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" ":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -9028,10 +9287,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJFlux.NeuralNetworkRegressor] -":constructor" = 
"`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" ":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -9065,41 +9324,4 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJEnsembles.EnsembleModel] -":is_wrapper" = "`true`" -":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Vector{Float64}\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Int64\", \"ComputationalResources.AbstractResource\", \"Any\")`" -":package_uuid" = "50ed68f4-41fd-4504-931a-ed422449fee0" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":package_license" = "unknown" -":prediction_type" = ":probabilistic" -":load_path" = "MLJEnsembles.EnsembleModel" -":hyperparameters" = "`(:model, :atomic_weights, :bagging_fraction, :rng, :n, :acceleration, :out_of_bag_measure)`" -":is_pure_julia" = "`false`" -":human_name" = "probabilistic ensemble model" -":is_supervised" = "`true`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nEnsembleModel(model,\n atomic_weights=Float64[],\n bagging_fraction=0.8,\n n=100,\n rng=GLOBAL_RNG,\n 
acceleration=CPU1(),\n out_of_bag_measure=[])\n```\n\nCreate a model for training an ensemble of `n` clones of `model`, with optional bagging. Ensembling is useful if `fit!(machine(atom, data...))` does not create identical models on repeated calls (ie, is a stochastic model, such as a decision tree with randomized node selection criteria), or if `bagging_fraction` is set to a value less than 1.0, or both.\n\nHere the atomic `model` must support targets with scitype `AbstractVector{<:Finite}` (single-target classifiers) or `AbstractVector{<:Continuous}` (single-target regressors).\n\nIf `rng` is an integer, then `MersenneTwister(rng)` is the random number generator used for bagging. Otherwise some `AbstractRNG` object is expected.\n\nThe atomic predictions are optionally weighted according to the vector `atomic_weights` (to allow for external optimization) except in the case that `model` is a `Deterministic` classifier, in which case `atomic_weights` are ignored.\n\nThe ensemble model is `Deterministic` or `Probabilistic`, according to the corresponding supertype of `atom`. In the case of deterministic classifiers (`target_scitype(atom) <: Abstract{<:Finite}`), the predictions are majority votes, and for regressors (`target_scitype(atom)<: AbstractVector{<:Continuous}`) they are ordinary averages. 
Probabilistic predictions are obtained by averaging the atomic probability distribution/mass functions; in particular, for regressors, the ensemble prediction on each input pattern has the type `MixtureModel{VF,VS,D}` from the Distributions.jl package, where `D` is the type of predicted distribution for `atom`.\n\nSpecify `acceleration=CPUProcesses()` for distributed computing, or `CPUThreads()` for multithreading.\n\nIf a single measure or non-empty vector of measures is specified by `out_of_bag_measure`, then out-of-bag estimates of performance are written to the training report (call `report` on the trained machine wrapping the ensemble model).\n\n*Important:* If per-observation or class weights `w` (not to be confused with atomic weights) are specified when constructing a machine for the ensemble model, as in `mach = machine(ensemble_model, X, y, w)`, then `w` is used by any measures specified in `out_of_bag_measure` that support them.\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaAI/MLJEnsembles.jl" -":package_name" = "MLJEnsembles" -":name" = "EnsembleModel" -":target_in_fit" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`EnsembleModel`" +":constructor" = "`nothing`" diff --git a/src/registry/Project.toml b/src/registry/Project.toml index 5a11e7d..1e96fd3 100644 --- a/src/registry/Project.toml +++ b/src/registry/Project.toml @@ -25,6 +25,7 @@ MLJScikitLearnInterface = "5ae90465-5518-4432-b9d2-8a1def2f0cab" MLJTSVDInterface = 
"7fa162e1-0e29-41ca-a6fa-c000ca4e7e7e" MLJTestInterface = "72560011-54dd-4dc2-94f3-c5de45b75ecd" MLJText = "5e27fcf9-6bac-46ba-8580-b5712f3d6387" +MLJTransforms = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" MLJTuning = "03970b2e-30c4-11ea-3135-d1576263f10f" MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91" Maxnet = "81f79f80-22f2-4e41-ab86-00c11cf0f26f" diff --git a/test/GaussianProcesses.jl b/test/GaussianProcesses.jl deleted file mode 100755 index 172006b..0000000 --- a/test/GaussianProcesses.jl +++ /dev/null @@ -1,42 +0,0 @@ -module TestGaussianProcesses - -using MLJBase -using Test -using Random:seed! -import CategoricalArrays - -seed!(113355) - -X, y = @load_crabs - -# load code to be tested: -import MLJModels -import GaussianProcesses -using MLJModels.GaussianProcesses_ - -baregp = GPClassifier() - -# split the rows: -allrows = eachindex(y) -train, test = partition(allrows, 0.7, shuffle=true) - -fitresult, cache, report = - MLJBase.fit(baregp, 1, MLJBase.selectrows(X, train), y[train]) -yhat = predict(baregp, fitresult, MLJBase.selectrows(X, test)); - -@test sum(yhat .== y[test]) / length(y[test]) >= 0.7 # around 0.7 - -fitresult, cache, report = MLJBase.fit(baregp, 1, X, y) -yhat2 = predict(baregp, fitresult, MLJBase.selectrows(X, test)); - - -# gp = machine(baregp, X, y) -# fit!(gp) -# yhat2 = predict(gp, MLJBase.selectrows(X, test)) - -@test sum(yhat2 .== y[test]) / length(y[test]) >= 0.7 - -MLJModels.info_dict(baregp) - -end # module -true diff --git a/test/NaiveBayes.jl b/test/NaiveBayes.jl deleted file mode 100755 index fa0103c..0000000 --- a/test/NaiveBayes.jl +++ /dev/null @@ -1,112 +0,0 @@ -module TestNaiveBayes - -using Pkg -using MLJBase -using Test -import Random.seed! 
- -import MLJModels -import NaiveBayes - -using MLJModels.NaiveBayes_ -using CategoricalArrays - -## GAUSSIAN - -gaussian_classifier = GaussianNBClassifier() -MLJModels.info_dict(gaussian_classifier) - -# gaussian classifier takes continuous features -X, y = @load_iris - -train, test = partition(eachindex(y), 0.6) - -fitresultG, cacheG, reportG = fit(gaussian_classifier, 1, - selectrows(X, train), y[train]); - -gaussian_pred = predict(gaussian_classifier, fitresultG, selectrows(X, test)); - -yhat1 = gaussian_pred[1] -@test Set(classes(yhat1)) == Set(classes(y[1])) - -# test with linear data: -seed!(1234) -x1 = randn(3000); -x2 = randn(3000); -x3 = randn(3000); -X = (x1=x1, x2=x2, x3=x3); -ycont = x1 - x2 -2x3; -y = map(ycont) do η - η > 0 ? "go" : "stop" -end |> categorical; -train, test = partition(eachindex(y), 0.8); - -gaussian_classifier = GaussianNBClassifier() - -fitresultG, cacheG, reportG = MLJBase.fit(gaussian_classifier, 1, - selectrows(X, train), y[train]) - -gaussian_pred = MLJBase.predict_mode(gaussian_classifier, - fitresultG, selectrows(X, test)) - -@test sum(gaussian_pred .!= y[test])/length(y) < 0.05 - - -## MULTINOMIAL - -# first contrive some test data - -# some word counts in children's books about colours: -red = [2, 0, 1, 0, 1] -blue = [4, 1, 2, 3, 2] -green = [0, 2, 0, 6, 1] -X = (red=red, blue=blue, green=green) - -# gender of author: -y = categorical([:m, :f, :m, :f, :m]) -male = y[1] -female = y[2] - -# Note: The smoothing algorithm is to add to the training data, for -# each class observed, a row with every feature getting count of -# alpha. So smoothing also effects the class marginals (is this -# standard)? Only integer values of alpha allowed. 
- -# computing conditional probabilities by hand with Lagrangian -# smoothing (alpha=1): -red_given_m = 5/16 -blue_given_m = 9/16 -green_given_m = 2/16 -red_given_f = 1/15 -blue_given_f = 5/15 -green_given_f = 9/15 - -m_(red, blue, green) = - 4/7*(red_given_m^red)*(blue_given_m^blue)*(green_given_m^green) -f_(red, blue, green) = - 3/7*(red_given_f^red)*(blue_given_f^blue)*(green_given_f^green) -normalizer(red, blue, green) = m_(red, blue, green) + f_(red, blue, green) -m(a...) = m_(a...)/normalizer(a...) -f(a...) = f_(a...)/normalizer(a...) - -Xnew = (red=[1, 1], blue=[1, 2], green=[1, 3]) - -# prediction by hand: - -yhand =[MLJBase.UnivariateFinite([male, female], [m(1, 1, 1), f(1, 1, 1)]), - MLJBase.UnivariateFinite([male, female], [m(1, 2, 3), f(1, 2, 3)])] - -multinomial_classifier = MultinomialNBClassifier() -MLJModels.info_dict(multinomial_classifier) - -fitresultMLT, cacheMLT, reportMLT = - MLJBase.fit(multinomial_classifier, 1, X, y) - -yhat = MLJBase.predict(multinomial_classifier, fitresultMLT, Xnew) - -# see issue https://github.com/dfdx/NaiveBayes.jl/issues/42 -@test_broken pdf(yhand[1], :m) ≈ pdf(yhat[1], :m) -@test_broken pdf(yhand[1], :f) ≈ pdf(yhat[1], :f) - -end # module -true diff --git a/test/builtins/ThresholdPredictors.jl b/test/builtins/ThresholdPredictors.jl index 51f2aaf..30b0cc7 100644 --- a/test/builtins/ThresholdPredictors.jl +++ b/test/builtins/ThresholdPredictors.jl @@ -1,5 +1,5 @@ module TestThresholdPredictors -using Test, MLJModels, CategoricalArrays +using Test, MLJModels, MLJTransforms, CategoricalArrays using ScientificTypes using CategoricalDistributions diff --git a/test/builtins/Transformers.jl b/test/builtins/Transformers.jl deleted file mode 100644 index 1a3550f..0000000 --- a/test/builtins/Transformers.jl +++ /dev/null @@ -1,647 +0,0 @@ -module TestTransformer - -using Test, MLJModels -using Tables, CategoricalArrays, Random -using ScientificTypes -using StatsBase -using Statistics -using StableRNGs -stable_rng = 
StableRNGs.StableRNG(123) -using Dates: DateTime, Date, Time, Day, Hour -import MLJBase - -_get(x) = CategoricalArrays.DataAPI.unwrap(x) - - -#### UNIVARIATE DISCRETIZATION #### - -@testset "U-Discr" begin - v = randn(10000) - t = UnivariateDiscretizer(n_classes=100); - result, = MLJBase.fit(t, 1, v) - w = MLJBase.transform(t, result, v) - bad_values = filter(v - MLJBase.inverse_transform(t, result, w)) do x - abs(x) > 0.05 - end - @test length(bad_values)/length(v) < 0.06 - - # scalars: - @test MLJBase.transform(t, result, v[42]) == w[42] - r = MLJBase.inverse_transform(t, result, w)[43] - @test MLJBase.inverse_transform(t, result, w[43]) ≈ r - - # test of permitted abuses of argument: - @test MLJBase.inverse_transform(t, result, _get(w[43])) ≈ r - @test MLJBase.inverse_transform(t, result, map(_get, w)) ≈ - MLJBase.inverse_transform(t, result, w) - - # all transformed vectors should have an identical pool (determined in - # call to fit): - v2 = v[1:3] - w2 = MLJBase.transform(t, result, v2) - @test levels(w2) == levels(w) - -end - -#### STANDARDIZER #### - -@testset begin "standardization" - - # UnivariateStandardizer: - stand = UnivariateStandardizer() - f, = MLJBase.fit(stand, 1, [0, 2, 4]) - @test round.(Int, MLJBase.transform(stand, f, [0,4,8])) == [-1.0,1.0,3.0] - @test round.(Int, MLJBase.inverse_transform(stand, f, [-1, 1, 3])) == - [0, 4, 8] - - N = 5 - rand_char = rand("abcefgh", N) - while length(unique(rand_char)) < 2 - rand_char = rand("abcefgh", N) - end - X = (OverallQual = rand(UInt8, N), - GrLivArea = rand(N), - Neighborhood = categorical(rand_char, ordered=true), - x1stFlrSF = sample(1:10, N, replace=false), - TotalBsmtSF = rand(N)) - - # introduce a field of type `Char`: - x1 = categorical(map(Char, (X.OverallQual |> collect))) - - X = (x1=x1, x2=X[2], x3=X[3], x4=X[4], x5=X[5]) - - stand = Standardizer() - f, = MLJBase.fit(stand, 1, X) - Xnew = MLJBase.transform(stand, f, X) - - # test inverse: - XX = MLJBase.inverse_transform(stand, f, Xnew) - 
@test MLJBase.schema(X) == MLJBase.schema(XX) - @test XX.x1 == X.x1 - @test XX.x2 ≈ X.x2 - @test XX.x3 == X.x3 - @test XX.x4 == X.x4 - @test XX.x5 ≈ X.x5 - - # test transformation: - @test Xnew[1] == X[1] - @test MLJBase.std(Xnew[2]) ≈ 1.0 - @test Xnew[3] == X[3] - @test Xnew[4] == X[4] - @test MLJBase.std(Xnew[5]) ≈ 1.0 - - # test feature specification (ignore=false): - stand.features = [:x1, :x5] - f, = MLJBase.fit(stand, 1, X) - Xnew = MLJBase.transform(stand, f, X) - @test issubset(Set(keys(f[3])), Set(Tables.schema(X).names[[5,]])) - Xt = MLJBase.transform(stand, f, X) - @test Xnew[1] == X[1] - @test Xnew[2] == X[2] - @test Xnew[3] == X[3] - @test Xnew[4] == X[4] - @test MLJBase.std(Xnew[5]) ≈ 1.0 - - # test on ignoring a feature, even if it's listed in the `features` - stand.ignore = true - f, = MLJBase.fit(stand, 1, X) - Xnew = MLJBase.transform(stand, f, X) - @test issubset(Set(keys(f[3])), Set(Tables.schema(X).names[[2,]])) - Xt = MLJBase.transform(stand, f, X) - @test Xnew[1] == X[1] - @test MLJBase.std(Xnew[2]) ≈ 1.0 - @test Xnew[3] == X[3] - @test Xnew[4] == X[4] - @test Xnew[5] == X[5] - - # test warnings about features not encountered in fit or no - # features need transforming: - stand = Standardizer(features=[:x1, :mickey_mouse]) - @test_logs( - (:warn, r"Some specified"), - (:warn, r"No features"), - MLJBase.fit(stand, 1, X) - ) - stand.ignore = true - @test_logs (:warn, r"Some specified") MLJBase.fit(stand, 1, X) - - # features must be specified if ignore=true - @test_throws ArgumentError Standardizer(ignore=true) - - # test count, ordered_factor options: - stand = Standardizer(features=[:x3, :x4], count=true, ordered_factor=true) - f, = MLJBase.fit(stand, 1, X) - Xnew = MLJBase.transform(stand, f, X) - @test issubset(Set(keys(f[3])), Set(Tables.schema(X).names[3:4,])) - Xt = MLJBase.transform(stand, f, X) - @test_throws Exception MLJBase.inverse_transform(stand, f, Xt) - - @test Xnew[1] == X[1] - @test Xnew[2] == X[2] - @test elscitype(X[3]) <: 
OrderedFactor - @test elscitype(Xnew[3]) <: Continuous - @test MLJBase.std(Xnew[3]) ≈ 1.0 - @test elscitype(X[4]) == Count - @test elscitype(Xnew[4]) <: Continuous - @test MLJBase.std(Xnew[4]) ≈ 1.0 - @test Xnew[5] == X[5] - - stand = Standardizer(features= x-> x == (:x2)) - f, = MLJBase.fit(stand, 1, X) - Xnew = MLJBase.transform(stand, f, X) - - @test Xnew[1] == X[1] - @test MLJBase.std(Xnew[2]) ≈ 1.0 - @test Xnew[3] == X[3] - @test Xnew[4] == X[4] - @test Xnew[5] == X[5] - - # univariate case - stand = Standardizer() - f, _, _ = MLJBase.fit(stand, 1, [0, 2, 4]) - @test round.(Int, MLJBase.transform(stand, f, [0,4,8])) == [-1.0,1.0,3.0] - fp = MLJBase.fitted_params(stand, f) - @test fp.mean ≈ 2.0 - @test fp.std ≈ 2.0 -end - -### TIMETYPE TO CONTINUOUS - -@testset "TimeTypeToContinuous" begin - let dt = [Date(2018, 6, 15) + Day(i) for i=0:10], - transformer = UnivariateTimeTypeToContinuous(; step=Day(1)) - fr, _, _ = MLJBase.fit(transformer, 1, dt) - @test fr == (Date(2018, 6, 15), Day(1)) - dt_continuous = MLJBase.transform(transformer, fr, dt) - @test all(dt_continuous .== Float64.(0:10)) - end - - let dt = [Date(2018, 6, 15) + Day(i) for i=0:10], - transformer = UnivariateTimeTypeToContinuous() - fr, _, _ = @test_logs( - (:warn, r"Cannot add `TimePeriod` `step`"), - MLJBase.fit(transformer, 1, dt) - ) - fr, _, _ = @test_logs (:warn, r"C") MLJBase.fit(transformer, 1, dt) - @test fr == (Date(2018, 6, 15), Day(1)) - dt_continuous = MLJBase.transform(transformer, fr, dt) - @test all(dt_continuous .== Float64.(0:10)) - end - - let dt = [Time(0, 0, 0) + Hour(i) for i=0:3:30], - transformer = UnivariateTimeTypeToContinuous(; - step = Hour(1), - zero_time = Time(7, 0, 0), - ) - fr, _, _ = MLJBase.fit(transformer, 1, dt) - @test fr == (Time(7, 0, 0), Hour(1)) - dt_continuous = MLJBase.transform(transformer, fr, dt) - ex = collect(0:3:30) .% 24 .- 7.0 - diff = map(dt_continuous .- ex) do d - mod(d, 24.0) - end - @test all(diff .≈ 0.0) - end - - let dt = [Time(0, 0, 0) + 
Hour(i) for i=0:3:30], - transformer = UnivariateTimeTypeToContinuous() - fr, _, _ = MLJBase.fit(transformer, 1, dt) - @test fr == (Time(0, 0, 0), Hour(24)) - dt_continuous = MLJBase.transform(transformer, fr, dt) - ex = collect(0:3:30) .% 24 ./ 24 - diff = map(dt_continuous .- ex) do d - mod(d, 1.0) - end - @test all(diff .≈ 0.0) - end - - # test log messages - let dt = [DateTime(2018, 6, 15) + Day(i) for i=0:10], - step=Hour(1), - zero_time=Date(2018, 6, 15), - transformer = @test_logs( - (:warn, "Cannot add `TimePeriod` `step` to `Date` `zero_time`. Converting `zero_time` to `DateTime`."), - UnivariateTimeTypeToContinuous(; - step=step, - zero_time=zero_time, - ) - ) - fr, _, _ = MLJBase.fit(transformer, 1, dt) - - @test fr == (zero_time, step) - dt_continuous = MLJBase.transform(transformer, fr, dt) - @test all(dt_continuous .== Float64.(0:10).*24) - end - - let dt = [Time(0, 0, 0) + Hour(i) for i=0:3:30], - zero_time=Time(0, 0, 0), - step=Day(1), - transformer = @test_logs( - (:warn, "Cannot add `DatePeriod` `step` to `Time` `zero_time`. 
Converting `step` to `Hour`."), - UnivariateTimeTypeToContinuous(; - step=step, - zero_time=zero_time, - ) - ) - fr, _, _ = MLJBase.fit(transformer, 1, dt) - - @test fr == (zero_time, convert(Hour, step)) - dt_continuous = MLJBase.transform(transformer, fr, dt) - ex = Float64.((0:3:30) .% 24)./24 - diff = map(dt_continuous .- ex) do d - mod(d, 1.0) - end - @test all(diff .≈ 0.0) - end - - let dt = [DateTime(2018, 6, 15) + Day(i) for i=0:10], - step=Day(1), - zero_time=Date(2018, 6, 15), - transformer = UnivariateTimeTypeToContinuous(; - step=step, - zero_time=zero_time, - ) - fr, _, _ = @test_logs( - (:warn, r"`Date"), - MLJBase.fit(transformer, 1, dt) - ) - - @test fr == (zero_time, step) - dt_continuous = MLJBase.transform(transformer, fr, dt) - @test all(dt_continuous .== Float64.(0:10)) - end -end - - -#### UNIVARIATE BOX COX TRANSFORMER #### - -@testset "U-boxcox" begin - # create skewed non-negative vector with a zero value: - Random.seed!(1551) - v = abs.(randn(1000)) - v = v .- minimum(v) - - t = UnivariateBoxCoxTransformer(shift=true) - f, = MLJBase.fit(t, 2, v) - - e = v - MLJBase.inverse_transform(t, f, MLJBase.transform(t, f, v)) - @test sum(abs, e) <= 5000*eps() - -end - - -#### ONE HOT ENCODER #### - -@testset "One-Hot" begin - X = (name = categorical(["Ben", "John", "Mary", "John"], ordered=true), - height = [1.85, 1.67, 1.5, 1.67], - favourite_number = categorical([7, 5, 10, 5]), - age = [23, 23, 14, 23]) - - t = OneHotEncoder() - f, _, report = @test_logs((:info, r"Spawning 3"), - (:info, r"Spawning 3"), MLJBase.fit(t, 1, X)) - - Xt = MLJBase.transform(t, f, X) - - @test Xt.name__John == float.([false, true, false, true]) - @test Xt.height == X.height - @test Xt.favourite_number__10 == float.([false, false, true, false]) - @test Xt.age == X.age - @test MLJBase.schema(Xt).names == (:name__Ben, :name__John, :name__Mary, - :height, :favourite_number__5, - :favourite_number__7, :favourite_number__10, - :age) - - @test report.new_features == 
collect(MLJBase.schema(Xt).names) - - # test that *entire* pool of categoricals is used in fit, including - # unseen levels: - f, = @test_logs((:info, r"Spawning 3"), (:info, r"Spawning 3"), - MLJBase.fit(t, 1, MLJBase.selectrows(X,1:2))) - Xtsmall = MLJBase.transform(t, f, X) - @test Xt == Xtsmall - - # test that transform can be applied to subset of the data: - @test MLJBase.transform(t, f, MLJBase.selectcols(X, [:name, :age])) == - MLJBase.selectcols(MLJBase.transform(t, f, X), - [:name__Ben, :name__John, :name__Mary, :age]) - - # test ignore - t = OneHotEncoder(features=[:name,], ignore=true) - f, = MLJBase.fit(t, 0, X) - Xt = MLJBase.transform(t, f, X) - @test MLJBase.schema(Xt).names == (:name, :height, :favourite_number__5, - :favourite_number__7, :favourite_number__10, - :age) - - # test exclusion of ordered factors: - t = OneHotEncoder(ordered_factor=false) - f, = MLJBase.fit(t, 0, X) - Xt = MLJBase.transform(t, f, X) - @test keys(Xt) == (:name, :height, :favourite_number__5, - :favourite_number__7, :favourite_number__10, :age) - - @test :name in Tables.schema(Xt).names - @test :favourite_number__5 in Tables.schema(Xt).names - @test MLJBase.schema(Xt).scitypes == (OrderedFactor{3}, Continuous, - Continuous, Continuous, - Continuous, Count) - - # test that one may not add new columns: - X = (name = categorical(["Ben", "John", "Mary", "John"], ordered=true), - height = [1.85, 1.67, 1.5, 1.67], - favourite_number = categorical([7, 5, 10, 5]), - age = [23, 23, 14, 23], - gender = categorical(['M', 'M', 'F', 'M'])) - @test_throws Exception MLJBase.transform(t, f, X) - - # test to throw exception when category level mismatch is found - X = (name = categorical(["Ben", "John", "Mary", "John"], ordered=true), - height = [1.85, 1.67, 1.5, 1.67], - favourite_number = categorical([7, 5, 10, 5]), - age = [23, 23, 14, 23]) - Xmiss = (name = categorical(["John", "Mary", "John"], ordered=true), - height = X.height, - favourite_number = X.favourite_number, - age = X.age) - 
t = OneHotEncoder() - f, = MLJBase.fit(t, 0, X) - @test_throws Exception MLJBase.transform(t, f, Xmiss) - - # test the work on missing values - X = (name = categorical(["Ben", "John", "Mary", "John", missing], ordered=true), - height = [1.85, 1.67, 1.5, 1.67, 1.56], - favourite_number = categorical([7, 5, 10, missing, 5]), - age = [23, 23, 14, 23, 21]) - - t = OneHotEncoder() - f, _, report = @test_logs((:info, r"Spawning 3"), - (:info, r"Spawning 3"), MLJBase.fit(t, 1, X)) - - Xt = MLJBase.transform(t, f, X) - - @test length(Xt.name__John) == 5 - @test collect(skipmissing(Xt.name__John)) == float.([false, true, false, true]) - @test ismissing(Xt.name__John[5]) - @test Xt.height == X.height - @test length(Xt.favourite_number__10) == 5 - @test collect(skipmissing(Xt.favourite_number__10)) == float.([false, false, true, false]) - @test ismissing(Xt.favourite_number__10[4]) - @test Xt.age == X.age - @test MLJBase.schema(Xt).names == (:name__Ben, :name__John, :name__Mary, - :height, :favourite_number__5, - :favourite_number__7, :favourite_number__10, - :age) - - @test report.new_features == collect(MLJBase.schema(Xt).names) - - # test the work on missing values with drop_last = true - - X = (name = categorical(["Ben", "John", "Mary", "John", missing], ordered=true), - height = [1.85, 1.67, 1.5, 1.67, 1.56], - favourite_number = categorical([7, 5, 10, missing, 5]), - age = [23, 23, 14, 23, 21]) - - t = OneHotEncoder(drop_last = true) - f, _, report = @test_logs((:info, r"Spawning 2"), - (:info, r"Spawning 2"), MLJBase.fit(t, 1, X)) - - Xt = MLJBase.transform(t, f, X) - - @test length(Xt.name__John) == 5 - @test collect(skipmissing(Xt.name__John)) == float.([false, true, false, true]) - @test ismissing(Xt.name__John[5]) - @test Xt.height == X.height - @test ismissing(Xt.favourite_number__5[4]) - @test collect(skipmissing(Xt.favourite_number__5)) == float.([false, true, false, true]) - @test ismissing(Xt.favourite_number__5[4]) - @test Xt.age == X.age - @test 
MLJBase.schema(Xt).names == (:name__Ben, :name__John, - :height, :favourite_number__5, - :favourite_number__7, - :age) - - @test_throws Exception Xt.favourite_number__10 - @test_throws Exception Xt.name__Mary - @test report.new_features == collect(MLJBase.schema(Xt).names) - - # Test when the first value is missing - X = (name=categorical([missing, "John", "Mary", "John"]),) - t = OneHotEncoder() - f, _, _ = MLJBase.fit(t, 0, X) - Xt = MLJBase.transform(t, f, X) - @test Xt.name__John[1] === Xt.name__Mary[1] === missing - @test Xt.name__John[2:end] == Union{Missing, Float64}[1.0, 0.0, 1.0] - @test Xt.name__Mary[2:end] == Union{Missing, Float64}[0.0, 1.0, 0.0] - -end - - -#### FILL IMPUTER ####' - -@testset "UnivariateFillImputer" begin - vpure = rand(stable_rng, 10) - v = vcat([missing, ], vpure) - filler = median(vpure) - imp = MLJModels.UnivariateFillImputer() - f, = MLJBase.fit(imp, 1, v) - vnew = [missing, 1.0, missing, 2.0, 3.0] - @test MLJBase.transform(imp, f, vnew) ≈ [filler, 1.0, filler, 2.0, 3.0] - - vpure = MLJBase.coerce(rand(stable_rng, "abc", 100), OrderedFactor); - v = vcat([missing, ], vpure) - filler = mode(vpure) - imp = MLJModels.UnivariateFillImputer() - f, = MLJBase.fit(imp, 1, v) - vnew = vcat([missing, ], vpure[end-10:end], [missing, ]) - @test MLJBase.transform(imp, f, vnew) == - vcat([filler, ], vpure[end-10:end], [filler, ]) - - vpure = rand(stable_rng, Int, 10) - v = vcat([missing, ], vpure) - filler = round(Int, median(vpure)) - imp = MLJModels.UnivariateFillImputer() - f, = MLJBase.fit(imp, 1, v) - vnew = [missing, 1, missing, 2, 3] - @test MLJBase.transform(imp, f, vnew) == [filler, 1, filler, 2, 3] - - @test_throws Exception MLJBase.transform(imp, f, [missing, "1", "2"]) - - @test_throws ArgumentError MLJBase.fit(imp, 1, [missing, "1", "2"]) - -end - -@testset "FillImputer" begin - X = ( - x = [missing,ones(10)...], - y = [missing,ones(10)...], - z = [missing,ones(10)...] 
- ) - - imp = FillImputer() - f, = MLJBase.fit(imp, 1, X) - - fp = MLJBase.fitted_params(imp, f) - @test fp.features_seen_in_fit == [:x, :y, :z] - @test fp.univariate_transformer == MLJModels.UnivariateFillImputer() - @test fp.filler_given_feature[:x] ≈ 1.0 - @test fp.filler_given_feature[:x] ≈ 1.0 - @test fp.filler_given_feature[:x] ≈ 1.0 - - Xnew = MLJBase.selectrows(X, 1:5) - Xt = MLJBase.transform(imp, f, Xnew) - @test all(.!ismissing.(Xt.x)) - @test Xt.x isa Vector{Float64} # no missing - @test all(Xt.x .== 1.0) - - imp = FillImputer(features=[:x,:y]) - f, = MLJBase.fit(imp, 1, X) - Xt = MLJBase.transform(imp, f, Xnew) - @test all(Xt.x .== 1.0) - @test all(Xt.y .== 1.0) - @test ismissing(Xt.z[1]) - - # adding a new feature not seen in fit: - Xnew = (x = X.x, y=X.y, a=X.x) - @test_throws ArgumentError MLJBase.transform(imp, f, Xnew) - - # mixture of features: - X = (x = categorical([missing, missing, missing, missing, - "Old", "Young", "Middle", "Young", - "Old", "Young", "Middle", "Young"]), - y = [missing, ones(11)...], - z = [missing, missing, 1,1,1,1,1,5,1,1,1,1], - a = rand("abc", 12)) - - imp = FillImputer() - f, = MLJBase.fit(imp, 1, X) - Xnew = MLJBase.selectrows(X, 1:4) - Xt = MLJBase.transform(imp, f, Xnew) - - @test all(.!ismissing.(Xt.x)) - @test all(.!ismissing.(Xt.y)) - @test all(.!ismissing.(Xt.z)) - @test all(.!ismissing.(Xt.a)) - - @test Xt.x[1] == mode(skipmissing(X.x)) - @test Xt.y[1] == 1 - @test Xt.z[1] == 1 - - # user specifies a feature explicitly that's not supported: - imp = FillImputer(features=[:x, :a]) # :a of Unknown scitype not supported - @test_logs (:info, r"Feature a will not") MLJBase.fit(imp, 1, X) - -end - - -#### CONTINUOUS ENCODER #### - -@testset "Continuous encoder" begin - - X = (name = categorical(["Ben", "John", "Mary", "John"], ordered=true), - height = [1.85, 1.67, 1.5, 1.67], - rubbish = ["a", "b", "c", "a"], - favourite_number = categorical([7, 5, 10, 5]), - age = [23, 23, 14, 23]) - - t = ContinuousEncoder() - f, 
_, _ = @test_logs((:info, r"Some.*dropped\:.*\:rubbish\]"), - MLJBase.fit(t, 1, X)) - - Xt = MLJBase.transform(t, f, X) - @test scitype(Xt) <: MLJBase.Table(MLJBase.Continuous) - s = MLJBase.schema(Xt) - @test s.names == (:name, :height, :favourite_number__5, - :favourite_number__7, :favourite_number__10, :age) - - t = ContinuousEncoder(drop_last=true, one_hot_ordered_factors=true) - f, _, r = MLJBase.fit(t, 0, X) - Xt = MLJBase.transform(t, f, X) - @test scitype(Xt) <: MLJBase.Table(MLJBase.Continuous) - s = MLJBase.schema(Xt) - @test s.names == (:name__Ben, :name__John, :height, :favourite_number__5, - :favourite_number__7, :age) - -end - -#### INTERACTION TRANSFORMER #### - -@testset "Interaction Transformer functions" begin - # No column provided, A has scitype Continuous, B has scitype Count - table = (A = [1., 2., 3.], B = [4, 5, 6], C = ["x₁", "x₂", "x₃"]) - @test MLJModels.actualfeatures(nothing, table) == (:A, :B) - # Column provided - @test MLJModels.actualfeatures([:A, :B], table) == (:A, :B) - # Column provided, not in table - @test_throws ArgumentError("Column(s) D are not in the dataset.") MLJModels.actualfeatures([:A, :D], table) - # Non Infinite scitype column provided - @test_throws ArgumentError("Column C's scitype is not Infinite.") MLJModels.actualfeatures([:A, :C], table) -end - - -@testset "Interaction Transformer" begin - # Check constructor sanity checks: order > 1, length(features) > 1 - @test_logs (:warn, "Constraint `model.order > 1` failed; using default: order=2.") InteractionTransformer(order = 1) - @test_logs (:warn, "Constraint `if model.features !== nothing\n"* - " length(model.features) > 1\nelse\n true\nend` failed; "* - "using default: features=nothing.") InteractionTransformer(features = [:A]) - - X = (A = [1, 2, 3], B = [4, 5, 6], C = [7, 8, 9]) - # Default order=2, features=nothing, ie all columns - Xt = MLJBase.transform(InteractionTransformer(), nothing, X) - @test Xt == ( - A = [1, 2, 3], - B = [4, 5, 6], - C = [7, 8, 9], - 
A_B = [4, 10, 18], - A_C = [7, 16, 27], - B_C = [28, 40, 54] - ) - # order=3, features=nothing, ie all columns - Xt = MLJBase.transform(InteractionTransformer(order=3), nothing, X) - @test Xt == ( - A = [1, 2, 3], - B = [4, 5, 6], - C = [7, 8, 9], - A_B = [4, 10, 18], - A_C = [7, 16, 27], - B_C = [28, 40, 54], - A_B_C = [28, 80, 162] - ) - # order=2, features=[:A, :B], ie all columns - Xt =MLJBase.transform(InteractionTransformer(order=2, features=[:A, :B]), nothing, X) - @test Xt == ( - A = [1, 2, 3], - B = [4, 5, 6], - C = [7, 8, 9], - A_B = [4, 10, 18] - ) - # order=3, features=[:A, :B, :C], some non continuous columns - X = merge(X, (D = ["x₁", "x₂", "x₃"],)) - Xt = MLJBase.transform(InteractionTransformer(order=3, features=[:A, :B, :C]), nothing, X) - @test Xt == ( - A = [1, 2, 3], - B = [4, 5, 6], - C = [7, 8, 9], - D = ["x₁", "x₂", "x₃"], - A_B = [4, 10, 18], - A_C = [7, 16, 27], - B_C = [28, 40, 54], - A_B_C = [28, 80, 162] - ) - # order=2, features=nothing, only continuous columns are dealt with - Xt = MLJBase.transform(InteractionTransformer(order=2), nothing, X) - @test Xt == ( - A = [1, 2, 3], - B = [4, 5, 6], - C = [7, 8, 9], - D = ["x₁", "x₂", "x₃"], - A_B = [4, 10, 18], - A_C = [7, 16, 27], - B_C = [28, 40, 54], - ) -end - -end -true diff --git a/test/model_search.jl b/test/model_search.jl index 87b03dd..24eeefe 100644 --- a/test/model_search.jl +++ b/test/model_search.jl @@ -1,7 +1,7 @@ module TestModelSearch using Test -using MLJModels +using MLJModels, MLJTransforms using MLJBase using ScientificTypes using Markdown @@ -36,7 +36,7 @@ tree = info("DecisionTreeRegressor", pkg="DecisionTree") # Note that these tests assume model registry metadata is up to date # with the latest trait values in `src/builtins/`: @test info(ConstantRegressor) == cnst - @test info(Standardizer()) == info("Standardizer", pkg="MLJModels") + @test info(Standardizer()) == info("Standardizer", pkg="MLJTransforms") @test doc("ConstantRegressor", pkg="MLJModels") == 
cnst.docstring |> Markdown.parse @test_throws MLJModels.ERR_DOC_EXPECTS_STRING doc(ConstantRegressor) @test_throws MLJModels.ERR_DOC_EXPECTS_STRING doc(ConstantRegressor()) diff --git a/test/runtests.jl b/test/runtests.jl index 05fea36..b4ebfd9 100755 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,6 @@ import Pkg -using Test, MLJModels +using Test, MLJModels, MLJTransforms @testset "metadata" begin @testset "metadata.jl" begin @@ -18,9 +18,6 @@ end @testset "Constant.jl" begin @test include("builtins/Constant.jl") end - @testset "Transformers.jl" begin - @test include("builtins/Transformers.jl") - end @testset "ThresholdPredictors" begin @test include("builtins/ThresholdPredictors.jl") end